# Stock prediction

This program predicts stock prices by using ML models.


In [13]:
# import quandl
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [31]:
#Stock data
df = yf.download("AAPL", start="2015-01-01", end="2025-11-01", progress=True)

  df = yf.download("AAPL", start="2015-01-01", end="2025-11-01", progress=True)
[*********************100%***********************]  1 of 1 completed


In [32]:
df.head()

Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-01-02,24.261051,24.729274,23.821675,24.718178,212818400
2015-01-05,23.57757,24.110146,23.391169,24.03026,257142000
2015-01-06,23.579792,23.839422,23.218083,23.641926,263188400
2015-01-07,23.910437,24.010294,23.677434,23.788387,160423600
2015-01-08,24.829128,24.886824,24.121246,24.238858,237458000


In [33]:
# Adjusted Close Price
if "Adj Close" not in df.columns:
    df_adj = yf.download("AAPL", start="2015-01-01", end="2025-11-01", auto_adjust=True)
    df["Adj Close"] = df_adj["Close"]

[*********************100%***********************]  1 of 1 completed


In [35]:
print(df.head())

Price           Close       High        Low       Open     Volume  Adj Close
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL           
Date                                                                        
2015-01-02  24.261051  24.729274  23.821675  24.718178  212818400  24.261051
2015-01-05  23.577570  24.110146  23.391169  24.030260  257142000  23.577570
2015-01-06  23.579792  23.839422  23.218083  23.641926  263188400  23.579792
2015-01-07  23.910437  24.010294  23.677434  23.788387  160423600  23.910437
2015-01-08  24.829128  24.886824  24.121246  24.238858  237458000  24.829128


In [36]:
# Get the Adjusted Close Price
df = df[["Adj Close"]]
print(df.head())

Price       Adj Close
Ticker               
Date                 
2015-01-02  24.261051
2015-01-05  23.577570
2015-01-06  23.579792
2015-01-07  23.910437
2015-01-08  24.829128


In [40]:
# A variable for predicting n days out into the future
forecast_out = 30

In [41]:
# Create another column (the target or dependent variable) shifted n units up
df["Prediction"]=df["Adj Close"].shift(-forecast_out)
df.head()

Price,Adj Close,Prediction
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2015-01-02,24.261051,28.478273
2015-01-05,23.57757,28.676546
2015-01-06,23.579792,28.616396
2015-01-07,23.910437,28.850317
2015-01-08,24.829128,29.63006


In [42]:
# Check
df.tail()

Price,Adj Close,Prediction
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2025-10-27,268.809998,
2025-10-28,269.0,
2025-10-29,269.700012,
2025-10-30,271.399994,
2025-10-31,270.369995,


In [48]:
# Create the independent data set (X)
# Convert the dataframe to a numpy array
X = np.array(df.drop(["Prediction"], axis=1))

# Remove the last n rows
X = X[:-forecast_out]
print(X)

[[ 24.26105118]
 [ 23.57756996]
 [ 23.57979202]
 ...
 [238.99000549]
 [237.88000488]
 [245.5       ]]


In [49]:
# Create the dependent data set (y)
# Convert the df to a numpy array
y = df["Prediction"][:-forecast_out]
print(y)

Date
2015-01-02     28.478273
2015-01-05     28.676546
2015-01-06     28.616396
2015-01-07     28.850317
2015-01-08     29.630060
                 ...    
2025-09-15    268.809998
2025-09-16    269.000000
2025-09-17    269.700012
2025-09-18    271.399994
2025-09-19    270.369995
Name: Prediction, Length: 2695, dtype: float64


In [50]:
# Split the data into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [51]:
# Create and train the Support Vector Machine (Regressor)
svr_rbf = SVR(kernel="rbf", C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)

In [53]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction
svm_confidence = svr_rbf.score(x_test, y_test)
print("Confidence: ", svm_confidence)

Confidence:  0.9729099578519766


In [56]:
# Create and train the Linear Regression Model
lr = LinearRegression()
# Train the Model
lr.fit(x_train, y_train)

In [58]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction
lr_confidence = lr.score(x_test, y_test)
print("Lr confidence: ", lr_confidence)

Lr confidence:  0.9754997550081214


In [61]:
# Set x_forecast equal to the ;asy 30 rows of the original data set from Adj Close column
x_forecast = np.array(df.drop(["Prediction"], axis=1))[-forecast_out:]
print(x_forecast)

[[256.07998657]
 [254.42999268]
 [252.30999756]
 [256.86999512]
 [255.46000671]
 [254.42999268]
 [254.63000488]
 [255.44999695]
 [257.13000488]
 [258.01998901]
 [256.69000244]
 [256.48001099]
 [258.05999756]
 [254.03999329]
 [245.27000427]
 [247.66000366]
 [247.77000427]
 [249.33999634]
 [247.44999695]
 [252.28999329]
 [262.23999023]
 [262.76998901]
 [258.45001221]
 [259.57998657]
 [262.82000732]
 [268.80999756]
 [269.        ]
 [269.70001221]
 [271.3999939 ]
 [270.36999512]]


In [62]:
# Linear Regression Model: Predictions for the next n days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)

[260.04590277 258.38153668 256.24307544 260.84279264 259.4205226
 258.38153668 258.58329108 259.41042565 261.10506721 262.00280351
 260.66123215 260.44941157 262.04316054 257.98814022 249.14176063
 251.552578   251.66353676 253.24720417 251.34074203 256.22289692
 266.25956285 266.79417815 262.4365724  263.57638935 266.84463215
 272.8867979  273.07845535 273.78456498 275.49935428 274.46038375]


In [64]:
# Support Vector Regressor Model: Predictions for the next n days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[230.35864166 235.73878482 231.97654246 226.84299721 234.3125405
 235.73878482 236.1348267  234.36566231 226.85615205 231.33703036
 227.22725481 228.0380203  231.63051202 234.30544733 218.06619588
 233.25115021 233.25024418 232.28603573 233.0011788  232.05497036
 175.39659709 164.78670287 234.36186395 234.14612441 163.9199816
 143.28600101 143.28501922 143.28346563 143.28305539 143.28313477]
