In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime

# 1. Data Collection

In [2]:
# Download roughly ten years of daily price history for one ticker, ending now.
now = datetime.now()

ticker = 'AAPL'
# Step back ten calendar years with DateOffset instead of
# datetime(now.year - 10, now.month, now.day): the latter raises ValueError
# when the notebook is run on Feb 29 (the target year is not a leap year),
# whereas DateOffset clamps to Feb 28. normalize() keeps the midnight start
# the original expression produced.
start = (pd.Timestamp(now).normalize() - pd.DateOffset(years=10)).to_pydatetime()
end = now
df = yf.download(ticker, start, end)
df

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2016-01-14,22.438671,22.655123,21.586399,22.086940,252680400
2016-01-15,21.899794,22.030566,21.500714,21.690107,319335600
2016-01-19,21.793831,22.242514,21.532286,22.188402,212350800
2016-01-20,21.823135,22.138792,21.063305,21.442092,289337600
2016-01-21,21.712658,22.068898,21.406020,21.884013,208646000
...,...,...,...,...,...
2026-01-07,260.329987,263.679993,259.809998,263.200012,48309800
2026-01-08,259.040009,259.290009,255.699997,257.019989,50419300
2026-01-09,259.369995,260.209991,256.220001,259.079987,39997000
2026-01-12,260.250000,261.299988,256.799988,259.160004,45263800


# 2. Data Exploration And Visualization

In [None]:
# Confirm what kind of object the download returned.
type(df)

In [None]:
# (rows, columns) of the downloaded frame.
df.shape

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics for each numeric column.
df.describe()

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Preview the first rows while Date is still the index.
df.head()

In [3]:
# Move the Date index into a regular column so rows are numbered 0..N-1.
# Guarded so that re-running this cell is a no-op: an unguarded reset_index()
# on the already-reset frame would insert a spurious 'index' column.
if isinstance(df.index, pd.DatetimeIndex):
    df = df.reset_index()

In [8]:
# Preview the frame after the index reset; Date should now be a column.
df.head()

Price,Date,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL
0,2016-01-11,22.21545,22.334949,21.947142,22.314657,198957600
1,2016-01-12,22.537878,22.702471,22.285352,22.670906,196616800
2,2016-01-13,21.958424,22.815206,21.938132,22.619048,249758400
3,2016-01-14,22.438665,22.655117,21.586393,22.086934,252680400
4,2016-01-15,21.899794,22.030566,21.500714,21.690107,319335600


In [None]:
# List the column labels (a two-level Price/Ticker header per the displayed frame).
df.columns

In [None]:
# Line chart of the closing price across the whole download window.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Close'])
ax.set_title(ticker)
ax.set_xlabel('Days')
ax.set_ylabel('Close Price');

# 3. Feature Engineering

In [None]:
# Toy single-column frame used to try out a rolling mean before applying it to prices.
sample_values = [11, 223, 11, 33, 45, 6677, 89, 90]
df1 = pd.DataFrame(sample_values)
df1


In [None]:
# 5-row rolling mean of the toy values; the first four rows are NaN because
# the window is not yet full.
# Select column 0 explicitly: df1.rolling(5).mean() averages *every* column,
# so re-running the cell (when MA_5 already exists) would try to assign a
# two-column frame to a single key and raise.
df1['MA_5'] = df1[0].rolling(5).mean()
df1

### 100-day Moving Average

In [4]:
# 100-row rolling mean of the close; rows before the window fills are NaN.
close_ma_100 = df['Close'].rolling(window=100).mean()
df['MA_100'] = close_ma_100
df.head(105)

Price,Date,Close,High,Low,Open,Volume,MA_100
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 7_level_1
0,2016-01-14,22.438671,22.655123,21.586399,22.086940,252680400,
1,2016-01-15,21.899794,22.030566,21.500714,21.690107,319335600,
2,2016-01-19,21.793831,22.242514,21.532286,22.188402,212350800,
3,2016-01-20,21.823135,22.138792,21.063305,21.442092,289337600,
4,2016-01-21,21.712658,22.068898,21.406020,21.884013,208646000,
...,...,...,...,...,...,...,...
100,2016-06-08,22.565500,22.706904,22.506201,22.583745,83392400,22.703916
101,2016-06-09,22.727432,22.804976,22.456025,22.465148,106405600,22.712192
102,2016-06-10,22.540421,22.659018,22.460595,22.471998,126851600,22.719658
103,2016-06-13,22.200583,22.606553,22.145846,22.508482,152082000,22.723433


In [None]:
# Closing price with its 100-row moving average drawn on top.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Close'])
ax.plot(df['MA_100'])
ax.set(title="100 day Moving Average", xlabel='Days', ylabel='Close Price');

### 200-day Moving Average

In [5]:
# 200-row rolling mean of the close; rows before the window fills are NaN.
close_ma_200 = df['Close'].rolling(window=200).mean()
df['MA_200'] = close_ma_200
df.head(205)

Price,Date,Close,High,Low,Open,Volume,MA_100,MA_200
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2016-01-14,22.438671,22.655123,21.586399,22.086940,252680400,,
1,2016-01-15,21.899794,22.030566,21.500714,21.690107,319335600,,
2,2016-01-19,21.793831,22.242514,21.532286,22.188402,212350800,,
3,2016-01-20,21.823135,22.138792,21.063305,21.442092,289337600,,
4,2016-01-21,21.712658,22.068898,21.406020,21.884013,208646000,,
...,...,...,...,...,...,...,...,...
200,2016-10-28,26.076921,26.418590,26.015007,26.111318,151446800,24.320727,23.512321
201,2016-10-31,26.035643,26.193866,25.957677,26.060867,105677600,24.353809,23.533001
202,2016-11-01,25.565559,26.088382,25.345424,26.017297,175303200,24.384060,23.551859
203,2016-11-02,25.588495,25.762770,25.505946,25.544928,113326800,24.417939,23.570686


In [None]:
# Closing price with the 100-row (green) and 200-row (red) moving averages.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Close'])
ax.plot(df['MA_100'], 'g')
ax.plot(df['MA_200'], 'r')
ax.set(title="200 day Moving Average", xlabel='Days', ylabel='Close Price');


### Calculate the percentage change in each trading session

In [6]:
# Fractional change of the close from one row to the next (first row is NaN).
session_returns = df['Close'].pct_change()
df['Percentage_Changed'] = session_returns
df[['Close', 'Percentage_Changed']]

Price,Close,Percentage_Changed
Ticker,AAPL,Unnamed: 2_level_1
0,22.438671,
1,21.899794,-0.024016
2,21.793831,-0.004839
3,21.823135,0.001345
4,21.712658,-0.005062
...,...,...
2509,260.329987,-0.007737
2510,259.040009,-0.004955
2511,259.369995,0.001274
2512,260.250000,0.003393


In [None]:
# Session-to-session fractional change of the close.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Percentage_Changed']);

# 4. Data Preprocessing

In [None]:
# !pip install scikit-learn keras

In [None]:
# Row count here determines the sizes of the 70/30 split that follows.
df.shape

In [7]:
# Chronological split: the first 70% of rows train the model, the rest test it.
split_at = int(len(df) * 0.7)
data_training = pd.DataFrame(df['Close'].iloc[:split_at])
data_testing = pd.DataFrame(df['Close'].iloc[split_at:])

print(data_training)
print(data_testing)

Ticker        AAPL
0        22.438671
1        21.899794
2        21.793831
3        21.823135
4        21.712658
...            ...
1754    123.211205
1755    124.482048
1756    123.161942
1757    127.693573
1758    128.215683

[1759 rows x 1 columns]
Ticker        AAPL
1759    128.787079
1760    131.506073
1761    131.427261
1762    132.757172
1763    133.919662
...            ...
2509    260.329987
2510    259.040009
2511    259.369995
2512    260.250000
2513    261.049988

[755 rows x 1 columns]


In [None]:
# Scale the data into the range 0 to 1

In [8]:
# Scaler that rescales values into the requested range, here (0, 1).
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

In [9]:
# Fit the scaler on the training split and scale that split in one step.
data_training_array = scaler.fit_transform(data_training)
data_training_array

array([[0.01163593],
       [0.00821809],
       [0.00754602],
       ...,
       [0.6504745 ],
       [0.67921643],
       [0.68252792]])

In [None]:
# (rows, 1): one scaled close value per training row.
data_training_array.shape

# 5. Sequence Creation

In [10]:
# Build supervised samples: each input is the previous 100 scaled closes and
# the target is the scaled close that immediately follows that window.
lookback = 100
n_train_rows = data_training_array.shape[0]

x_train = np.array(
    [data_training_array[stop - lookback:stop] for stop in range(lookback, n_train_rows)]
)
y_train = np.array(
    [data_training_array[stop, 0] for stop in range(lookback, n_train_rows)]
)

In [None]:
# Inspect the stacked input windows.
x_train

In [None]:
# Number of array dimensions of the stacked input windows.
x_train.ndim

In [None]:
# Number of array dimensions of the target vector.
y_train.ndim

# 6. Model Building 

In [None]:
# !pip install tensorflow

In [11]:
# Record the TensorFlow version used for this run.
import tensorflow as tf
tf.__version__

'2.20.0'

In [12]:
# ML model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Input

In [13]:
# Two stacked LSTM layers followed by two dense layers; the input is a
# 100-step window with one feature and the output is a single value.
model = Sequential([
    Input(shape=(100, 1)),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(units=128, activation='tanh', return_sequences=True),
    LSTM(units=64),
    Dense(25),
    Dense(1),
])

# 7. Train Model

In [14]:
# Adam optimizer with mean-squared-error loss, trained for 50 epochs.
# NOTE(review): no validation data is passed, so the log reports training loss only.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=50)

Epoch 1/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 114ms/step - loss: 0.0089
Epoch 2/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 110ms/step - loss: 6.2684e-04
Epoch 3/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 109ms/step - loss: 5.8989e-04
Epoch 4/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 140ms/step - loss: 5.7366e-04
Epoch 5/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 119ms/step - loss: 5.7381e-04
Epoch 6/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 116ms/step - loss: 5.2362e-04
Epoch 7/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 135ms/step - loss: 4.7424e-04
Epoch 8/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 108ms/step - loss: 4.7616e-04
Epoch 9/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 153ms/step - loss: 4.6505e-04
Epoch 10/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x231f4fcaa10>

In [15]:
# Print the layer-by-layer summary, then write the trained model to disk.
model.summary()
# NOTE(review): the .h5 suffix selects the legacy HDF5 format; recent Keras
# releases recommend the native .keras format. Confirm what downstream
# loaders expect before renaming the file.
model.save("Stock_prediction_model.h5")



# 8. Preparing Test Data

In [None]:
# The last 100 training rows: the look-back history for the first test window.
data_training.tail(100)

In [None]:
# Inspect the held-out test split.
data_testing

In [None]:
# Keep the last 100 training rows so the first test row has a full 100-step history.
past_100_days = data_training.tail(100)

In [None]:
# Inspect the 100-row look-back slice.
past_100_days

In [None]:
# Prepend the look-back rows to the test split and renumber the rows from 0.
frames = [past_100_days, data_testing]
final_df = pd.concat(frames, ignore_index=True)
final_df

In [None]:
# Scale the test inputs with the scaler that was fitted on the training split.
# Use transform(), not fit_transform(): refitting here would recompute the
# min/max from test prices, so the model would see inputs on a different scale
# from the one it was trained on and test-set information would leak into
# preprocessing. Values above 1 are expected when test prices exceed the
# training maximum.
input_data = scaler.transform(final_df)
input_data

In [None]:
# (look-back rows + test rows, 1).
input_data.shape

In [None]:
# Build test samples the same way as the training ones: a 100-step window of
# scaled closes as input, the next scaled close as the target.
window = 100
n_input_rows = input_data.shape[0]

x_test = [input_data[stop - window:stop] for stop in range(window, n_input_rows)]
y_test = [input_data[stop, 0] for stop in range(window, n_input_rows)]

In [None]:
# Stack the per-sample windows and targets into NumPy arrays for the model.
x_test = np.array(x_test)
y_test = np.array(y_test)

In [None]:
# Inspect the stacked test windows.
x_test

# 9. Making Prediction

In [None]:
# Run the trained model over every test window; outputs are still in scaled units.
y_predicted = model.predict(x_test)

In [None]:
# Predictions in scaled units.
y_predicted

In [None]:
# Targets in scaled units.
y_test

In [None]:
# Map predictions and targets from scaled units back to prices.
# NOTE: this cell overwrites its own inputs, so running it a second time
# applies the inverse transform twice. Re-run the prediction and test-window
# cells before re-running this one.
y_predicted = scaler.inverse_transform(y_predicted.reshape(-1,1)).flatten()
y_test = scaler.inverse_transform(y_test.reshape(-1,1)).flatten()

In [None]:
# Predictions after the inverse transform.
y_predicted

In [None]:
# Targets after the inverse transform.
y_test

In [None]:
# Actual (blue) versus predicted (red) price over the whole test period.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, 'b', label='Original Price')
ax.plot(y_predicted, 'r', label='Predicted Price')
ax.legend()
ax.set_xlabel('Days')
ax.set_ylabel('Price')

In [None]:
# Same comparison, zoomed to test rows 450-750 and the 160-330 price band.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, 'b', label='Original Price')
ax.plot(y_predicted, 'r', label='Predicted Price')
ax.legend()
ax.set_xlabel('Days')
ax.set_ylabel('Price')
ax.set_xlim(450, 750)
ax.set_ylim(160, 330)

# 10. Model Evaluation

In [None]:
# Mean Squared Error(MSE)
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Mean squared error between the inverse-transformed targets and predictions.
mse = mean_squared_error(y_test, y_predicted)
print(f"MSE : {mse}")

In [None]:
# Root Mean Squared Error (RMSE): square root of the MSE, so it is in the
# same units as the values being compared.
rmse = np.sqrt(mse)
print("RMSE :", rmse )

In [None]:
# R-squared: share of the variance in the targets explained by the predictions
# (1.0 is a perfect fit).
r2 = r2_score(y_test,y_predicted)
print("R-Squared :", r2)