In [39]:

import yfinance as yf
import pandas as pd
import numpy as np

In [41]:
ticker_symbol = input("Enter stock symbol (e.g., GAIL.NS): ")
start_date = input("Enter start date (YYYY-MM-DD): ")
end_date = input("Enter end date (YYYY-MM-DD): ")

Enter stock symbol (e.g., GAIL.NS):  GAIL.NS
Enter start date (YYYY-MM-DD):  2015-01-01
Enter end date (YYYY-MM-DD):  2020-01-01


In [42]:
stock_data = yf.download(ticker_symbol, start=start_date, end=end_date)

[*********************100%%**********************]  1 of 1 completed


In [43]:
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,83.062538,83.465668,82.687538,82.987541,61.714222,4863981
2015-01-02,83.193794,84.037544,82.996918,83.362541,61.993095,3400766
2015-01-05,83.128166,84.534416,82.575043,83.625038,62.188301,4191283
2015-01-06,83.231293,83.231293,80.250038,80.981293,60.222256,16632727
2015-01-07,80.250038,81.131287,78.281288,78.918793,58.688477,9693360
...,...,...,...,...,...,...
2019-12-24,80.199997,80.800003,79.566666,79.833336,65.420357,7000588
2019-12-26,79.800003,80.233330,78.400002,78.733330,64.518944,12320313
2019-12-27,78.866669,79.466667,78.666664,79.033333,64.764778,7823829
2019-12-30,79.033333,80.300003,78.900002,79.266663,64.955994,8562607


In [44]:
stock_data['Next_Day_Trend'] = np.where(stock_data['Close'].shift(-1) > stock_data['Close'], 1, 0)

In [45]:
clean_stock_data = stock_data.fillna(method='ffill')

In [46]:
clean_stock_data['Short_MA'] = clean_stock_data['Close'].rolling(window=10).mean()
clean_stock_data['Long_MA'] = clean_stock_data['Close'].rolling(window=50).mean()

In [47]:
delta = clean_stock_data['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=50).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=50).mean()
RS = gain / loss
RSI = 100 - (100 / (1 + RS))
clean_stock_data['RSI'] = RSI

# Drop NaN values generated by rolling calculations
clean_stock_data.dropna(inplace=True)

In [48]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.metrics import BinaryAccuracy


In [49]:
clean_stock_data['Next_Day_Trend'] = np.where(clean_stock_data['Close'].shift(-1) > clean_stock_data['Close'], 1, 0)

In [50]:
features = ['Short_MA', 'Long_MA', 'RSI', 'Next_Day_Trend']
target = 'Close'

In [53]:
train_size = int(0.8 * len(clean_stock_data))
train_data = clean_stock_data[:train_size]
test_data = clean_stock_data[train_size:]


In [54]:

# Training using Linear Regression as a baseline model
model_lr = LinearRegression()
model_lr.fit(train_data[features], train_data[target])



In [55]:

# Training using Support Vector Machine (SVR)
model_svr = SVR(kernel='linear')
model_svr.fit(train_data[features], train_data[target])

In [56]:
# Training using Random Forest
model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
model_rf.fit(train_data[features], train_data[target])

In [57]:
# Training using Gradient Boosting
model_gb = GradientBoostingRegressor(n_estimators=100, random_state=42)
model_gb.fit(train_data[features], train_data[target])


In [58]:
# LSTM model
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(clean_stock_data[features + [target]])
X = []
y = []
for i in range(50, len(scaled_data)):
    X.append(scaled_data[i-50:i, :-1])
    y.append(scaled_data[i, -1])

X, y = np.array(X), np.array(y)
X = np.reshape(X, (X.shape[0], X.shape[1], len(features)))

model_lstm = Sequential()
model_lstm.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model_lstm.add(LSTM(units=50, return_sequences=False))
model_lstm.add(Dense(units=1))
model_lstm.compile(optimizer='adam', loss='mean_squared_error')
model_lstm.fit(X, y, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0xf0ceadc4c0>

In [59]:
# Evaluation
def evaluate_model(model, test_features, test_target):
    predictions = model.predict(test_features)
    mse = mean_squared_error(test_target, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(test_target, predictions)
    r2 = r2_score(test_target, predictions)
    return mse, rmse, mae, r2


In [60]:
# Evaluate Linear Regression model
mse_lr, rmse_lr, mae_lr, r2_lr = evaluate_model(model_lr, test_data[features], test_data[target])
print("Linear Regression:")
print(f"MSE: {mse_lr}, RMSE: {rmse_lr}, MAE: {mae_lr}, R-squared: {r2_lr}")

Linear Regression:
MSE: 8.31992307828202, RMSE: 2.884427686436604, MAE: 2.307014419031996, R-squared: 0.9577966825279948


In [30]:
# Evaluate SVR model
mse_svr, rmse_svr, mae_svr, r2_svr = evaluate_model(model_svr, test_data[features], test_data[target])
print("Support Vector Machine (SVR):")
print(f"MSE: {mse_svr}, RMSE: {rmse_svr}, MAE: {mae_svr}, R-squared: {r2_svr}")

Support Vector Machine (SVR):
MSE: 8.936578157315399, RMSE: 2.9894110050836766, MAE: 2.3557594534533206, R-squared: 0.9632027883681649


In [31]:

# Evaluate Random Forest model
mse_rf, rmse_rf, mae_rf, r2_rf = evaluate_model(model_rf, test_data[features], test_data[target])
print("Random Forest:")
print(f"MSE: {mse_rf}, RMSE: {rmse_rf}, MAE: {mae_rf}, R-squared: {r2_rf}")

Random Forest:
MSE: 11.715181919455622, RMSE: 3.4227447932113813, MAE: 2.6687837893622266, R-squared: 0.9517616227590676


In [32]:
# Evaluate Gradient Boosting model
mse_gb, rmse_gb, mae_gb, r2_gb = evaluate_model(model_gb, test_data[features], test_data[target])
print("Gradient Boosting:")
print(f"MSE: {mse_gb}, RMSE: {rmse_gb}, MAE: {mae_gb}, R-squared: {r2_gb}")

Gradient Boosting:
MSE: 9.519333645087622, RMSE: 3.0853417387848014, MAE: 2.392933862255995, R-squared: 0.9608032371489307


In [33]:
# Evaluate LSTM model
test_data_lstm = scaled_data[train_size-50:, :-1]
X_test_lstm = []
for i in range(50, len(test_data_lstm)):
    X_test_lstm.append(test_data_lstm[i-50:i, :])

X_test_lstm = np.array(X_test_lstm)
X_test_lstm = np.reshape(X_test_lstm, (X_test_lstm.shape[0], X_test_lstm.shape[1], len(features)))

predictions_lstm = model_lstm.predict(X_test_lstm)
predictions_lstm = scaler.inverse_transform(np.concatenate((test_data_lstm[50:, :], predictions_lstm), axis=1))[:, -1]

mse_lstm = mean_squared_error(clean_stock_data['Close'].values[train_size:], predictions_lstm)
print("LSTM:")
print(f"MSE: {mse_lstm}")

LSTM:
MSE: 7.978692864096618


In [34]:
model_mse = {
    'Linear Regression': mse_lr,
    'SVR': mse_svr,
    'Random Forest': mse_rf,
    'Gradient Boosting': mse_gb,
    'LSTM': mse_lstm
}


In [35]:
# Find the model with the lowest MSE
best_model = min(model_mse, key=model_mse.get)
lowest_mse = model_mse[best_model]

In [36]:
# Print MSE values for all models
for model, mse in model_mse.items():
    print(f"{model} MSE: {mse}")

# Print the best model with the lowest MSE
print(f"\nThe best model with the lowest MSE is {best_model} with MSE: {lowest_mse}")

Linear Regression MSE: 8.812145561516894
SVR MSE: 8.936578157315399
Random Forest MSE: 11.715181919455622
Gradient Boosting MSE: 9.519333645087622
LSTM MSE: 7.978692864096618

The best model with the lowest MSE is LSTM with MSE: 7.978692864096618


In [37]:
# Assuming LSTM model is the best-performing model

future_days = 1  # Replace with the desired number of future days

last_short_ma = clean_stock_data['Short_MA'].iloc[-1]
last_long_ma = clean_stock_data['Long_MA'].iloc[-1]
last_rsi = clean_stock_data['RSI'].iloc[-1]
last_trend = clean_stock_data['Next_Day_Trend'].iloc[-1]

future_short_ma_values = []  # Store future Short_MA values
future_long_ma_values = []  # Store future Long_MA values
future_rsi_values = []  # Store future RSI values
future_trend_values = []  # Store future Next_Day_Trend values

for i in range(future_days):
    # Example: Suppose you're predicting the next day's trend based on today's values
    # Replace this with your own forecasting logic
    future_short_ma_values.append(last_short_ma + 1)  # Placeholder logic
    future_long_ma_values.append(last_long_ma + 1)  # Placeholder logic
    future_rsi_values.append(last_rsi + 1)  # Placeholder logic
    future_trend_values.append(last_trend + 1)  # Placeholder logic
    
    # Update the last known values for the next iteration
    last_short_ma = future_short_ma_values[-1]
    last_long_ma = future_long_ma_values[-1]
    last_rsi = future_rsi_values[-1]
    last_trend = future_trend_values[-1]

# Construct a DataFrame for future features
future_features = pd.DataFrame({
    'Short_MA': future_short_ma_values,
    'Long_MA': future_long_ma_values,
    'RSI': future_rsi_values,
    'Next_Day_Trend': future_trend_values
})

# Predict open prices for the next 'n' days using the selected model (e.g., Linear Regression)
predicted_open_prices = model_lr.predict(future_features)
print("Predicted future open prices:")
print(predicted_open_prices)

Predicted future open prices:
[86.33302167]


In [64]:

model_lr.score(test_data[features], test_data[target])

0.9577966825279948

In [65]:
model_svr.score(test_data[features], test_data[target])

0.956798215137612

In [66]:
model_rf.score(test_data[features], test_data[target])

0.9050494105464935

In [67]:
model_gb.score(test_data[features], test_data[target])

0.9341484250677458