<a href="https://colab.research.google.com/github/mithunboraiah007/project1/blob/main/stock_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:

import yfinance as yf
import pandas as pd
import numpy as np

In [6]:
# Collect the query parameters interactively. Surrounding whitespace is stripped
# so a stray space is not passed on to the download step.
ticker_symbol = input("Enter stock symbol (e.g., GAIL.NS): ").strip()
start_date = input("Enter start date (YYYY-MM-DD): ").strip()
end_date = input("Enter end date (YYYY-MM-DD): ").strip()

# Fail fast on unusable input instead of carrying it into later cells.
if not ticker_symbol:
    raise ValueError("Stock symbol must not be empty.")
# pd.to_datetime raises ValueError when a value does not match YYYY-MM-DD.
if pd.to_datetime(start_date, format="%Y-%m-%d") >= pd.to_datetime(end_date, format="%Y-%m-%d"):
    raise ValueError("Start date must be earlier than end date.")

Enter stock symbol (e.g., GAIL.NS): GAIL.NS
Enter start date (YYYY-MM-DD): 2023-01-01
Enter end date (YYYY-MM-DD): 2023-05-01


In [7]:
# Fetch the price history for the requested symbol and date range.
stock_data = yf.download(ticker_symbol, start=start_date, end=end_date)

# Stop here if nothing came back: later cells index into this frame
# (e.g. with .iloc[-1]) and would fail with a less helpful error on empty data.
if stock_data.empty:
    raise ValueError(
        f"No price data returned for {ticker_symbol!r} between {start_date} and {end_date}."
    )

[*********************100%%**********************]  1 of 1 completed


In [8]:
# Render the downloaded frame as this cell's output for a quick visual check.
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-02,96.500000,96.800003,95.500000,96.599998,90.261795,9433822
2023-01-03,96.699997,97.400002,96.400002,96.800003,90.448677,6053619
2023-01-04,97.099998,97.599998,95.150002,95.400002,89.140533,9398653
2023-01-05,95.900002,97.500000,95.150002,97.349998,90.962585,12382060
2023-01-06,97.699997,98.750000,96.849998,97.250000,90.869148,14032853
...,...,...,...,...,...,...
2023-04-24,108.400002,109.599998,108.400002,109.300003,105.989708,6760713
2023-04-25,109.849998,110.199997,108.750000,109.199997,105.892731,8661439
2023-04-26,109.300003,110.500000,108.599998,110.199997,106.862442,7305825
2023-04-27,110.099998,110.099998,108.800003,109.300003,105.989708,8349269


In [9]:
# Binary label: 1 when the following row's close is above this row's close, else 0.
# The final row has no following row, so its comparison is False and it gets 0.
next_close_is_higher = stock_data['Close'].shift(-1).gt(stock_data['Close'])
stock_data['Next_Day_Trend'] = np.where(next_close_is_higher, 1, 0)

In [10]:
# Forward-fill gaps so each missing value takes the last observed value above it.
# DataFrame.ffill() replaces fillna(method='ffill'); the `method` argument of
# fillna is deprecated and triggers a FutureWarning in recent pandas releases.
clean_stock_data = stock_data.ffill()

  clean_stock_data = stock_data.fillna(method='ffill')


In [11]:
# Simple moving averages of the closing price. The leading rows of each column
# are NaN until a full window of observations is available.
for ma_column, ma_window in (('Short_MA', 10), ('Long_MA', 50)):
    clean_stock_data[ma_column] = clean_stock_data['Close'].rolling(window=ma_window).mean()

In [12]:
# Relative Strength Index built from 50-row rolling means of up-moves and down-moves.
delta = clean_stock_data['Close'].diff()

# Split the row-over-row change into its positive part (gain) and the magnitude
# of its negative part (loss); rows on the other side of zero contribute 0.
up_moves = delta.where(delta > 0, 0)
down_moves = -delta.where(delta < 0, 0)
gain = up_moves.rolling(window=50).mean()
loss = down_moves.rolling(window=50).mean()

RS = gain / loss
RSI = 100 - (100 / (1 + RS))
clean_stock_data['RSI'] = RSI

# Remove, in place, every row that still holds a NaN after the rolling calculations.
clean_stock_data.dropna(inplace=True)

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.metrics import BinaryAccuracy


In [14]:
# Recompute the label on the cleaned frame:
# 1 = the following row's close is higher than this row's close, otherwise 0.
# NOTE(review): the last row has no following close, so it is labelled 0 by default.
following_close = clean_stock_data['Close'].shift(-1)
clean_stock_data['Next_Day_Trend'] = np.where(following_close > clean_stock_data['Close'], 1, 0)

In [15]:
# Model inputs, in the column order used for fitting and prediction.
# NOTE(review): 'Next_Day_Trend' is computed from the following row's close, so
# using it as an input exposes future information to the models — confirm intent.
features = [
    'Short_MA',
    'Long_MA',
    'RSI',
    'Next_Day_Trend',
]
# Column the regressors are trained to predict.
target = 'Close'

In [16]:
# Ordered (unshuffled) split: the first 80% of rows train the models, the rest test them.
train_size = int(0.8 * len(clean_stock_data))
train_data, test_data = clean_stock_data.iloc[:train_size], clean_stock_data.iloc[train_size:]


In [17]:

# Baseline model: fit a linear regression on the training rows.
model_lr = LinearRegression()
model_lr.fit(X=train_data[features], y=train_data[target])



In [18]:

# Support Vector Regression with a linear kernel, fitted on the training rows.
# NOTE(review): the features are passed unscaled here; MinMaxScaler is imported
# in this notebook but not applied in the cells shown — confirm that is intended.
model_svr = SVR(kernel='linear')
model_svr.fit(X=train_data[features], y=train_data[target])

In [19]:
# Random forest of 100 trees; the fixed random_state makes the fit repeatable.
model_rf = RandomForestRegressor(random_state=42, n_estimators=100)
model_rf.fit(X=train_data[features], y=train_data[target])

In [20]:
# Gradient boosting with 100 boosting stages; the fixed random_state makes the fit repeatable.
model_gb = GradientBoostingRegressor(random_state=42, n_estimators=100)
model_gb.fit(X=train_data[features], y=train_data[target])


In [22]:
# Evaluation
def evaluate_model(model, test_features, test_target):
    """Score a fitted regressor on held-out data.

    Args:
        model: Object exposing ``predict(test_features)``.
        test_features: Inputs passed to ``model.predict``.
        test_target: True values the predictions are compared against.

    Returns:
        Tuple ``(mse, rmse, mae, r2)``: mean squared error, its square root,
        mean absolute error, and the R-squared score.
    """
    predicted = model.predict(test_features)
    squared_error = mean_squared_error(test_target, predicted)
    return (
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_error(test_target, predicted),
        r2_score(test_target, predicted),
    )


In [23]:
# Score the linear-regression model on the held-out rows and report the metrics.
mse_lr, rmse_lr, mae_lr, r2_lr = evaluate_model(
    model=model_lr,
    test_features=test_data[features],
    test_target=test_data[target],
)
print("Linear Regression:")
print(f"MSE: {mse_lr}, RMSE: {rmse_lr}, MAE: {mae_lr}, R-squared: {r2_lr}")

Linear Regression:
MSE: 0.9708511305214448, RMSE: 0.9853177814905426, MAE: 0.8899674085199839, R-squared: -0.1762972868362258


In [24]:
# Score the SVR model on the held-out rows and report the metrics.
mse_svr, rmse_svr, mae_svr, r2_svr = evaluate_model(
    model=model_svr,
    test_features=test_data[features],
    test_target=test_data[target],
)
print("Support Vector Machine (SVR):")
print(f"MSE: {mse_svr}, RMSE: {rmse_svr}, MAE: {mae_svr}, R-squared: {r2_svr}")

Support Vector Machine (SVR):
MSE: 0.997411710999376, RMSE: 0.9987050170092148, MAE: 0.8236452205413419, R-squared: -0.2084784707178422


In [25]:

# Score the random-forest model on the held-out rows and report the metrics.
mse_rf, rmse_rf, mae_rf, r2_rf = evaluate_model(
    model=model_rf,
    test_features=test_data[features],
    test_target=test_data[target],
)
print("Random Forest:")
print(f"MSE: {mse_rf}, RMSE: {rmse_rf}, MAE: {mae_rf}, R-squared: {r2_rf}")

Random Forest:
MSE: 0.7063017148713326, RMSE: 0.8404175836281227, MAE: 0.6337503306070976, R-squared: 0.1442346156153851


In [26]:
# Score the gradient-boosting model on the held-out rows and report the metrics.
mse_gb, rmse_gb, mae_gb, r2_gb = evaluate_model(
    model=model_gb,
    test_features=test_data[features],
    test_target=test_data[target],
)
print("Gradient Boosting:")
print(f"MSE: {mse_gb}, RMSE: {rmse_gb}, MAE: {mae_gb}, R-squared: {r2_gb}")

Gradient Boosting:
MSE: 0.8072384257318995, RMSE: 0.8984644821760621, MAE: 0.7650906714458685, R-squared: 0.021938235259226224


In [36]:
# Test-set mean squared error of each fitted model, keyed by display name.
model_mse = dict([
    ('Linear Regression', mse_lr),
    ('SVR', mse_svr),
    ('Random Forest', mse_rf),
    ('Gradient Boosting', mse_gb),
])


In [31]:
# Forecast with the linear-regression model (model_lr), which is the model
# actually called below. The regressors are fitted on target = 'Close', so the
# values predicted here are closing prices.

future_days = 1  # Replace with the desired number of future days

# Seed the projection with the most recent known value of every feature.
last_short_ma = clean_stock_data['Short_MA'].iloc[-1]
last_long_ma = clean_stock_data['Long_MA'].iloc[-1]
last_rsi = clean_stock_data['RSI'].iloc[-1]
last_trend = clean_stock_data['Next_Day_Trend'].iloc[-1]

future_short_ma_values = []  # Store future Short_MA values
future_long_ma_values = []  # Store future Long_MA values
future_rsi_values = []  # Store future RSI values
future_trend_values = []  # Store future Next_Day_Trend values

for _day in range(future_days):
    # Placeholder projection: step every feature forward by 1 per future day.
    # Replace this with your own forecasting logic.
    # RSI cannot exceed 100 and Next_Day_Trend is a 0/1 flag, so both are capped;
    # without the cap they drift outside their valid range when future_days > 1.
    last_short_ma = last_short_ma + 1
    last_long_ma = last_long_ma + 1
    last_rsi = min(last_rsi + 1, 100)
    last_trend = min(last_trend + 1, 1)

    # Each projected value also becomes the starting point for the next iteration.
    future_short_ma_values.append(last_short_ma)
    future_long_ma_values.append(last_long_ma)
    future_rsi_values.append(last_rsi)
    future_trend_values.append(last_trend)

# Construct a DataFrame for future features
future_features = pd.DataFrame({
    'Short_MA': future_short_ma_values,
    'Long_MA': future_long_ma_values,
    'RSI': future_rsi_values,
    'Next_Day_Trend': future_trend_values
})

# Select columns through `features` so their names and order match what the
# model was fitted on, regardless of how the frame above was assembled.
predicted_close_prices = model_lr.predict(future_features[features])
# Backward-compatible alias: the original cell exposed the result under this name.
predicted_open_prices = predicted_close_prices
print("Predicted future close prices:")
print(predicted_close_prices)

Predicted future open prices:
[106.31302737]


In [32]:

# R-squared of the linear-regression model on the held-out rows.
model_lr.score(X=test_data[features], y=test_data[target])

-0.1762972868362258

In [33]:
# R-squared of the SVR model on the held-out rows.
model_svr.score(X=test_data[features], y=test_data[target])

-0.2084784707178422

In [34]:
# R-squared of the random-forest model on the held-out rows.
model_rf.score(X=test_data[features], y=test_data[target])

0.1442346156153851

In [35]:
# R-squared of the gradient-boosting model on the held-out rows.
model_gb.score(X=test_data[features], y=test_data[target])

0.021938235259226224