In [1]:
!pip install ta




[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import ta  # Technical analysis library

# --- Fetch Stock and Nifty 50 Data from Yahoo Finance ---
ticker = "^NSEI"  # Reliance Industries

data = yf.download(ticker, start="2010-01-01", end="2024-01-01")
data.to_csv('output.csv')
output_df = pd.read_csv('output.csv')
# Drop the first two rows as they are now part of the header
new_header = output_df.iloc[0].combine_first(output_df.iloc[1])
output_df.columns = new_header
output_df = output_df[2:]
display(output_df)
# Step 2: Assign consistent column names (using the ones in data.csv as reference)
output_df.columns = [
    "Date", "Close", "High", "Low", "Open", "Volume"
]
# # Ensure correct data types
output_df = output_df.reset_index(drop=True)
output_df["Date"] = pd.to_datetime(output_df["Date"], errors='coerce')
output_df[["Close", "High", "Low", "Open", "Volume"]] = output_df[[
    "Close", "High", "Low", "Open", "Volume"
]].apply(pd.to_numeric, errors='coerce')
data = output_df[['Close']].rename(columns={'Close': 'Stock_Close'})


# Calculate Additional Indicators
data['SMA_20'] = data['Stock_Close'].rolling(window=3).mean()
data['EMA_20'] = data['Stock_Close'].ewm(span=3, adjust=False).mean()
data['Volatility'] = data['Stock_Close'].rolling(window=20).std()
data['Bollinger_Upper'] = data['SMA_20'] + (2 * data['Volatility'])
data['Bollinger_Lower'] = data['SMA_20'] - (2 * data['Volatility'])
data['Daily_Return'] = data['Stock_Close'].pct_change()
data['RSI'] = ta.momentum.RSIIndicator(data['Stock_Close'].squeeze(), window=14).rsi()

# Clip Extremes
mean_return = data['Daily_Return'].mean()
std_return = data['Daily_Return'].std()
lower_clip_std = mean_return - 3 * std_return
upper_clip_std = mean_return + 3 * std_return
data['Clipped_Return_Std'] = data['Daily_Return'].clip(lower=lower_clip_std, upper=upper_clip_std)
data['Volatility_Adjusted_Movement'] = data['Clipped_Return_Std'] / data['Volatility']

# Drop unused columns
data.drop(columns=['Clipped_Return_Std', 'Daily_Return'], inplace=True)
data.dropna(inplace=True)

# --- Exclude Stock_Close from Features ---
features = data.drop(columns=['Stock_Close']).values  # All columns except 'Stock_Close'
target = data['Stock_Close'].values.reshape(-1, 1)    # Target variable

# --- Scale the Features and Target ---
feature_scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = feature_scaler.fit_transform(features)

target_scaler = MinMaxScaler(feature_range=(0, 1))
scaled_target = target_scaler.fit_transform(target)

# --- Create Sequences for LSTM ---
def create_sequences(features, target, window=45, steps=1):
    X, Y = [], []
    for i in range(window, len(features) - steps):
        X.append(features[i-window:i])  # Use all features except 'Stock_Close'
        Y.append(target[i+steps])      # Predict 'Stock_Close' (future)
    return np.array(X), np.array(Y)

X, Y = create_sequences(scaled_features, scaled_target, window=45, steps=1)
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]

# --- Define and Train the LSTM Model ---
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=100))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

print("Training the LSTM model...")
model.fit(X_train, Y_train, epochs=10, batch_size=32)


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Ticker,^NSEI,^NSEI.1,^NSEI.2,^NSEI.3,^NSEI.4
2,2010-01-04,5232.2001953125,5238.4501953125,5167.10009765625,5200.89990234375,0
3,2010-01-05,5277.89990234375,5288.35009765625,5242.39990234375,5277.14990234375,0
4,2010-01-06,5281.7998046875,5310.85009765625,5260.0498046875,5278.14990234375,0
5,2010-01-07,5263.10009765625,5302.5498046875,5244.75,5281.7998046875,0
6,2010-01-08,5244.75,5276.75,5234.7001953125,5264.25,0
...,...,...,...,...,...,...
3431,2023-12-22,21349.400390625,21390.5,21232.44921875,21295.849609375,284700
3432,2023-12-26,21441.349609375,21477.150390625,21329.44921875,21365.19921875,219500
3433,2023-12-27,21654.75,21675.75,21495.80078125,21497.650390625,256500
3434,2023-12-28,21778.69921875,21801.44921875,21678.0,21715.0,393100


Training the LSTM model...
Epoch 1/10


  super().__init__(**kwargs)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 69ms/step - loss: 0.0047
Epoch 2/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 1.7222e-04
Epoch 3/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 1.4124e-04
Epoch 4/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - loss: 1.3975e-04
Epoch 5/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 1.3387e-04
Epoch 6/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 1.1145e-04
Epoch 7/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 1.2857e-04
Epoch 8/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 1.3006e-04
Epoch 9/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 1.6570e-04
Epoch 10/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30m

<keras.src.callbacks.history.History at 0x2061b0e9e80>

In [7]:
model.save('nifty_model.keras')

In [4]:
initial_data = data
# --- Initialize with the last available data ---
predictions = []
#initial_data.set_index('Date', inplace=True)

# Number of future predictions to make
num_iterations = 30

# Drop rows with missing values
initial_data.dropna(inplace=True)

# Define the feature columns (excluding 'Stock_Close' which is the target)
feature_columns = list(data.drop(columns=['Stock_Close']).columns)
features = [i for i in feature_columns]

# Ensure all values in Stock_Close are numeric initially
initial_data['Stock_Close'] = pd.to_numeric(initial_data['Stock_Close'].squeeze(), errors='coerce')

# Display the initial data for debugging
display(initial_data)

for _ in range(num_iterations):
    # Prepare the latest features using the last 45 rows of the initial data
    latest_features = initial_data[features].tail(45).values
    scaled_latest_features = feature_scaler.transform(latest_features)

    # Make the prediction
    prediction = model.predict(scaled_latest_features.reshape(1, 45, scaled_latest_features.shape[1]))
    predicted_close = target_scaler.inverse_transform(prediction)[0][0]  # Inverse transform the predicted value

    # Append the predicted close price to the predictions list
    predictions.append(predicted_close)

    # Update the features for the next prediction
    new_row = {'Stock_Close': float(predicted_close)}  # Ensure this is a float

    # Calculate additional indicators for the new row
    new_row['SMA_20'] = initial_data['Stock_Close'].iloc[-20:].mean()  # Simple Moving Average over the last 20 values
    alpha = 2 / (20 + 1)  # Smoothing factor for EMA
    new_row['EMA_20'] = initial_data['Stock_Close'].iloc[-1] * alpha + initial_data['EMA_20'].iloc[-1] * (1 - alpha)  # Exponential Moving Average
    new_row['Volatility'] = initial_data['Stock_Close'].iloc[-20:].std()
    new_row['Bollinger_Upper'] = new_row['SMA_20'] + (2 * new_row['Volatility'])
    new_row['Bollinger_Lower'] = new_row['SMA_20'] - (2 * new_row['Volatility'])

    # Calculate RSI for the new row
    recent_prices = pd.Series([predicted_close] + list(initial_data['Stock_Close'].iloc[-13:].astype(float)))
    new_row['RSI'] = ta.momentum.RSIIndicator(recent_prices, window=14).rsi().iloc[-1]

    # Calculate Volatility Adjusted Movement (simple example: scaling by volatility)
    old_close = initial_data['Stock_Close'].iloc[-1]
    predicted_change = ((predicted_close - old_close)/old_close)  # Change in stock price
    new_row['Volatility_Adjusted_Movement'] = predicted_change/new_row['Volatility']

    # Calculate Nifty Close (20 * Stock_Close)

    # Convert the new_row dict to a DataFrame with a single row
    new_row_df = pd.DataFrame([new_row])

    # Ensure the new_row_df contains only the required columns (no extra columns)
    new_row_df = new_row_df[['Stock_Close'] + features]

    # Append the new row to the initial data
    initial_data = pd.concat([initial_data, new_row_df], ignore_index=True)

    # Only keep the last '45 + 1' rows to simulate rolling window predictions
    initial_data = initial_data.iloc[-(45 + 1):]
    print(new_row_df)

# Print the predictions
print("Future predictions:", predictions)


Unnamed: 0,Stock_Close,SMA_20,EMA_20,Volatility,Bollinger_Upper,Bollinger_Lower,RSI,Volatility_Adjusted_Movement
19,4899.700195,4883.000000,4897.951018,157.243732,5197.487464,4568.512536,21.824193,0.000023
20,4830.100098,4870.616699,4864.025558,170.443903,5211.504506,4529.728892,18.504237,-0.000083
21,4931.850098,4887.216797,4897.937828,171.484323,5230.185442,4544.248152,34.251036,0.000123
22,4845.350098,4869.100098,4871.643963,175.653704,5220.407506,4517.792690,29.102801,-0.000100
23,4718.649902,4831.950033,4795.146933,188.204148,5208.358328,4455.541737,23.525029,-0.000139
...,...,...,...,...,...,...,...,...
3429,21349.400391,21251.533854,21306.631664,523.171997,22297.877848,20205.189860,72.117736,0.000008
3430,21441.349609,21348.600260,21373.990637,479.156503,22306.913267,20390.287254,73.712515,0.000009
3431,21654.750000,21481.833333,21514.370318,441.705114,22365.243562,20598.423104,77.000453,0.000023
3432,21778.699219,21624.932943,21646.534769,419.843074,22464.619090,20785.246795,78.669287,0.000014


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
    Stock_Close        SMA_20      EMA_20  Volatility  Bollinger_Upper  \
0  21674.070312  21165.990039  21693.0088  379.039451     21924.068942   

   Bollinger_Lower        RSI  Volatility_Adjusted_Movement  
0     20407.911137  58.280178                     -0.000007  
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
    Stock_Close        SMA_20        EMA_20  Volatility  Bollinger_Upper  \
0  21571.669922  21236.298535  21691.205134   331.06384     21898.426216   

   Bollinger_Lower        RSI  Volatility_Adjusted_Movement  
0     20574.170854  56.700271                     -0.000014  
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
    Stock_Close        SMA_20        EMA_20  Volatility  Bollinger_Upper  \
0  21462.160156  21280.541992  21679.820828  312.362692     21905.267376   

   Bollinger_Lower       RSI  Volatility_Adjusted_Movement  
0     20655.816608 

In [5]:
output_df

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2010-01-04,5232.200195,5238.450195,5167.100098,5200.899902,0
1,2010-01-05,5277.899902,5288.350098,5242.399902,5277.149902,0
2,2010-01-06,5281.799805,5310.850098,5260.049805,5278.149902,0
3,2010-01-07,5263.100098,5302.549805,5244.750000,5281.799805,0
4,2010-01-08,5244.750000,5276.750000,5234.700195,5264.250000,0
...,...,...,...,...,...,...
3429,2023-12-22,21349.400391,21390.500000,21232.449219,21295.849609,284700
3430,2023-12-26,21441.349609,21477.150391,21329.449219,21365.199219,219500
3431,2023-12-27,21654.750000,21675.750000,21495.800781,21497.650391,256500
3432,2023-12-28,21778.699219,21801.449219,21678.000000,21715.000000,393100


In [6]:
data = yf.download(ticker, start="2010-01-01", end="2024-01-01")
data.to_csv('output.csv')
output_df = pd.read_csv('output.csv')
# Drop the first two rows as they are now part of the header
new_header = output_df.iloc[0].combine_first(output_df.iloc[1])
output_df.columns = new_header
output_df = output_df[2:]
# Step 2: Assign consistent column names (using the ones in data.csv as reference)
output_df.columns = [
    "Date", "Close", "High", "Low", "Open", "Volume"
]
# # Ensure correct data types
output_df = output_df.reset_index(drop=True)
output_df["Date"] = pd.to_datetime(output_df["Date"], errors='coerce')
output_df[["Close", "High", "Low", "Open", "Volume"]] = output_df[[
    "Close", "High", "Low", "Open", "Volume"
]].apply(pd.to_numeric, errors='coerce')
data = output_df[['Close']].rename(columns={'Close': 'Stock_Close'})

[*********************100%***********************]  1 of 1 completed
