# Importing libraries 


In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import ta


# Updating Data frame

In [None]:
# Load merged_data.csv into a DataFrame
df = pd.read_csv("merged_data.csv")

# Show the frame exactly as it was read
print(df)

# Reverse the row order, derive a 'Year' column from 'Date', and then discard
# 'Date' itself (only the year is kept from it)
df = (
    df.iloc[::-1]
    .assign(Year=lambda frame: pd.to_datetime(frame['Date']).dt.year)
    .drop(columns='Date')
)

# Calculate the percentage change in stock price (row-over-row on 'Close')
df['Percentage Change'] = df['Close'].pct_change()

# Calculate RSI on 'Close' (ta library default parameters)
df['rsi'] = ta.momentum.rsi(df['Close'])

# Calculate Stochastic RSI on 'Close' (ta library default parameters)
df['stoch_rsi'] = ta.momentum.stochrsi(df['Close'])

# Calculate Fibonacci levels
# Every 169th row (adjust as needed) receives a new level that moves 61.8% of
# the remaining distance from the previous level towards the maximum Close;
# the very first step starts from the minimum Close. All other rows stay NaN.
# NOTE(review): those NaN rows flow into every later step that uses this
# column -- confirm they are handled before a model is fitted on it.
close_max = df['Close'].max()  # loop-invariant, so it is computed only once
prev_level = df['Close'].min()  # Starting from the minimum value of Close
fibonacci_levels = np.full(len(df), np.nan)

# Visit only the rows that actually receive a level instead of every row
for i in range(0, len(df), 169):
    prev_level = prev_level + (close_max - prev_level) * 0.618
    fibonacci_levels[i] = prev_level

df['fibonacci'] = fibonacci_levels

# Calculate MACD on 'Close' (ta library default parameters)
df['macd'] = ta.trend.macd(df['Close'])

# Yearly fundamentals, one row per year: (EPS, DPS, P/E).
# NOTE(review): the 2023 EPS and P/E break sharply from the trend of the
# preceding years -- confirm them against the source report.
_fundamentals_by_year = {
    2013: (3.50, 1.80, 13.00),
    2014: (4.30, 2.20, 11.00),
    2015: (3.70, 1.90, 12.70),
    2016: (4.10, 2.10, 10.70),
    2017: (4.50, 2.30, 9.78),
    2018: (5.30, 2.70, 8.25),
    2019: (5.70, 2.90, 7.89),
    2020: (6.10, 3.10, 7.05),
    2021: (7.30, 3.70, 5.96),
    2022: (8.50, 4.30, 5.05),
    2023: (31.65, 4.50, 63.19),
}

# Per-metric lookup tables keyed by year, derived from the rows above
eps_values = {year: row[0] for year, row in _fundamentals_by_year.items()}
dps_values = {year: row[1] for year, row in _fundamentals_by_year.items()}
pe_values = {year: row[2] for year, row in _fundamentals_by_year.items()}

# Attach the yearly fundamentals to every row by looking up its 'Year'
# (a year that is missing from a table yields NaN for that row)
for column_name, values_by_year in (
    ('EPS (NRs)', eps_values),
    ('DPS (NRs)', dps_values),
    ('P/E', pe_values),
):
    df[column_name] = df['Year'].map(values_by_year)

# Show the frame with the indicators and fundamentals attached
print(df)


# Standardizing the Data frame

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler


# Work on a copy so the unscaled frame stays intact until the final rebind
df_standardized = df.copy()

# Columns that are rescaled; 'Close' is not listed and keeps its own scale
columns_to_standardize = [
    'Open', 'High', 'Low', 'Volume', 'Sentiment', 'bonus', 'dividend',
    'Percentage Change', '5-day SMA', '8-day SMA', '13-day SMA',
    '50-day MA', '200-day MA', 'stoch_rsi', 'rsi', 'macd',
    'EPS (NRs)', 'DPS (NRs)', 'P/E',
]

# Fit the scaler on the selected columns, then write the scaled values back.
# NOTE(review): the scaler is fitted on all rows, before any train/test split
# made later in this file -- confirm that this is intended.
scaler = StandardScaler().fit(df_standardized[columns_to_standardize])
df_standardized[columns_to_standardize] = scaler.transform(df_standardized[columns_to_standardize])

# Show the standardized frame
print(df_standardized)

# From here on `df` refers to the standardized frame
df = df_standardized
df

# Feature importance 

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split



# Split the data into features and target.
# 'Close' is the target, so it is removed from X as well: leaving it in lets
# the forest read the answer straight from its inputs, which makes both the
# score and the importances meaningless.
X = df.drop(
    columns=[
        'sn', 'Symbol', 'Close',
        '8-day SMA', '13-day SMA', '50-day MA', '200-day MA',
        'stoch_rsi', 'rsi', 'macd',
        'EPS (NRs)', 'DPS (NRs)', 'P/E',
    ]
)
y = df['Close']

# Split the data into training and testing sets (10% held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=186)

# Train the model
reg_RF = RandomForestRegressor(random_state=186)
reg_RF.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred_RF = reg_RF.predict(X_test)

# Evaluate the model (score() of a regressor is R^2 on the given data)
print(f'The score for RandomForest is {reg_RF.score(X_test, y_test)}')

# Visualize feature importances, smallest first so the largest bar is on top
d = pd.DataFrame(index=X.columns, data=reg_RF.feature_importances_, columns=['Importance']).sort_values('Importance')
d.plot(kind='barh', figsize=(10,5))
plt.title('Feature Importance')
plt.show()

# Hyperparameters tuning 

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Layer, Bidirectional, Dropout
import tensorflow as tf

# Define the attention layer
class Attention(Layer):
    """Attention pooling that collapses axis 1 of its input.

    Each position along axis 1 gets a score ``tanh(x @ W + b)``; a softmax
    over axis 1 turns the scores into weights, and the input is summed along
    that axis using those weights. As used in this file the input is
    ``(batch, steps, channels)`` and the result is ``(batch, channels)``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per channel and one bias per position on axis 1.
        steps, channels = input_shape[1], input_shape[-1]
        self.W = self.add_weight(name='attention_weight', shape=(channels, 1), initializer='random_normal', trainable=True)
        self.b = self.add_weight(name='attention_bias', shape=(steps, 1), initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        scores = tf.tanh(tf.matmul(x, self.W) + self.b)
        weights = tf.nn.softmax(scores, axis=1)  # normalised across axis 1
        return tf.reduce_sum(x * weights, axis=1)

from itertools import product

# Split the data into features (X) and target variable (y).
# 'Close' is the target and must not appear among the inputs. Open/High/Low
# are excluded too, so the search runs on the same feature set as the model
# cells in this file.
X = df.drop(['sn', 'Symbol', 'Close', 'Open', 'High', 'Low'], axis=1).values
y = df['Close'].values

# Each sample is fed to Conv1D as a sequence with one entry per feature
# column, so the sequence length is taken from the data instead of being
# hard-coded (a fixed value breaks the reshape when the column count changes).
time_steps = X.shape[1]
features = 1  # one channel per step

# Define the parameter grid for grid search
dropout_rates = [0.2, 0.3, 0.4]
batch_sizes = [32, 64, 128]
random_states = [164, 328, 492]

best_r2 = -np.inf
best_dropout_rate = None
best_batch_size = None
best_random_state = None

# Perform grid search.
# NOTE(review): random_state is only passed to train_test_split, and R^2 is
# measured on that same test split, so the "best" random state selects a
# favourable split rather than a model setting -- confirm this is intended.
for dropout_rate, batch_size, random_state in product(dropout_rates, batch_sizes, random_states):
    print(f"Testing dropout_rate={dropout_rate}, batch_size={batch_size}, random_state={random_state}")

    # Drop the graph/state left over from the previous combination so that
    # building one model per combination does not keep accumulating memory.
    tf.keras.backend.clear_session()

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

    # Reshape the input data to (samples, steps, channels)
    X_train_reshaped = X_train.reshape(X_train.shape[0], time_steps, features)
    X_test_reshaped = X_test.reshape(X_test.shape[0], time_steps, features)

    # Create the model
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(time_steps, features)))
    model.add(Bidirectional(LSTM(units=64, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    model.add(Attention())
    model.add(Dense(units=1, activation='linear')) # Linear activation for regression

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train_reshaped, y_train, epochs=3000, batch_size=batch_size, verbose=0)

    # Evaluate the model
    y_pred = model.predict(X_test_reshaped)
    r2 = r2_score(y_test, y_pred)

    print("R-squared:", r2)

    # Update the best parameters if the current model is better
    if r2 > best_r2:
        best_r2 = r2
        best_dropout_rate = dropout_rate
        best_batch_size = batch_size
        best_random_state = random_state

# Print the best parameters and corresponding R-squared value
print("Best Dropout Rate:", best_dropout_rate)
print("Best Batch Size:", best_batch_size)
print("Best Random State:", best_random_state)
print("Best R-squared:", best_r2)


# CNN for stock market prediction

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense,Flatten, Layer, Bidirectional, Dropout
import tensorflow as tf


# Split the data into features (X) and target variable (y)
X = df.drop(['sn', 'Symbol','Close','Open','High','Low'], axis=1).values
y = df['Close'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=164)

# Reshape the input data to (samples, steps, channels) for Conv1D.
# Each feature column becomes one step of the sequence (these are not previous
# days), so the length is taken from the data rather than hard-coded; a fixed
# value breaks the reshape as soon as the number of columns changes.
# The six stacked kernel-size-3 convolutions below each shorten the sequence
# by 2 with Keras' default padding, so at least 13 columns are required.
time_steps = X.shape[1]
features = 1  # one channel per step
X_train_reshaped = X_train.reshape(X_train.shape[0], time_steps, features)
X_test_reshaped = X_test.reshape(X_test.shape[0], time_steps, features)

# Create the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(time_steps, features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Flatten())
model.add(Dense(units=1, activation='linear')) # Linear activation for regression


# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_reshaped, y_train, epochs=3300, batch_size=16)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test_reshaped)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)
print("R-squared (R2):", r2)


# Convolutional neural network- Attention mechanism 

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Layer, Bidirectional, Dropout
import tensorflow as tf

# Define the attention layer
class Attention(Layer):
    """Attention pooling that collapses axis 1 of its input.

    Each position along axis 1 gets a score ``tanh(x @ W + b)``; a softmax
    over axis 1 turns the scores into weights, and the input is summed along
    that axis using those weights. As used in this file the input is
    ``(batch, steps, channels)`` and the result is ``(batch, channels)``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per channel and one bias per position on axis 1.
        steps, channels = input_shape[1], input_shape[-1]
        self.W = self.add_weight(name='attention_weight', shape=(channels, 1), initializer='random_normal', trainable=True)
        self.b = self.add_weight(name='attention_bias', shape=(steps, 1), initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        scores = tf.tanh(tf.matmul(x, self.W) + self.b)
        weights = tf.nn.softmax(scores, axis=1)  # normalised across axis 1
        return tf.reduce_sum(x * weights, axis=1)

# Split the data into features (X) and target variable (y)
X = df.drop(['sn', 'Symbol','Close','Open','High','Low'], axis=1).values
y = df['Close'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=164)

# Reshape the input data to (samples, steps, channels) for Conv1D.
# Each feature column becomes one step of the sequence (these are not previous
# days), so the length is taken from the data rather than hard-coded; a fixed
# value breaks the reshape as soon as the number of columns changes.
# The six stacked kernel-size-3 convolutions below each shorten the sequence
# by 2 with Keras' default padding, so at least 13 columns are required.
time_steps = X.shape[1]
features = 1  # one channel per step
X_train_reshaped = X_train.reshape(X_train.shape[0], time_steps, features)
X_test_reshaped = X_test.reshape(X_test.shape[0], time_steps, features)

# Create the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(time_steps, features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
# Attention sums over axis 1, so its output is already 2-D and feeds Dense
# directly. The former Flatten() was a no-op here and relied on a name that
# this cell's own imports do not provide.
model.add(Attention())
model.add(Dense(units=1, activation='linear')) # Linear activation for regression


# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_reshaped, y_train, epochs=3300, batch_size=32)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test_reshaped)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)
print("R-squared (R2):", r2)


NameError: name 'df' is not defined

# CNN-biDirectional LSTM-AM 

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense,Flatten, Layer, Bidirectional, Dropout
import tensorflow as tf

# Define the attention layer
class Attention(Layer):
    """Attention pooling that collapses axis 1 of its input.

    Each position along axis 1 gets a score ``tanh(x @ W + b)``; a softmax
    over axis 1 turns the scores into weights, and the input is summed along
    that axis using those weights. As used in this file the input is
    ``(batch, steps, channels)`` and the result is ``(batch, channels)``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per channel and one bias per position on axis 1.
        steps, channels = input_shape[1], input_shape[-1]
        self.W = self.add_weight(name='attention_weight', shape=(channels, 1), initializer='random_normal', trainable=True)
        self.b = self.add_weight(name='attention_bias', shape=(steps, 1), initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        scores = tf.tanh(tf.matmul(x, self.W) + self.b)
        weights = tf.nn.softmax(scores, axis=1)  # normalised across axis 1
        return tf.reduce_sum(x * weights, axis=1)

# Split the data into features (X) and target variable (y)
X = df.drop(['sn', 'Symbol','Close','Open','High','Low'], axis=1).values
y = df['Close'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=164)

# Reshape the input data to (samples, steps, channels) for Conv1D.
# Each feature column becomes one step of the sequence (these are not previous
# days), so the length is taken from the data rather than hard-coded; a fixed
# value breaks the reshape as soon as the number of columns changes.
# The six stacked kernel-size-3 convolutions below each shorten the sequence
# by 2 with Keras' default padding, so at least 13 columns are required.
time_steps = X.shape[1]
features = 1  # one channel per step
X_train_reshaped = X_train.reshape(X_train.shape[0], time_steps, features)
X_test_reshaped = X_test.reshape(X_test.shape[0], time_steps, features)

# Create the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(time_steps, features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Bidirectional(LSTM(units=64, return_sequences=True)))
model.add(Dropout(0.2))  # Dropout layer with a rate of 0.2
# Attention sums over axis 1, so its output is already 2-D and feeds Dense
# directly; the former Flatten() after it was a no-op and has been removed.
model.add(Attention())
model.add(Dense(units=1, activation='linear')) # Linear activation for regression


# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_reshaped, y_train, epochs=3300, batch_size=16)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test_reshaped)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)
print("R-squared (R2):", r2)
