# Importing Libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import helper
from helper import *

In [None]:
import importlib
import helper
importlib.reload(helper)

# Data Collection

In [None]:
# basic variables

# Sample window for the price download, written as ISO date strings.
start_date = '2000-01-01'
end_date = '2023-12-31' 
# Daily calendar index spanning the window; passed to clean_data() in a later cell.
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

In [None]:
# Read the 'Symbol' column of the constituents file and append ".NS" to every entry.
# NOTE(review): the path is absolute and machine-specific.
constituents = pd.read_csv("C:/Users/satya/Downloads/ind_nifty500list (1).csv")
nse_tickers = [symbol + ".NS" for symbol in constituents['Symbol'].to_list()]

In [None]:
# Download prices for every ticker between start_date and end_date.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, in which case the 'Adj Close' column selected below is not
# returned and the lookup raises KeyError.
price_data = yf.download(nse_tickers, start=start_date, end=end_date, auto_adjust=False)
price_data = price_data['Adj Close']
# Drop tickers (columns) for which every value is missing.
price_data = price_data.dropna(axis=1, how='all')

In [None]:
nse_stocks_data = clean_data(price_data, date_range)

In [None]:
# NOTE(review): this rebinds nse_stocks_data to the raw download and therefore
# discards the clean_data() result assigned in the previous cell — confirm
# that bypassing the cleaning step is intended.
nse_stocks_data = price_data

# Setup

In [None]:
returns_df = calculate_returns(nse_stocks_data, rebalance='W')

In [None]:
returns_df.tail()

In [None]:
# Output location for the weekly returns table (absolute, machine-specific path).
folder_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/'
filename = 'returns_df_nse_weekly.csv'

# Save the DataFrame to the specified folder
# NOTE(review): index=False leaves the index out of the file; if returns_df is
# indexed by date, the dates are not written — confirm this is intended.
returns_df.to_csv(folder_path + filename, index=False)

In [None]:
# Momentum features built from the price panel by helper functions, both called
# with resample='W'. Their exact definitions live in the helper module (not
# visible here).
momentum_df = calculate_12_1_momentum(nse_stocks_data, resample='W')
# Same panel with a window argument of 1; used later as the 'Momentum_Reversal' column.
momentum_reversion = calculate_m_momentum(nse_stocks_data, 1, resample='W')

In [None]:
momentum_reversion.tail()

In [None]:
# Return-based signals computed by a helper with second argument 3 and 5;
# they become the 'Seasonality_3' / 'Seasonality_5' columns further down.
# presumably the argument is a look-back length — verify in helper.
seasonality_3 = calculate_return_signals(returns_df, 3, rebalance='W')
seasonality_5 = calculate_return_signals(returns_df, 5, rebalance='W')

In [None]:
seasonality_3

In [None]:
vol_df = calculate_vol(returns_df, 1, resample='W')
vol_df.tail()

In [None]:
# Overwrite the raw signals with the output of convert_signals, using a second
# argument of 2 for the 3-signal and 3 for the 5-signal.
# NOTE(review): the cell reassigns its own inputs, so running it twice applies
# the conversion twice.
seasonality_3 = convert_signals(seasonality_3, 2)
seasonality_5 = convert_signals(seasonality_5, 3)

In [None]:
# Derived from returns_df by a helper; melted below into the 'bin_Return'
# column that the classifiers use as their target.
bin_returns = convert_df_values(returns_df, 0, 0)

In [None]:
seasonality_3 = seasonality_3.rename_axis('Ticker')
seasonality_5 = seasonality_5.rename_axis('Ticker')

In [None]:
# Melt each table with melt_df, giving every value column its own name.
# The merges below rely on each result exposing 'Stock' and 'Date' columns.
bin_returns_melted = melt_df(bin_returns, 'bin_Return')
returns_melted = melt_df(returns_df, 'Return')
momentum_reversal_melted = melt_df(momentum_reversion, 'Momentum_Reversal')
momentum_12_1_melted = melt_df(momentum_df, 'Momentum_12_1')
vol_melted = melt_df(vol_df, 'Volatility')
seasonality_3_melted = melt_df(seasonality_3, 'Seasonality_3')
seasonality_5_melted = melt_df(seasonality_5, 'Seasonality_5')

# Join the long tables one after another on (Stock, Date), starting from the
# target table; each merge is pandas' default inner join.
merged_df = bin_returns_melted
for feature_long in (
    returns_melted,
    momentum_reversal_melted,
    momentum_12_1_melted,
    vol_melted,
    seasonality_3_melted,
    seasonality_5_melted,
):
    merged_df = merged_df.merge(feature_long, on=['Stock', 'Date'])

In [None]:
merged_df.head()

In [None]:
# Parse 'Date' into datetimes so it can be compared against a cutoff.
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

# Keep only observations dated strictly after 2005-12-31.
after_cutoff = merged_df['Date'] > '2005-12-31'
filtered_df = merged_df[after_cutoff]

In [None]:
# Drop the ticker column and renumber the rows. From here on a row is only
# identified by 'Date' and its values; the ticker is recovered much later by
# merging back against merged_df.
filtered_df = filtered_df.drop(['Stock'], axis=1).reset_index(drop=True)

In [None]:
filtered_df.head()

In [None]:
filtered_df.shape

# EDA and Data Cleaning

In [None]:
threshold = 2.5

In [None]:
data = filtered_df.fillna(0)

In [None]:
data.head()

Removing the rows whose `bin_Return` is 0 (this includes rows that were entirely missing and were zero-filled above)

In [None]:
# Keep only rows with a non-zero bin_Return label, then renumber the index.
data = data[data.bin_Return != 0].reset_index(drop=True)

In [None]:
data.head()

In [None]:
# filtered_df was already restricted to dates after 2005-12-31, so this filter
# repeats that cutoff; the cell mainly introduces the working name `df`.
df = data[data.Date > '2005-12-31'].reset_index(drop=True)

In [None]:
df.dtypes

In [None]:
df.head()

In [None]:
df.shape

In [None]:
# Numerical columns

# Every float64 column is treated as numerical. Later cells use num_cols[1:]
# as the feature list, so they depend on the column order returned here.
num_cols = df.select_dtypes(include = ['float64']).columns
num_cols

In [None]:
# Categorical columns

# Every int64 column is treated as categorical; these drive the pie charts below.
cat_cols = df.select_dtypes(include = ['int64']).columns
cat_cols

In [None]:
# Define color mapping for the categories
color_map = {1: 'blue', 0: 'green', -1: 'red'}
colors = [color_map[1], color_map[0], color_map[-1]]

# Initialize the plot
count = 1
# squeeze=False always returns a 2-D array of Axes, so the loop below also
# works when there is a single categorical column (plt.subplots would otherwise
# return one bare Axes object, which cannot be zipped).
fig, axs = plt.subplots(1, len(cat_cols), figsize=(20, 10), squeeze=False)  # Increase the overall figure size
plt.subplots_adjust(wspace=0.3, hspace=0.3)

# One pie per categorical column, showing the share of each category value.
for ax, col in zip(axs.ravel(), cat_cols):
    # Calculate value counts and percentages
    value_counts = df[col].value_counts()
    percentages = value_counts / value_counts.sum() * 100
    
    # Plot pie chart with consistent colors (looked up per category value, so
    # a given value has the same color in every pie)
    wedges, texts, autotexts = ax.pie(percentages, labels=None, autopct='%1.1f%%', startangle=140, colors=[color_map[key] for key in percentages.index], textprops=dict(color="w", fontsize=14, fontweight='bold'))
    
    ax.set_title(f'Pie chart of {col}', fontsize=16)  # Increase title font size
    
    # Set properties for percentage labels
    for autotext in autotexts:
        autotext.set_fontsize(14)
        autotext.set_fontweight('bold')

# Create a legend on the bottom right
labels = ['1', '0', '-1']
handles = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color_map[int(label)], markersize=10) for label in labels]
fig.legend(handles, labels, title='Categories', loc='lower right', fontsize=14, title_fontsize='16')

plt.suptitle('Pie charts of categorical variables', size=25)
plt.show()

In [None]:
# For every numerical column draw its distribution next to a box plot.
for col in num_cols:
    plt.figure(figsize=(16,8)) # Create a new figure for each plot with a specified size
    plt.subplot(2,2,1)
    # sns.distplot is deprecated; a density-scaled histogram with a KDE overlay
    # is the documented replacement.
    sns.histplot(df[col], kde=True, stat="density")
    plt.title(f'Distribution of {col}', fontsize=16)
    plt.subplot(2,2,2)
    sns.boxplot(data=df, y=col)
    plt.ylabel(col)
    plt.title(f'Box Plot of {col}', fontsize=16)
    plt.show()  


Removing outliers using z-scores

In [None]:
# Work on a frame without 'Date' so that every remaining column can be z-scored.
df1 = df.drop(['Date'], axis=1)
# Define z-score threshold
z_threshold = threshold

# Calculate z-scores column by column. This runs over every column of df1,
# not only the columns listed in num_cols.
z_scores = df1.apply(zscore)

# Keep only the rows whose absolute z-score is within the threshold in every
# column; a row is dropped as soon as one column exceeds it.
df_filtered = df1[(z_scores.abs() <= z_threshold).all(axis=1)]

In [None]:
# Re-attach 'Date' to the rows that survived the outlier filter.
# df_filtered was sliced from df (minus 'Date') and still carries df's row
# labels, so selecting by label is exact. The previous value-based inner merge
# on all seven columns multiplied rows whenever two observations shared
# identical values. Column order matches the old merge result: the filtered
# columns first, 'Date' last.
# Run once, directly after the z-score cell: df is overwritten here.
df = df.loc[df_filtered.index, list(df_filtered.columns) + ['Date']]

In [None]:
df.reset_index(drop=True, inplace=True)

In [None]:
df

In [None]:
# Define color mapping for the categories
color_map = {1: 'blue', 0: 'green', -1: 'red'}
colors = [color_map[1], color_map[0], color_map[-1]]

# Initialize the plot
count = 1
# squeeze=False always returns a 2-D array of Axes, so the loop below also
# works when there is a single categorical column (plt.subplots would otherwise
# return one bare Axes object, which cannot be zipped).
fig, axs = plt.subplots(1, len(cat_cols), figsize=(20, 10), squeeze=False)  # Increase the overall figure size
plt.subplots_adjust(wspace=0.3, hspace=0.3)

# Same pies as before the outlier filter, now on the filtered df.
for ax, col in zip(axs.ravel(), cat_cols):
    # Calculate value counts and percentages
    value_counts = df[col].value_counts()
    percentages = value_counts / value_counts.sum() * 100
    
    # Plot pie chart with consistent colors (looked up per category value, so
    # a given value has the same color in every pie)
    wedges, texts, autotexts = ax.pie(percentages, labels=None, autopct='%1.1f%%', startangle=140, colors=[color_map[key] for key in percentages.index], textprops=dict(color="w", fontsize=14, fontweight='bold'))
    
    ax.set_title(f'Pie chart of {col}', fontsize=16)  # Increase title font size
    
    # Set properties for percentage labels
    for autotext in autotexts:
        autotext.set_fontsize(14)
        autotext.set_fontweight('bold')

# Create a legend on the bottom right
labels = ['1', '0', '-1']
handles = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color_map[int(label)], markersize=10) for label in labels]
fig.legend(handles, labels, title='Categories', loc='lower right', fontsize=14, title_fontsize='16')

plt.suptitle('Pie charts of categorical variables', size=25)
plt.show()

In [None]:
# For every numerical column draw its distribution next to a box plot
# (same view as before the outlier filter, now on the filtered df).
for col in num_cols:
    plt.figure(figsize=(16,8)) # Create a new figure for each plot with a specified size
    plt.subplot(2,2,1)
    # sns.distplot is deprecated; a density-scaled histogram with a KDE overlay
    # is the documented replacement.
    sns.histplot(df[col], kde=True, stat="density")
    plt.title(f'Distribution of {col}', fontsize=16)
    plt.subplot(2,2,2)
    sns.boxplot(data=df, y=col)
    plt.ylabel(col)
    plt.title(f'Box Plot of {col}', fontsize=16)
    plt.grid(False)
    plt.show()  

In [None]:
num_cols

In [None]:
# Plot each feature against the return.
# y is taken from df itself: passing df_filtered.Return mixed a Series that
# still carries the pre-filter row labels with a frame whose index was reset,
# and seaborn aligns such inputs on index rather than on position.
# num_cols[1:] presumably skips 'Return' itself — verify the column order.
for col in num_cols[1:]:
    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=df, x=col, y='Return')
    plt.title(f'Scatter Plot of {col} vs Return')

In [None]:
# Calculate correlation matrix
# Pairwise correlations (DataFrame.corr with its default method) between all
# columns of the outlier-filtered frame, which has no 'Date' column.
corr_matrix = df_filtered.corr()

# Plot heatmap
plt.figure(figsize=(8, 6))  # Adjust the figure size as needed
# Color scale pinned to [-1, 1] so that 0 sits at the middle of the colormap.
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Heatmap')
plt.show()

In [None]:
#
# Merge the DataFrames based on index
# merged_df = df_filtered.merge(filtered_df[['Date']], left_index=True, right_index=True, how='left', suffixes=('', '_filtered'))


# Model Training

In [None]:
# Last date (inclusive) that belongs to the training set.
split_date = '2018-12-31'

# Split into training and testing data based on date
# Chronological split: rows dated up to split_date train the models and all
# later rows are held out for testing. Both parts get a fresh 0..n-1 index.
train_df = df[df['Date'] <= split_date].reset_index(drop=True)
test_df = df[df['Date'] > split_date].reset_index(drop=True)

In [None]:
train_df

In [None]:
test_df

In [None]:
# Features and target for training data
# 'Return' and 'bin_Return' are the targets and 'Date' is not used as a
# feature, so all three are excluded from the feature matrix.
X_train = train_df.drop(columns=['Return', 'Date', 'bin_Return'])
# y_train: class label for the classifiers; y_train_regn: continuous target
# for the regression models.
y_train = train_df['bin_Return']
y_train_regn = train_df['Return']

# Features and target for testing data
X_test = test_df.drop(columns=['Return', 'Date', 'bin_Return'])
y_test = test_df['bin_Return']
y_test_regn = test_df['Return']


## Model Fitting

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import tree
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

### Classification Models

Fitting a decision tree

In [None]:
# Initialize the Decision Tree model
# Baseline tree with a large depth cap (max_depth=50) and a fixed seed.
dt = DecisionTreeClassifier(max_depth=50, random_state=100)

# Fit the model on the training data
dt.fit(X_train, y_train)

In [None]:
# Predict on the testing data
y_pred_dt = dt.predict(X_test)

# Evaluate the classifier
# Overall accuracy plus the per-class report, both on the held-out test set.
accuracy = accuracy_score(y_test, y_pred_dt)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_dt))

Grid Search

In [None]:
# # Define the parameter grid to search through
# param_grid = {
#     'max_depth': [None, 10, 20, 50],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 2, 4],
#     'max_features': ['auto', 'sqrt', 'log2']
# }

# # Initialize GridSearchCV to find the best parameters
# grid_search = GridSearchCV(estimator=dt, param_grid=param_grid, cv=5, scoring='accuracy')

# # Fit GridSearchCV on the training data0
# grid_search.fit(X_train, y_train)

# # Get the best parameters and the best score
# best_params = grid_search.best_params_
# best_score = grid_search.best_score_

In [None]:
# print("Best Parameters:", best_params)
# print("Best Accuracy:", best_score)

In [None]:
# Hand-set tree used as the "best" model; the grid search above is commented
# out, so these values are not produced by it in this notebook run.
# NOTE(review): max_leaf_nodes=4 limits the tree to four leaves (depth of at
# most 3), so max_depth=10 never binds — confirm the intended constraint.
best = DecisionTreeClassifier(max_depth = 10, max_features = 'sqrt', min_samples_split = 2, random_state=75, max_leaf_nodes=4)
best.fit(X_train, y_train)

In [None]:
# Predictions of the hand-set tree on the test set (y_pred_gs) and on the
# training set (k).
y_pred_gs = best.predict(X_test)
k = best.predict(X_train)

# Evaluate the classifier
# `accuracy` is the test accuracy and `a` the training accuracy; printing both
# shows the train/test gap.
accuracy = accuracy_score(y_test, y_pred_gs)
a = accuracy_score(y_train, k)
print("\nAccuracy:", accuracy, 'train', a)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_gs))

Random Forest

In [None]:
# NOTE: the loop variable n is not used — n_estimators is fixed at 600 — so
# the body runs exactly once. It looks like the remnant of a manual sweep.
for n in [20]:
    # Initialize the Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=600, random_state=90, max_depth=10, max_features = 'log2', max_leaf_nodes=8)

    # Fit the Random Forest classifier on the training data
    rf_classifier.fit(X_train, y_train)

    # Predict on the testing data
    y_pred_rf = rf_classifier.predict(X_test)
    # k_pred: predictions on the training data, used for the train accuracy
    k_pred = rf_classifier.predict(X_train)

    # Evaluate the classifier
    accuracy = accuracy_score(y_test, y_pred_rf)
    a = accuracy_score(y_train, k_pred)
    # print(f'n_estimators:{n}')
    # Printed order: test accuracy first, then training accuracy.
    print("Accuracy:", accuracy, a)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred_rf))

In [None]:
# Feature importance
# Importances come from the random forest fitted in the previous cell.
feature_names = X_train.columns
feature_importance = rf_classifier.feature_importances_
# argsort is ascending, so reverse it to list the most important feature first.
sorted_indices = np.argsort(feature_importance)[::-1]
for i,index in enumerate(sorted_indices):
    print(f"{i+1}. {feature_names[index]}: {feature_importance[index]}")

logistic regression

In [None]:
# Initialize the Logistic Regression model
# NOTE(review): max_iter=10 is a very low iteration cap, and warnings are
# silenced at the top of the notebook, so a failure to converge would go
# unnoticed — consider raising max_iter.
log_reg = LogisticRegression(max_iter=10, random_state=75)

# Fit the model on the training data
log_reg.fit(X_train, y_train)

# Predict on the testing data
y_pred_log = log_reg.predict(X_test)
# k_pred: predictions on the training data, used for the train accuracy
k_pred = log_reg.predict(X_train)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred_log)
a = accuracy_score(y_train, k_pred)
print("Accuracy:", accuracy, 'train:', a)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_log))

Stacking (the code below fits a `StackingClassifier`, not a boosting model)

In [None]:
# Define the base classifiers
# Three level-0 models whose predictions feed the meta-learner below.
base_classifiers = [
    ('decision_tree', DecisionTreeClassifier( random_state=75)),
    ('log_reg', LogisticRegression(max_iter=50, random_state=75)),
    ('random_forest', RandomForestClassifier(n_estimators=500, random_state=75))
]

# Define the meta-learner
meta_learner = LogisticRegression()

# Create the StackingClassifier
# cv=5: the meta-learner is trained on 5-fold cross-validated base predictions.
stacking_clf = StackingClassifier(estimators=base_classifiers, final_estimator=meta_learner, cv=5)

# Fit the StackingClassifier on the training data
stacking_clf.fit(X_train, y_train)

# Predict on the testing data
# (the variable is named y_pred_ada although the model is a stacking ensemble)
y_pred_ada = stacking_clf.predict(X_test)
# Predict on the training data
k_train = stacking_clf.predict(X_train)
# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred_ada)
a = accuracy_score(y_train, k_train)
# Printed order: test accuracy first, then training accuracy.
print("Accuracy:", accuracy, a)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_ada))

NN

In [None]:
from sklearn.neural_network import MLPClassifier
# from sklearn.model_selection import GridSearchCV

# # Define the parameter grid for classification
# param_grid = {
#     'hidden_layer_sizes': [(128, 64), (32, 16), (64, 32)],
#     'activation': ['relu', 'tanh', 'logistic'],
#     'learning_rate_init': [0.001, 0.01, 0.1],
#     'max_iter': [200, 500],
# }

# # Initialize the MLP Classifier model
# mlp = MLPClassifier(random_state=75)

# # Initialize the GridSearchCV
# grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=3, n_jobs=-1, scoring='accuracy', verbose=2)

# # Fit the grid search on the training data
# grid_search.fit(X_train, y_train)

# # Print the best parameters
# print(f"Best Parameters: {grid_search.best_params_}")


In [None]:
# MLP classifier with hand-set hyper-parameters. The grid search in the cell
# above is commented out, and (50, 10) is not one of the layer sizes listed in
# that grid, so these values are not taken from it.
best_mlp_clf = MLPClassifier(activation='logistic', hidden_layer_sizes=(50, 10), learning_rate_init=0.01, max_iter=200, random_state=75)

# Fit the model on the training data
best_mlp_clf.fit(X_train, y_train)

# Predict on the testing and training data
y_pred_nn_clf = best_mlp_clf.predict(X_test)
y_pred_train = best_mlp_clf.predict(X_train)

# Evaluate the model
accuracy_test = accuracy_score(y_test, y_pred_nn_clf)
accuracy_train = accuracy_score(y_train, y_pred_train)

classification_report_test = classification_report(y_test, y_pred_nn_clf)
classification_report_train = classification_report(y_train, y_pred_train)

# Print accuracy up to 2 decimal places
print(f"Train Accuracy: {accuracy_train:.2f}, Test Accuracy: {accuracy_test:.2f}")

# Print classification reports
print("\nTrain Classification Report:\n", classification_report_train)
print("\nTest Classification Report:\n", classification_report_test)
### Continuous (regression) models

Normalizing the variables.

In [None]:
def normalize_series(series):
    """Standardise ``series`` with its own mean and standard deviation.

    Args:
        series: numeric series exposing ``mean()`` and ``std()``
            (assumed to be a pandas Series, whose ``std`` is the sample
            standard deviation).

    Returns:
        A new series ``(series - mean) / std``; the input is not modified.
    """
    centered = series - series.mean()
    return centered / series.std()

def normalize_series_using_train_stats(train_series, test_series):
    """Standardise ``test_series`` with the statistics of ``train_series``.

    Using the training mean and standard deviation keeps information from the
    test data out of the scaling.

    Args:
        train_series: series supplying ``mean()`` and ``std()``.
        test_series: series to transform.

    Returns:
        A new series ``(test_series - train_mean) / train_std``.
    """
    train_mean, train_std = train_series.mean(), train_series.std()
    return (test_series - train_mean) / train_std

In [None]:
def normalize_columns(df, columns):
    """Return a copy of ``df`` with the given columns standardised.

    Each listed column is replaced by ``(column - mean) / std`` using that
    column's own mean and standard deviation. The input frame is left
    untouched: the earlier in-place version silently rescaled the caller's
    frame (e.g. ``X_train``), which then no longer matched models that had
    been fitted on the raw values.

    Args:
        df: DataFrame holding the columns to scale.
        columns: iterable of column labels to standardise.

    Returns:
        A new DataFrame; columns not listed are copied unchanged.
    """
    normalized = df.copy()
    for col in columns:
        mean = df[col].mean()
        std = df[col].std()
        normalized[col] = (df[col] - mean) / std
    return normalized

def normalize_columns_using_train_stats(train_df, test_df, columns):
    """Return a copy of ``test_df`` scaled with ``train_df``'s statistics.

    Each listed column of the result is ``(test - train_mean) / train_std``.
    Neither input frame is modified: the earlier in-place version rescaled the
    caller's test frame (e.g. ``X_test``), so any later use of that frame with
    a model fitted on raw values received standardised inputs.

    Args:
        train_df: DataFrame supplying the per-column mean and std.
        test_df: DataFrame to transform.
        columns: iterable of column labels present in both frames.

    Returns:
        A new DataFrame; columns not listed are copied unchanged.
    """
    normalized = test_df.copy()
    for col in columns:
        mean = train_df[col].mean()
        std = train_df[col].std()
        normalized[col] = (test_df[col] - mean) / std
    return normalized

In [None]:
# NOTE(review): the two lists stay empty — the loop below recomputes `mean`
# and `std` for every column but never stores them, and it reads the full `df`
# rather than the training split. Nothing visible here uses the loop's result.
mean_train_x, mean_sd_x = [], []
for col in num_cols[1:]:
    mean = df[col].mean()
    std = df[col].std()

# Mean and standard deviation of the training target. They are used further
# down to map standardised predictions back to return units, so this must run
# before y_train_regn itself is standardised.
mean_train_y = y_train_regn.mean()
sd_train_y = y_train_regn.std()

In [None]:
# Test features are scaled with the training-set statistics, so this call has
# to see the un-normalized X_train; keep it ahead of the next line.
X_test_normalized = normalize_columns_using_train_stats(X_train, X_test, num_cols[1:])
# Training features are scaled with their own statistics.
X_train_normalized = normalize_columns(X_train, num_cols[1:])

In [None]:
min(X_train_normalized.Momentum_Reversal)

In [None]:
# Standardise the regression targets. The test target is transformed first,
# while y_train_regn still holds raw returns, so that it uses the raw
# training mean and std.
# NOTE(review): both names are overwritten in place of the raw targets.
y_test_regn = normalize_series_using_train_stats(y_train_regn, y_test_regn)
y_train_regn = normalize_series(y_train_regn)

In [None]:
min(y_train_regn)

Regression

In [None]:
# Initialize the Linear Regression model
regression_model = LinearRegression()

# Fit the model on the training data
# Both the features and the target are the standardised versions.
regression_model.fit(X_train_normalized, y_train_regn)

In [None]:
# Predict on the testing data
y_cont_pred = regression_model.predict(X_test_normalized)
# k_pred: in-sample predictions on the training data
k_pred = regression_model.predict(X_train_normalized)

# Evaluate the regressor on training data
# (train_mse / test_mse are computed but not printed in this cell)
train_mse = mean_squared_error(y_train_regn, k_pred)
train_r2 = r2_score(y_train_regn, k_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), n rows and p features
train_adjusted_r2 = 1 - (1 - train_r2) * (len(y_train_regn) - 1) / (len(y_train_regn) - X_train_normalized.shape[1] - 1)


# Evaluate the regressor on testing data
test_mse = mean_squared_error(y_test_regn, y_cont_pred)
test_r2 = r2_score(y_test_regn, y_cont_pred)
test_adjusted_r2 = 1 - (1 - test_r2) * (len(y_test_regn) - 1) / (len(y_test_regn) - X_test_normalized.shape[1] - 1)

print(f"Training R2: {train_r2}")
print(f"Testing R2: {test_r2}")
print(f"Training Adjusted R2: {train_adjusted_r2}")
print(f"Testing Adjusted R2: {test_adjusted_r2}")
# Map the predictions back from standardised units to the original return
# scale using the training-target mean and standard deviation.
y_cont_pred = (y_cont_pred*sd_train_y) + mean_train_y
train_y_regn = (k_pred*sd_train_y) + mean_train_y

In [None]:
max(k_pred)

In [None]:
import matplotlib.pyplot as plt

# y_cont_pred was mapped back to return units at the end of the evaluation
# cell, whereas y_test_regn is still standardised. Undo the standardisation of
# the actual values so that both axes share the same scale.
y_test_actual = (y_test_regn * sd_train_y) + mean_train_y

# Create a scatter plot of test-set actuals against the linear-regression predictions
plt.figure(figsize=(10, 6))
plt.scatter(y_test_actual, y_cont_pred, alpha=0.5)
plt.title('Scatterplot of Predicted vs Actual Values')
# The x axis shows test-set actuals (the old label said y_train).
plt.xlabel('Actual Values (y_test)')
plt.ylabel('Predicted Values (y_pred)')
# plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red')  # Line y=x for reference
plt.show()


Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# NOTE: the loop variable n is not used — n_estimators is fixed at 10 — so the
# body runs exactly once.
for n in [10]:
    # print(n)
    # Initialize the Random Forest regressor
    rf_regressor = RandomForestRegressor(n_estimators=10, random_state=20, max_depth=5, max_features = 'log2')

    # Fit the Random Forest regressor on the training data
    rf_regressor.fit(X_train_normalized, y_train_regn)

    # Predict on the testing data
    y_pred_rf_regn = rf_regressor.predict(X_test_normalized)
    # k_pred: in-sample predictions on the training data
    k_pred = rf_regressor.predict(X_train_normalized)

    # Evaluate the regressor on training data
    train_mse = mean_squared_error(y_train_regn, k_pred)
    train_r2 = r2_score(y_train_regn, k_pred)
    # Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), n rows and p features
    train_adjusted_r2 = 1 - (1 - train_r2) * (len(y_train_regn) - 1) / (len(y_train_regn) - X_train_normalized.shape[1] - 1)

    print(f"Training MSE: {train_mse}")
    print(f"Training R2: {train_r2}")
    print(f"Training Adjusted R2: {train_adjusted_r2}")

    # Evaluate the regressor on testing data
    test_mse = mean_squared_error(y_test_regn, y_pred_rf_regn)
    test_r2 = r2_score(y_test_regn, y_pred_rf_regn)
    test_adjusted_r2 = 1 - (1 - test_r2) * (len(y_test_regn) - 1) / (len(y_test_regn) - X_test_normalized.shape[1] - 1)

    print(f"Testing MSE: {test_mse}")
    print(f"Testing R2: {test_r2}")
    print(f"Testing Adjusted R2: {test_adjusted_r2}")
    # Map the predictions back from standardised units to the return scale.
    y_pred_rf_regn = (y_pred_rf_regn*sd_train_y) + mean_train_y
    train_y_rf = (k_pred*sd_train_y) + mean_train_y

CART

In [None]:
# # Define the parameter grid for GridSearchCV
# param_grid = {
#     'max_depth': [5, 10, 30, 80],
#     'min_samples_split': [2, 8, 15, 20],
#     'min_samples_leaf': [5, 10, 20],
#     'criterion':['squared_error' ,'poisson'],
#     'splitter':['best', 'random'],
#     'max_features':['sqrt', 'log2']
# }

# # Initialize the Decision Tree regressor
# dt_regressor = DecisionTreeRegressor(random_state=20)

# # Initialize GridSearchCV
# grid_search = GridSearchCV(estimator=dt_regressor, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error')

# # Fit GridSearchCV on the training data
# grid_search.fit(X_train_normalized, y_train_regn)

# print(f"Best Parameters: {grid_search.best_params_}")

In [None]:
# Hand-set regression tree (the grid search above is commented out).
# random_state is fixed because splitter='random' and max_features='sqrt' both
# draw random numbers; without a seed every run produced a different tree and
# different metrics. 20 matches the seed used by the other regressors here.
best_cart = DecisionTreeRegressor(criterion='squared_error', max_depth=10, min_samples_leaf=5, min_samples_split=2, splitter='random', max_features='sqrt', random_state=20)
best_cart.fit(X_train_normalized, y_train_regn)

# Predict on the testing data
y_pred_dt_regn = best_cart.predict(X_test_normalized)
# k_pred: in-sample predictions on the training data
k_pred = best_cart.predict(X_train_normalized)

# Evaluate the regressor on training data
train_mse = mean_squared_error(y_train_regn, k_pred)
train_r2 = r2_score(y_train_regn, k_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), n rows and p features
train_adjusted_r2 = 1 - (1 - train_r2) * (len(y_train_regn) - 1) / (len(y_train_regn) - X_train_normalized.shape[1] - 1)

print(f"Training MSE: {train_mse}")
print(f"Training R2: {train_r2}")
print(f"Training Adjusted R2: {train_adjusted_r2}")

# Evaluate the regressor on testing data
test_mse = mean_squared_error(y_test_regn, y_pred_dt_regn)
test_r2 = r2_score(y_test_regn, y_pred_dt_regn)
test_adjusted_r2 = 1 - (1 - test_r2) * (len(y_test_regn) - 1) / (len(y_test_regn) - X_test_normalized.shape[1] - 1)

print(f"Testing MSE: {test_mse}")
print(f"Testing R2: {test_r2}")
print(f"Testing Adjusted R2: {test_adjusted_r2}")
# Map the predictions back from standardised units to the return scale.
y_pred_dt_regn = (y_pred_dt_regn*sd_train_y) + mean_train_y
train_y_cart = (k_pred*sd_train_y) + mean_train_y

Lasso Regression

In [None]:
# Initialize the Lasso Regression model
lasso_model = Lasso(alpha=0.1)  # Adjust alpha for regularization strength

# Fit the model on the training data
lasso_model.fit(X_train_normalized, y_train_regn)

# Predict on the testing data
y_lasso_pred = lasso_model.predict(X_test_normalized)
# k_pred: in-sample predictions on the training data
k_pred = lasso_model.predict(X_train_normalized)

# Evaluate the regressor on training data
train_mse = mean_squared_error(y_train_regn, k_pred)
train_r2 = r2_score(y_train_regn, k_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), n rows and p features
train_adjusted_r2 = 1 - (1 - train_r2) * (len(y_train_regn) - 1) / (len(y_train_regn) - X_train_normalized.shape[1] - 1)

print(f"Training MSE: {train_mse}")
print(f"Training R2: {train_r2}")
print(f"Training Adjusted R2: {train_adjusted_r2}")

# Evaluate the regressor on testing data
test_mse = mean_squared_error(y_test_regn, y_lasso_pred)
test_r2 = r2_score(y_test_regn, y_lasso_pred)
test_adjusted_r2 = 1 - (1 - test_r2) * (len(y_test_regn) - 1) / (len(y_test_regn) - X_test_normalized.shape[1] - 1)

print(f"Testing MSE: {test_mse}")
print(f"Testing R2: {test_r2}")
print(f"Testing Adjusted R2: {test_adjusted_r2}")
# Map the predictions back from standardised units to the return scale.
y_lasso_pred = (y_lasso_pred*sd_train_y) + mean_train_y
train_y_lasso = (k_pred*sd_train_y) + mean_train_y

Ridge regression

In [None]:
# Initialize the Ridge Regression model
ridge_model = Ridge(alpha=1.0)  # You can adjust the alpha parameter for regularization strength

# Fit the model on the training data
ridge_model.fit(X_train_normalized, y_train_regn)

# Predict on the testing data
y_ridge_pred = ridge_model.predict(X_test_normalized)
# k_pred: in-sample predictions on the training data
k_pred = ridge_model.predict(X_train_normalized)

# Evaluate the regressor on training data
train_mse = mean_squared_error(y_train_regn, k_pred)
train_r2 = r2_score(y_train_regn, k_pred)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), n rows and p features
train_adjusted_r2 = 1 - (1 - train_r2) * (len(y_train_regn) - 1) / (len(y_train_regn) - X_train_normalized.shape[1] - 1)

print(f"Training MSE: {train_mse}")
print(f"Training R2: {train_r2}")
print(f"Training Adjusted R2: {train_adjusted_r2}")

# Evaluate the regressor on testing data
test_mse = mean_squared_error(y_test_regn, y_ridge_pred)
test_r2 = r2_score(y_test_regn, y_ridge_pred)
test_adjusted_r2 = 1 - (1 - test_r2) * (len(y_test_regn) - 1) / (len(y_test_regn) - X_test_normalized.shape[1] - 1)

print(f"Testing MSE: {test_mse}")
print(f"Testing R2: {test_r2}")
print(f"Testing Adjusted R2: {test_adjusted_r2}")
# Map the predictions back from standardised units to the return scale.
y_ridge_pred = (y_ridge_pred*sd_train_y) + mean_train_y
train_y_ridge = (k_pred*sd_train_y) + mean_train_y

Neural Network

In [None]:
# # Define the neural network model
# def create_nn_model(input_dim):
#     model = Sequential()
#     model.add(Dense(128, activation='relu', input_dim=input_dim))
#     model.add(Dropout(0.5))
#     model.add(Dense(64, activation='relu'))
#     model.add(Dropout(0.5))
#     model.add(Dense(1, activation='linear'))
    
#     model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
#     return model

# # Create and train the nn model
# nn_model = create_nn_model(X_train_scaled.shape[1])
# nn_model.fit(X_train_scaled, y_train_regn, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# # Predict on the training data
# y_train_pred_nn = nn_model.predict(X_train_scaled)

# # Predict on the testing data
# y_test_pred_nn = nn_model.predict(X_test_scaled)


# # Calculate MSE for the training data
# train_mse_nn = mean_squared_error(y_train_regn, y_train_pred_nn)
# # Calculate R² for the training data
# train_r2_nn = r2_score(y_train_regn, y_train_pred_nn)
# # Calculate adjusted R² for the training data
# train_adjusted_r2_nn = 1 - (1 - train_r2_nn) * (len(y_train_regn) - 1) / (len(y_train_regn) - X_train_scaled.shape[1] - 1)

# # Calculate MSE for the testing data
# test_mse_nn = mean_squared_error(y_test_regn, y_test_pred_nn)
# # Calculate R² for the testing data
# test_r2_nn = r2_score(y_test_regn, y_test_pred_nn)
# # Calculate adjusted R² for the testing data
# test_adjusted_r2_nn = 1 - (1 - test_r2_nn) * (len(y_test_regn) - 1) / (len(y_test_regn) - X_test_scaled.shape[1] - 1)

# # Print results
# print(f"Training MSE: {train_mse_nn}")
# print(f"Training R²: {train_r2_nn}")
# print(f"Training Adjusted R²: {train_adjusted_r2_nn}")

# print(f"Testing MSE: {test_mse_nn}")
# print(f"Testing R²: {test_r2_nn}")
# print(f"Testing Adjusted R²: {test_adjusted_r2_nn}")


In [None]:
# from sklearn.neural_network import MLPRegressor


# def adjusted_r2(r2, n, k):
#     return 1 - ((1 - r2) * (n - 1) / (n - k - 1))

# # Define the parameter grid
# param_grid = {
#     'hidden_layer_sizes': [(128, 64)],# (100, 50), (64, 32)
#     'activation': ['relu', 'tanh', 'logistic'],
#     'solver': ['adam'], #, 'sgd', 'lbfgs'
#     'learning_rate_init': [0.1],#0.001, 0.01, 0.1
#     'max_iter': [500],#200, 500, 1000
# }

# # Initialize the MLP Regressor model
# mlp = MLPRegressor(random_state=75)

# # Initialize the GridSearchCV
# grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, n_jobs=-1, scoring='neg_mean_squared_error', verbose=2)

# # Fit the grid search on the training data
# grid_search.fit(X_train_normalized, y_train_regn)

# # Print the best parameters
# print(f"Best Parameters: {grid_search.best_params_}")

In [None]:
def adjusted_r2(r2, n, k):
    """Return the adjusted R^2 for a fit with ``n`` samples and ``k`` predictors.

    Computes ``1 - (1 - r2) * (n - 1) / (n - k - 1)``.
    With plain Python numbers this raises ZeroDivisionError when ``n == k + 1``.
    """
    unexplained = (1 - r2) * (n - 1)
    dof = n - k - 1
    return 1 - (unexplained / dof)

# Fit the MLP regressor with fixed hyper-parameters. The grid-search cell above is
# commented out, so these values are hard-coded rather than read from a search result.
best_mlp = mlp = MLPRegressor(hidden_layer_sizes=(64, 32), activation='logistic', solver='adam', learning_rate_init=0.1, max_iter=500, random_state=75)

# Train on the normalized features and the regression target.
best_mlp.fit(X_train_normalized, y_train_regn)

# In-sample and out-of-sample predictions (still on the scale of y_*_regn).
y_pred_nn = best_mlp.predict(X_test_normalized)
y_pred_nn_train = best_mlp.predict(X_train_normalized)

# Evaluate the fitted model.
mse_test = mean_squared_error(y_test_regn, y_pred_nn)
mse_train = mean_squared_error(y_train_regn, y_pred_nn_train)

r2_test = r2_score(y_test_regn, y_pred_nn)
# Score against y_train_regn, the target the model was fitted on (previously y_train),
# so the train R^2 is measured on the same target as mse_train.
r2_train = r2_score(y_train_regn, y_pred_nn_train)

# Use the sample count of the regression targets and the feature count of the
# matrices the model actually saw.
adj_r2_test = adjusted_r2(r2_test, len(y_test_regn), X_test_normalized.shape[1])
adj_r2_train = adjusted_r2(r2_train, len(y_train_regn), X_train_normalized.shape[1])

print(f"Train MSE: {mse_train}, Test MSE: {mse_test}")
print(f"Train Adjusted R^2: {adj_r2_train}, Test Adjusted R^2: {adj_r2_test}")

# Rescale predictions with the training-target statistics.
# NOTE(review): assumes y_*_regn were standardized as (y - mean_train_y) / sd_train_y — confirm.
y_pred_nn = (y_pred_nn*sd_train_y) + mean_train_y
train_y_nn = (y_pred_nn_train*sd_train_y) + mean_train_y

# Buy/Sell Signals on Training Data

In [None]:
# Recover each training row's ticker by joining back onto merged_df.
# NOTE(review): the join key includes the float Return column; if two stocks share
# the same return on a date the inner join duplicates rows, which would misalign
# the positional lookups done against model predictions below — confirm uniqueness.
training_df = train_df.merge(
    merged_df[['Date', 'Stock', 'Return']],
    how='inner',
    on=['Date', 'Return'],
)

In [None]:
# Inspect the merged training frame (train_df plus the Stock column from merged_df).
training_df

### Classification Models

Decision Tree

In [None]:
# Training-set signals from the decision tree: label 1 -> buy, any other label -> sell.
y = best.predict(X_train)

buy_dict_dt_train = {}
sell_dict_dt_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(y):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_dt_train if pred == 1 else sell_dict_dt_train
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_dt_train = portfolio_return(returns_df, buy_dict_dt_train, sell_dict_dt_train, 50)

Random Forest

In [None]:
# Training-set signals from the random forest: label 1 -> buy, any other label -> sell.
y = rf_classifier.predict(X_train)

buy_dict_rf_train = {}
sell_dict_rf_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(y):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_rf_train if pred == 1 else sell_dict_rf_train
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_rf_train = portfolio_return(returns_df, buy_dict_rf_train, sell_dict_rf_train, 50)

AdaBoost

In [None]:
# Training-set signals: label 1 -> buy, any other label -> sell.
# NOTE(review): this section is titled AdaBoost but the predictions come from
# `stacking_clf` — confirm this is the intended model.
y = stacking_clf.predict(X_train)

buy_dict_ada_train = {}
sell_dict_ada_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(y):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_ada_train if pred == 1 else sell_dict_ada_train
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_ada_train = portfolio_return(returns_df, buy_dict_ada_train, sell_dict_ada_train, 50)

Logistic

In [None]:
# Training-set signals from logistic regression: label 1 -> buy, any other label -> sell.
y = log_reg.predict(X_train)

buy_dict_log_train = {}
sell_dict_log_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(y):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_log_train if pred == 1 else sell_dict_log_train
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_log_train = portfolio_return(returns_df, buy_dict_log_train, sell_dict_log_train, 50)

NN

In [None]:
# Training-set signals from the neural-network classifier predictions held in
# y_pred_train (computed in an earlier cell): label 1 -> buy, any other label -> sell.
buy_dict_nn_clf_train = {}
sell_dict_nn_clf_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(y_pred_train):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_nn_clf_train if pred == 1 else sell_dict_nn_clf_train
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_nn_clf_train = portfolio_return(returns_df, buy_dict_nn_clf_train, sell_dict_nn_clf_train, 50)

### Continuous Models

Regression

In [None]:
# Training-set signals from the regression predictions held in train_y_regn
# (computed in an earlier cell): positive -> buy, negative -> sell.
buy_dict_regn_train = {}
sell_dict_regn_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(train_y_regn):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_regn_train.setdefault(date, []).append(stock)
    elif pred < 0:
        # A prediction of exactly zero (or NaN) carries no direction, so it is
        # left out of both books, as in the Ridge/Lasso/NN cells and the test cells.
        sell_dict_regn_train.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_regn_train = portfolio_return(returns_df, buy_dict_regn_train, sell_dict_regn_train, 50)

RF Regression

In [None]:
# Training-set signals from the random-forest regression predictions held in
# train_y_rf (computed in an earlier cell): positive -> buy, negative -> sell.
buy_dict_regn_rf_train = {}
sell_dict_regn_rf_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(train_y_rf):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_regn_rf_train.setdefault(date, []).append(stock)
    elif pred < 0:
        # A prediction of exactly zero (or NaN) carries no direction, so it is
        # left out of both books, as in the Ridge/Lasso/NN cells and the test cells.
        sell_dict_regn_rf_train.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_regn_rf_train = portfolio_return(returns_df, buy_dict_regn_rf_train, sell_dict_regn_rf_train, 50)

CART

In [None]:
# Training-set signals from the CART regression predictions held in train_y_cart
# (computed in an earlier cell): positive -> buy, negative -> sell.
buy_dict_cart_train = {}
sell_dict_cart_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(train_y_cart):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_cart_train.setdefault(date, []).append(stock)
    elif pred < 0:
        # A prediction of exactly zero (or NaN) carries no direction, so it is
        # left out of both books, as in the Ridge/Lasso/NN cells and the test cells.
        sell_dict_cart_train.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_cart_train = portfolio_return(returns_df, buy_dict_cart_train, sell_dict_cart_train, 50)

Ridge

In [None]:
# Training-set signals from the ridge predictions held in train_y_ridge
# (computed in an earlier cell): positive -> buy, negative -> sell, zero -> skipped.
buy_dict_ridge_train = {}
sell_dict_ridge_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(train_y_ridge):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_ridge_train.setdefault(date, []).append(stock)
    elif pred < 0:
        sell_dict_ridge_train.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_ridge_train = portfolio_return(returns_df, buy_dict_ridge_train, sell_dict_ridge_train, 50)

Lasso

In [None]:
# Training-set signals from the lasso predictions held in train_y_lasso
# (computed in an earlier cell): positive -> buy, negative -> sell, zero -> skipped.
buy_dict_lasso_train = {}
sell_dict_lasso_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(train_y_lasso):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_lasso_train.setdefault(date, []).append(stock)
    elif pred < 0:
        sell_dict_lasso_train.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_lasso_train = portfolio_return(returns_df, buy_dict_lasso_train, sell_dict_lasso_train, 50)

NN

In [None]:
# Training-set signals from the MLP regressor predictions held in train_y_nn
# (computed in an earlier cell): positive -> buy, negative -> sell, zero -> skipped.
buy_dict_nn_train = {}
sell_dict_nn_train = {}

# Read the lookup columns once; training_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = training_df['Stock'].tolist()
date_by_row = training_df['Date'].tolist()

# Predictions are matched to training_df rows by position.
for idx, pred in enumerate(train_y_nn):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_nn_train.setdefault(date, []).append(stock)
    elif pred < 0:
        sell_dict_nn_train.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_nn_train = portfolio_return(returns_df, buy_dict_nn_train, sell_dict_nn_train, 50)

# Buy/Sell Signals on Test Data

In [None]:
# merged_df['Date'] = pd.to_datetime(merged_df['Date'])
# Preview the (Date, Stock, Return) columns that the following cells merge onto
# test_df / train_df to recover each row's ticker.
merged_df[['Date', 'Stock', 'Return']]

In [None]:
# Show the test frame's column names as a plain list.
cols = test_df.columns
list(cols)

In [None]:
# Recover each test row's ticker by joining back onto merged_df.
# NOTE(review): the join key includes the float Return column; if two stocks share
# the same return on a date the inner join duplicates rows, which would misalign
# the positional lookups done against model predictions below — confirm uniqueness.
testing_df = test_df.merge(
    merged_df[['Date', 'Stock', 'Return']],
    how='inner',
    on=['Date', 'Return'],
)

In [None]:
# Remove the target columns and renumber the rows. Rebinding (instead of
# inplace=True) together with errors='ignore' keeps this cell safe to re-run:
# a second execution no longer raises KeyError for the already-dropped columns.
testing_df = (
    testing_df
    .drop(columns=['bin_Return', 'Return'], errors='ignore')
    .reset_index(drop=True)
)
testing_df.head()

In [None]:
# Rebuild the training frame with its ticker column by joining onto merged_df.
# NOTE(review): the join key includes the float Return column; if two stocks share
# the same return on a date the inner join duplicates rows — confirm uniqueness.
training_df = train_df.merge(
    merged_df[['Date', 'Stock', 'Return']],
    how='inner',
    on=['Date', 'Return'],
)

In [None]:
# Inspect the merged training frame (train_df plus the Stock column from merged_df).
training_df

In [None]:
# Remove the target columns. Rebinding with errors='ignore' (instead of
# inplace=True) keeps this cell safe to re-run without a KeyError.
training_df = training_df.drop(columns=['bin_Return', 'Return'], errors='ignore')

### Classification Models

In [None]:
import json

Decision Tree based on Grid Search

In [None]:
# Rank the decision-tree test predictions by confidence and export them as JSON.
probabilities = best.predict_proba(X_test)

buy_dict_dt = {}
sell_dict_dt = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_gs):
    stock = stock_by_row[idx]
    date = date_by_row[idx].strftime('%Y-%m-%d')  # string keys so json.dump can serialise them
    prob = probabilities[idx][1]  # Assuming class 1 is the positive class
    bucket = buy_dict_dt if pred == 1 else sell_dict_dt
    bucket.setdefault(date, []).append((stock, prob))

# Strongest signal first, then drop the scores: buys by highest P(class 1),
# sells by lowest P(class 1). Sells were previously sorted descending, which put
# the least confident sells first, unlike the regression cells that rank the
# most negative prediction first.
buy_dict_dt = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_dt.items()
}
sell_dict_dt = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_dt.items()
}

# Save the dictionaries to a local JSON file
output_data = {
    'buy_dict': buy_dict_dt,
    'sell_dict': sell_dict_dt
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_decisiontree.json'
with open(output_path, 'w') as f:
    json.dump(output_data, f, indent=4)

# Report the file that was actually written.
print(f"Dictionaries saved to {output_path}")

In [None]:
# Test-set signals from the decision tree predictions (y_pred_gs), keyed by the
# raw Date values: label 1 -> buy, any other label -> sell.
buy_dict_dt = {}
sell_dict_dt = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_gs):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_dt if pred == 1 else sell_dict_dt
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_dt = portfolio_return(returns_df, buy_dict_dt, sell_dict_dt, 50)

Random Forest

In [None]:
# Rank the random-forest test predictions by confidence and export them as JSON.
probabilities = rf_classifier.predict_proba(X_test)

buy_dict_rf = {}
sell_dict_rf = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_rf):
    stock = stock_by_row[idx]
    date = date_by_row[idx].strftime('%Y-%m-%d')  # string keys so json.dump can serialise them
    prob = probabilities[idx][1]  # Assuming class 1 is the positive class
    bucket = buy_dict_rf if pred == 1 else sell_dict_rf
    bucket.setdefault(date, []).append((stock, prob))

# Strongest signal first, then drop the scores: buys by highest P(class 1),
# sells by lowest P(class 1). Sells were previously sorted descending, which put
# the least confident sells first, unlike the regression cells that rank the
# most negative prediction first.
buy_dict_rf = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_rf.items()
}
sell_dict_rf = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_rf.items()
}

# Save the dictionaries to a local JSON file
output_data = {
    'buy_dict': buy_dict_rf,
    'sell_dict': sell_dict_rf
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_randomforest.json'
with open(output_path, 'w') as f:
    json.dump(output_data, f, indent=4)

# Report the file that was actually written.
print(f"Dictionaries saved to {output_path}")

In [None]:
# Test-set signals from the random forest predictions (y_pred_rf), keyed by the
# raw Date values: label 1 -> buy, any other label -> sell.
buy_dict_rf = {}
sell_dict_rf = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_rf):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_rf if pred == 1 else sell_dict_rf
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_rf = portfolio_return(returns_df, buy_dict_rf, sell_dict_rf, 50)


AdaBoost

In [None]:
# Test-set signals from y_pred_ada, keyed by the raw Date values:
# label 1 -> buy, any other label -> sell.
buy_dict_ada = {}
sell_dict_ada = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_ada):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_ada if pred == 1 else sell_dict_ada
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_ada = portfolio_return(returns_df, buy_dict_ada, sell_dict_ada, 50)


In [None]:
# Rank the y_pred_ada test predictions by confidence and export them as JSON.
# NOTE(review): probabilities come from `stacking_clf` although the section and
# output file are named AdaBoost — confirm this is the intended model.
probabilities = stacking_clf.predict_proba(X_test)

buy_dict_ada = {}
sell_dict_ada = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_ada):
    stock = stock_by_row[idx]
    date = date_by_row[idx].strftime('%Y-%m-%d')  # string keys so json.dump can serialise them
    prob = probabilities[idx][1]  # Assuming class 1 is the positive class
    bucket = buy_dict_ada if pred == 1 else sell_dict_ada
    bucket.setdefault(date, []).append((stock, prob))

# Strongest signal first, then drop the scores: buys by highest P(class 1),
# sells by lowest P(class 1). Sells were previously sorted descending, which put
# the least confident sells first, unlike the regression cells that rank the
# most negative prediction first.
buy_dict_ada = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_ada.items()
}
sell_dict_ada = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_ada.items()
}

# Save the dictionaries to a local JSON file
output_data = {
    'buy_dict': buy_dict_ada,
    'sell_dict': sell_dict_ada
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_adaboost.json'
with open(output_path, 'w') as f:
    json.dump(output_data, f, indent=4)

# Report the file that was actually written.
print(f"Dictionaries saved to {output_path}")

Logistic

In [None]:
# Test-set signals from the logistic-regression predictions (y_pred_log), keyed by
# the raw Date values: label 1 -> buy, any other label -> sell.
buy_dict_log = {}
sell_dict_log = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_log):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_log if pred == 1 else sell_dict_log
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_log = portfolio_return(returns_df, buy_dict_log, sell_dict_log, 50)

In [None]:
# Rank the logistic-regression test predictions by confidence and export them as JSON.
probabilities = log_reg.predict_proba(X_test)

buy_dict_log = {}
sell_dict_log = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_log):
    stock = stock_by_row[idx]
    date = date_by_row[idx].strftime('%Y-%m-%d')  # string keys so json.dump can serialise them
    prob = probabilities[idx][1]  # Assuming class 1 is the positive class
    bucket = buy_dict_log if pred == 1 else sell_dict_log
    bucket.setdefault(date, []).append((stock, prob))

# Strongest signal first, then drop the scores: buys by highest P(class 1),
# sells by lowest P(class 1). Sells were previously sorted descending, which put
# the least confident sells first, unlike the regression cells that rank the
# most negative prediction first.
buy_dict_log = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_log.items()
}
sell_dict_log = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_log.items()
}

# Save the dictionaries to a local JSON file
output_data = {
    'buy_dict': buy_dict_log,
    'sell_dict': sell_dict_log
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_logistic.json'
with open(output_path, 'w') as f:
    json.dump(output_data, f, indent=4)

# Report the file that was actually written.
print(f"Dictionaries saved to {output_path}")

NN

In [None]:
# Test-set signals from the neural-network classifier predictions (y_pred_nn_clf),
# keyed by the raw Date values: label 1 -> buy, any other label -> sell.
buy_dict_nn_clf = {}
sell_dict_nn_clf = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_nn_clf):
    stock, date = stock_by_row[idx], date_by_row[idx]
    bucket = buy_dict_nn_clf if pred == 1 else sell_dict_nn_clf
    bucket.setdefault(date, []).append(stock)

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_nn_clf = portfolio_return(returns_df, buy_dict_nn_clf, sell_dict_nn_clf, 50)

In [None]:
# Rank the neural-network classifier test predictions by confidence and export them as JSON.
probabilities = best_mlp_clf.predict_proba(X_test)

buy_dict_nn_clf = {}
sell_dict_nn_clf = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_nn_clf):
    stock = stock_by_row[idx]
    date = date_by_row[idx].strftime('%Y-%m-%d')  # string keys so json.dump can serialise them
    prob = probabilities[idx][1]  # Assuming class 1 is the positive class
    bucket = buy_dict_nn_clf if pred == 1 else sell_dict_nn_clf
    bucket.setdefault(date, []).append((stock, prob))

# Strongest signal first, then drop the scores: buys by highest P(class 1),
# sells by lowest P(class 1). Sells were previously sorted descending, which put
# the least confident sells first, unlike the regression cells that rank the
# most negative prediction first.
buy_dict_nn_clf = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_nn_clf.items()
}
sell_dict_nn_clf = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_nn_clf.items()
}

# Save the dictionaries to a local JSON file
output_data = {
    'buy_dict': buy_dict_nn_clf,
    'sell_dict': sell_dict_nn_clf
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_neuralnetwork_classifier.json'
with open(output_path, 'w') as f:
    json.dump(output_data, f, indent=4)

# Report the file that was actually written.
print(f"Dictionaries saved to {output_path}")

### Continuous Models

Regression 

In [None]:
import json

# Test-set signals from the regression predictions (y_cont_pred):
# positive -> buy, negative -> sell, zero -> skipped.
buy_dict_regn = {}
sell_dict_regn = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Keep the raw Date values as keys here, as the other regression cells do, so
# portfolio_return receives the same key type for every model; string keys are
# produced separately for the JSON export below.
for idx, pred in enumerate(y_cont_pred):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_regn.setdefault(date, []).append((stock, pred))
    elif pred < 0:
        sell_dict_regn.setdefault(date, []).append((stock, pred))

# Strongest signal first (largest prediction for buys, most negative for sells),
# then drop the scores and keep only the tickers.
buy_dict_regn = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_regn.items()
}
sell_dict_regn = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_regn.items()
}

# Convert the keys to strings for JSON serialization
buy_dict_regn_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in buy_dict_regn.items()}
sell_dict_regn_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in sell_dict_regn.items()}

# Save the dictionaries to a local JSON file
output_data_regn = {
    'buy_dict_regn': buy_dict_regn_str,
    'sell_dict_regn': sell_dict_regn_str
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_regression.json'
with open(output_path, 'w') as f:
    json.dump(output_data_regn, f, indent=4)

# Report the file that was actually written.
print(f"Dictionaries saved to {output_path}")

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_regn = portfolio_return(returns_df, buy_dict_regn, sell_dict_regn, 50)

RF Regression

In [None]:
import json

# Test-set signals from the random-forest regression predictions (y_pred_rf_regn):
# positive -> buy, negative -> sell, zero -> skipped.
buy_dict_regn_rf = {}
sell_dict_regn_rf = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_rf_regn):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_regn_rf.setdefault(date, []).append((stock, pred))
    elif pred < 0:
        sell_dict_regn_rf.setdefault(date, []).append((stock, pred))

# Strongest signal first (largest prediction for buys, most negative for sells),
# then drop the scores and keep only the tickers.
buy_dict_regn_rf = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_regn_rf.items()
}
sell_dict_regn_rf = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_regn_rf.items()
}

# Convert the keys to strings for JSON serialization
buy_dict_regn_rf_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in buy_dict_regn_rf.items()}
sell_dict_regn_rf_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in sell_dict_regn_rf.items()}

# Save the dictionaries to a local JSON file
output_data_regn_rf = {
    'buy_dict_regn_rf': buy_dict_regn_rf_str,
    'sell_dict_regn_rf': sell_dict_regn_rf_str
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_randomforestregression.json'
with open(output_path, 'w') as f:
    json.dump(output_data_regn_rf, f, indent=4)

# Report the file that was actually written (the old message named a different file).
print(f"Dictionaries saved to {output_path}")

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_regn_rf = portfolio_return(returns_df, buy_dict_regn_rf, sell_dict_regn_rf, 50)

CART

In [None]:
import json

# Test-set signals from the CART regression predictions (y_pred_dt_regn):
# positive -> buy, negative -> sell, zero -> skipped.
buy_dict_regn_cart = {}
sell_dict_regn_cart = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_pred_dt_regn):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_regn_cart.setdefault(date, []).append((stock, pred))
    elif pred < 0:
        sell_dict_regn_cart.setdefault(date, []).append((stock, pred))

# Strongest signal first (largest prediction for buys, most negative for sells),
# then drop the scores and keep only the tickers.
buy_dict_regn_cart = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_regn_cart.items()
}
sell_dict_regn_cart = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_regn_cart.items()
}

# Convert the keys to strings for JSON serialization
buy_dict_regn_cart_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in buy_dict_regn_cart.items()}
sell_dict_regn_cart_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in sell_dict_regn_cart.items()}

# Save the dictionaries to a local JSON file
output_data_regn_cart = {
    'buy_dict_regn_cart': buy_dict_regn_cart_str,
    'sell_dict_regn_cart': sell_dict_regn_cart_str
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_cart.json'
with open(output_path, 'w') as f:
    json.dump(output_data_regn_cart, f, indent=4)

# Report the file that was actually written (the old message named a different file).
print(f"Dictionaries saved to {output_path}")

# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_regn_cart = portfolio_return(returns_df, buy_dict_regn_cart, sell_dict_regn_cart, 50)

Lasso

In [None]:
import json

# Test-set signals from the lasso predictions (y_lasso_pred):
# positive -> buy, negative -> sell, zero -> skipped.
buy_dict_lasso = {}
sell_dict_lasso = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_lasso_pred):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_lasso.setdefault(date, []).append((stock, pred))
    elif pred < 0:
        sell_dict_lasso.setdefault(date, []).append((stock, pred))

# Strongest signal first (largest prediction for buys, most negative for sells),
# then drop the scores and keep only the tickers.
buy_dict_lasso = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_lasso.items()
}
sell_dict_lasso = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_lasso.items()
}

# Convert the keys to strings for JSON serialization
buy_dict_lasso_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in buy_dict_lasso.items()}
sell_dict_lasso_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in sell_dict_lasso.items()}

# Save the dictionaries to a local JSON file
output_data_lasso = {
    'buy_dict_lasso': buy_dict_lasso_str,
    'sell_dict_lasso': sell_dict_lasso_str
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_lassoregression.json'
with open(output_path, 'w') as f:
    json.dump(output_data_lasso, f, indent=4)

# Report the file that was actually written (the old message named a different file).
print(f"Dictionaries saved to {output_path}")


# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_lasso = portfolio_return(returns_df, buy_dict_lasso, sell_dict_lasso,50)

RIDGE

In [None]:
import json

# Test-set signals from the ridge predictions (y_ridge_pred):
# positive -> buy, negative -> sell, zero -> skipped.
buy_dict_regn_ridge = {}
sell_dict_regn_ridge = {}

# Read the lookup columns once; testing_df.iloc[idx] inside the loop would build
# a full row Series twice per prediction.
stock_by_row = testing_df['Stock'].tolist()
date_by_row = testing_df['Date'].tolist()

# Predictions are matched to testing_df rows by position.
for idx, pred in enumerate(y_ridge_pred):
    stock, date = stock_by_row[idx], date_by_row[idx]
    if pred > 0:
        buy_dict_regn_ridge.setdefault(date, []).append((stock, pred))
    elif pred < 0:
        sell_dict_regn_ridge.setdefault(date, []).append((stock, pred))

# Strongest signal first (largest prediction for buys, most negative for sells),
# then drop the scores and keep only the tickers.
buy_dict_regn_ridge = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for day, picks in buy_dict_regn_ridge.items()
}
sell_dict_regn_ridge = {
    day: [ticker for ticker, _ in sorted(picks, key=lambda x: x[1])]
    for day, picks in sell_dict_regn_ridge.items()
}

# Convert the keys to strings for JSON serialization
buy_dict_regn_ridge_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in buy_dict_regn_ridge.items()}
sell_dict_regn_ridge_str = {day.strftime('%Y-%m-%d'): tickers for day, tickers in sell_dict_regn_ridge.items()}

# Save the dictionaries to a local JSON file
output_data_regn_ridge = {
    'buy_dict_regn_ridge': buy_dict_regn_ridge_str,
    'sell_dict_regn_ridge': sell_dict_regn_ridge_str
}

output_path = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_ridgeregression.json'
with open(output_path, 'w') as f:
    json.dump(output_data_regn_ridge, f, indent=4)

# Report the file that was actually written (the old message named a different file).
print(f"Dictionaries saved to {output_path}")

# Calculate the portfolio return
# NOTE(review): portfolio_return is defined outside this chunk; the meaning of 50 should be confirmed there.
buy_sell_ridge = portfolio_return(returns_df, buy_dict_regn_ridge, sell_dict_regn_ridge, 50)


NN

In [None]:
import json

# Initialize buy and sell dictionaries
buy_dict_regn_nn = {}
sell_dict_regn_nn = {}

# Classify stocks into buy and sell based on the sign of the network's
# prediction. A prediction of exactly 0 is placed in neither dictionary.
# The 'Stock' and 'Date' columns are walked in lockstep with the predictions
# (same positional pairing as testing_df.iloc[idx]) without building a row
# Series twice per prediction.
for stock, date, pred in zip(testing_df['Stock'], testing_df['Date'], y_pred_nn):
    if pred > 0:
        buy_dict_regn_nn.setdefault(date, []).append((stock, pred))
    elif pred < 0:
        sell_dict_regn_nn.setdefault(date, []).append((stock, pred))

# Rank each date's picks by predicted value (largest first for buys, most
# negative first for sells) and keep only the stock identifiers.
buy_dict_regn_nn = {
    date: [stock for stock, _ in sorted(picks, key=lambda x: x[1], reverse=True)]
    for date, picks in buy_dict_regn_nn.items()
}
sell_dict_regn_nn = {
    date: [stock for stock, _ in sorted(picks, key=lambda x: x[1])]
    for date, picks in sell_dict_regn_nn.items()
}

# Convert the keys to strings for JSON serialization
buy_dict_regn_nn_str = {date.strftime('%Y-%m-%d'): stocks for date, stocks in buy_dict_regn_nn.items()}
sell_dict_regn_nn_str = {date.strftime('%Y-%m-%d'): stocks for date, stocks in sell_dict_regn_nn.items()}

# Save the dictionaries to a local JSON file
output_data_regn_nn = {
    'buy_dict_regn_nn': buy_dict_regn_nn_str,
    'sell_dict_regn_nn': sell_dict_regn_nn_str
}

output_path_regn_nn = 'C:/Users/satya/OneDrive/Desktop/model_outputs/nse_weekly_regn_nn.json'
with open(output_path_regn_nn, 'w') as f:
    json.dump(output_data_regn_nn, f, indent=4)

# Report the file that was actually written (the previous message named
# "nse_weekly_neuralnetwork.json", which is not the file opened above).
print(f"Dictionaries saved to {output_path_regn_nn}")

# Calculate the portfolio return.
# portfolio_return is defined outside this section; 50 is passed through
# unchanged -- presumably the number of names per side, TODO confirm.
buy_sell_nn = portfolio_return(returns_df, buy_dict_regn_nn, sell_dict_regn_nn, 50)


# Plots

### Plots on training

In [None]:
def _train_returns_frame(returns_by_key, label):
    """Return a one-column DataFrame of `returns_by_key`, rows in sorted key order.

    The dictionary keys become the index and the single column is named `label`.
    """
    ordered = dict(sorted(returns_by_key.items()))
    return pd.DataFrame.from_dict(ordered, orient='index', columns=[label])


# One single-column frame per model for the training period.
dt_df_train = _train_returns_frame(buy_sell_dt_train, 'Decision Tree')
rf_df_train = _train_returns_frame(buy_sell_rf_train, 'RandomForest')
ada_df_train = _train_returns_frame(buy_sell_ada_train, 'AdaBoost')
regn_df_train = _train_returns_frame(buy_sell_regn_train, 'Regn')
log_df_train = _train_returns_frame(buy_sell_log_train, 'Logistic Regn')
nn_clf_train = _train_returns_frame(buy_sell_nn_clf_train, 'NN_Clf')
ridge_df_train = _train_returns_frame(buy_sell_ridge_train, 'Ridge')
lasso_df_train = _train_returns_frame(buy_sell_lasso_train, 'Lasso')
rf_regn_train = _train_returns_frame(buy_sell_regn_rf_train, 'RFRegn')
cart_train = _train_returns_frame(buy_sell_cart_train, 'CART')
nn_train = _train_returns_frame(buy_sell_nn_train, 'NN')
svr_train = _train_returns_frame(buy_sell_svr_train, 'SVR')


In [None]:
# Side-by-side (column-wise) join of the training-period model frames.
# svr_train is built in the previous cell but is deliberately left out here.
training_frames = [
    dt_df_train, rf_df_train, log_df_train, nn_clf_train, ada_df_train,
    regn_df_train, rf_regn_train, cart_train, ridge_df_train,
    lasso_df_train, nn_train,
]
plot_df_train = pd.concat(training_frames, axis=1)
# svr_train

In [None]:
# Preview the last rows of the combined training-period frame.
plot_df_train.tail()

In [None]:
import plotly.graph_objs as go
import plotly.express as px

def calculate_cumulative_returns(df):
    """Compound percentage returns into a running growth factor.

    Each value in `df` is treated as a return in percent: it is divided by
    100, turned into a growth factor (1 + r) and multiplied cumulatively down
    the rows, column by column.
    """
    growth_factors = df / 100 + 1
    return growth_factors.cumprod()

def plot_strategy_performance(strategy_df):
    """Draw one cumulative-return line per column of `strategy_df` with Plotly.

    Side effect: the index of the frame passed in is replaced by its
    pd.to_datetime conversion, so the caller's object is modified.
    """
    # Rewrites the caller's index in place (see docstring).
    strategy_df.index = pd.to_datetime(strategy_df.index)

    growth = calculate_cumulative_returns(strategy_df)

    # One line trace per strategy column, in column order.
    traces = [
        go.Scatter(x=growth.index, y=growth[strategy], mode='lines', name=strategy)
        for strategy in growth.columns
    ]
    fig = go.Figure(data=traces)

    fig.update_layout(
        title='Cumulative Returns of Strategies',
        xaxis_title='Date',
        yaxis_title='Cumulative Return',
        legend_title='Strategies'
    )
    fig.show()

plot_strategy_performance(plot_df_train)


### Plots on testing

In [None]:
# Re-key buy_sell_regn with pd.to_datetime applied to every key; the values
# are carried over untouched.
buy_sell_regn = dict(
    (pd.to_datetime(date), stocks) for date, stocks in buy_sell_regn.items()
)

In [None]:
def _test_returns_frame(returns_by_key, label):
    """Return a one-column DataFrame of `returns_by_key`, rows in sorted key order.

    The dictionary keys become the index and the single column is named `label`.
    """
    ordered = dict(sorted(returns_by_key.items()))
    return pd.DataFrame.from_dict(ordered, orient='index', columns=[label])


# One single-column frame per model for the testing period.
dt_df = _test_returns_frame(buy_sell_dt, 'Decision Tree')
rf_df = _test_returns_frame(buy_sell_rf, 'RandomForest')
ada_df = _test_returns_frame(buy_sell_ada, 'AdaBoost')
regn_df = _test_returns_frame(buy_sell_regn, 'Regn')
log_df = _test_returns_frame(buy_sell_log, 'Logistic Regn')
nn_clf = _test_returns_frame(buy_sell_nn_clf, 'NN_Clf')
ridge_df = _test_returns_frame(buy_sell_ridge, 'Ridge')
lasso_df = _test_returns_frame(buy_sell_lasso, 'Lasso')
rf_regn = _test_returns_frame(buy_sell_regn_rf, 'RFRegn')
# Labelled 'CART' (previously 'Cart') so the testing frame uses the same
# column name as the training frame and as the 'CART' lookup in the
# regression-model plot further down.
cart = _test_returns_frame(buy_sell_regn_cart, 'CART')
nn_df = _test_returns_frame(buy_sell_nn, 'NN')


In [None]:
# Side-by-side (column-wise) join of the testing-period model frames.
testing_frames = [
    dt_df, rf_df, log_df, nn_clf, ada_df,
    regn_df, ridge_df, lasso_df, rf_regn, cart, nn_df,
]
plot_df = pd.concat(testing_frames, axis=1)
# svr_df

In [None]:
import plotly.graph_objs as go
import plotly.express as px

def calculate_cumulative_returns(df):
    """Turn percent returns into cumulative growth factors, column by column.

    Same logic as the definition of this name earlier in the notebook; this
    cell rebinds it.
    """
    as_growth = df / 100 + 1
    return as_growth.cumprod()

def plot_all_strategy_performance(strategy_df):
    """Plot the cumulative return of every column in `strategy_df` with Plotly.

    Side effect: the index of the frame passed in is replaced by its
    pd.to_datetime conversion, so the caller's object is modified.
    """
    # Rewrites the caller's index in place (see docstring).
    strategy_df.index = pd.to_datetime(strategy_df.index)

    curves = calculate_cumulative_returns(strategy_df)

    # One line trace per strategy column, in column order.
    lines = [
        go.Scatter(x=curves.index, y=curves[strategy], mode='lines', name=strategy)
        for strategy in curves.columns
    ]
    fig = go.Figure(data=lines)

    fig.update_layout(
        title='Cumulative Returns of Strategies',
        xaxis_title='Date',
        yaxis_title='Cumulative Return',
        legend_title='Strategies'
    )
    fig.show()

plot_all_strategy_performance(plot_df)


# Benchmark

In [None]:
# Fetch the benchmark ('^CRSLDX') via yf.download with '2005-01-01' as the
# second positional argument, then keep only the 'Adj Close' data.
benchmark_download = yf.download('^CRSLDX', '2005-01-01')
nse = benchmark_download['Adj Close']

In [None]:
# Downsample the benchmark to 'W' (weekly) bins, keeping the last observation in each bin.
weekly_nse = nse.resample('W').last()

In [None]:
# Period-over-period change of the weekly series, multiplied by 100 (percent units), wrapped in a DataFrame.
nse_profit = pd.DataFrame(weekly_nse.pct_change()*100)

In [None]:
# Testing window: index strictly after '2019' and strictly before '2024'.
in_test_window = (nse_profit.index > '2019') & (nse_profit.index < '2024')
nse_test = nse_profit[in_test_window]

# Training window: index strictly before '2019'.
nse_train = nse_profit[nse_profit.index < '2019']

In [None]:
# Preview the last rows of the training-window benchmark returns.
nse_train.tail()

In [None]:
# Benchmark, training window: calculate_overall_cagrs is defined outside this section and is called with 'W'.
calculate_overall_cagrs(nse_train, 'W')

In [None]:
# Benchmark, training window: calculate_overall_sharpe_ratios is defined outside this section and is called with 'W'.
calculate_overall_sharpe_ratios(nse_train, 'W')

In [None]:
# Benchmark, testing window: same CAGR helper as used for the training window.
calculate_overall_cagrs(nse_test, 'W')

In [None]:
# Benchmark, testing window: same Sharpe-ratio helper as used for the training window.
calculate_overall_sharpe_ratios(nse_test, 'W')

# Vol Adjusted graph

### Training period

In [None]:
# Put the training-window benchmark returns next to the training-period strategy columns (column-wise concat).
x_train = pd.concat([plot_df_train, nse_train], axis=1)

In [None]:
# Drop the last two rows.
# NOTE(review): x_train is rebound to a slice of itself, so every re-run of
# this cell removes two more rows; the reason for trimming is not stated here.
x_train = x_train.iloc[:-2]

# Volatility (standard deviation) of each strategy and of the benchmark
volatilities = x_train.std()

# Extract the volatility of the benchmark
benchmark_volatility = volatilities.loc['Adj Close']

# Per-column factor that rescales each strategy to the benchmark's volatility
scaling_factors = benchmark_volatility / volatilities
adjusted_x_train = x_train.multiply(scaling_factors, axis='columns')

# Compound the rescaled percent returns into cumulative growth factors
cumulative_returns = (adjusted_x_train / 100 + 1).cumprod()

# One figure per strategy, each drawn against the benchmark curve
for column in cumulative_returns.columns:
    if column == 'Adj Close':
        continue  # the benchmark is the reference line, not a strategy

    benchmark_trace = go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns['Adj Close'],
        mode='lines',
        name='Benchmark',
    )
    strategy_trace = go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns[column],
        mode='lines',
        name=f'{column} (Adjusted)',
    )
    fig = go.Figure(data=[benchmark_trace, strategy_trace])

    fig.update_layout(
        title=f'Benchmark vs {column} (Adjusted)',
        xaxis_title='Date',
        yaxis_title='Cumulative Return',
        legend_title='Legend',
        template='plotly_dark'
    )
    fig.show()

# vol_adj_plot is not defined in this section (presumably a helper); it is
# given x_train with a further two trailing rows removed.
vol_adj_plot(x_train[:-2])

In [None]:
# Drop the last two rows.
# NOTE(review): x_train is rebound to a slice of itself, so this cell is not
# idempotent -- each run trims two more rows from whatever x_train currently is.
x_train = x_train.iloc[:-2]

# Volatility (standard deviation) of each strategy and of the benchmark
volatilities = x_train.std()

# Extract the volatility of the benchmark
benchmark_volatility = volatilities.loc['Adj Close']

# Per-column factor that rescales each strategy to the benchmark's volatility
scaling_factors = benchmark_volatility / volatilities

# Rescale every column by its factor
adjusted_x_train = x_train.multiply(scaling_factors, axis='columns')

In [None]:
import plotly.graph_objs as go
import plotly.express as px

def calculate_cumulative_returns(df):
    """Cumulative product of (1 + percent_return / 100) down each column.

    Same logic as the earlier definitions of this name in the notebook; this
    cell rebinds it.
    """
    per_period_growth = df / 100 + 1
    return per_period_growth.cumprod()

def plot_strategy_performance(strategy_df):
    """Draw one cumulative-return line per column of `strategy_df` with Plotly.

    This rebinds a name that is also defined earlier in the notebook with the
    same logic.

    Side effect: the index of the frame passed in is replaced by its
    pd.to_datetime conversion, so the caller's object is modified.
    """
    # Rewrites the caller's index in place (see docstring).
    strategy_df.index = pd.to_datetime(strategy_df.index)

    growth = calculate_cumulative_returns(strategy_df)

    # One line trace per strategy column, in column order.
    traces = [
        go.Scatter(x=growth.index, y=growth[strategy], mode='lines', name=strategy)
        for strategy in growth.columns
    ]
    fig = go.Figure(data=traces)

    fig.update_layout(
        title='Cumulative Returns of Strategies',
        xaxis_title='Date',
        yaxis_title='Cumulative Return',
        legend_title='Strategies'
    )
    fig.show()

plot_strategy_performance(adjusted_x_train)

In [None]:
# Result of calculate_overall_cagrs (defined outside this section) on the volatility-scaled training frame.
cagr_adj = calculate_overall_cagrs(adjusted_x_train, 'W')
# Bare expression so the notebook displays the result.
cagr_adj

In [None]:
# Result of calculate_overall_sharpe_ratios (defined outside this section) on the volatility-scaled training frame.
sharpe_ratio_adj = calculate_overall_sharpe_ratios(adjusted_x_train, 'W')
# Bare expression so the notebook displays the result.
sharpe_ratio_adj

### Testing period

In [None]:
# Put the testing-window benchmark returns next to the testing-period strategy columns (column-wise concat).
x_test = pd.concat([plot_df, nse_test], axis=1)

In [None]:
# Drop the last two rows.
# NOTE(review): x_test is rebound to a slice of itself, so every re-run of
# this cell removes two more rows; the reason for trimming is not stated here.
x_test = x_test.iloc[:-2]

# Volatility (standard deviation) of each strategy and of the benchmark
volatilities = x_test.std()

# Extract the volatility of the benchmark
benchmark_volatility = volatilities.loc['Adj Close']

# Per-column factor that rescales each strategy to the benchmark's volatility
scaling_factors = benchmark_volatility / volatilities
adjusted_x_test = x_test.multiply(scaling_factors, axis='columns')

# Compound the rescaled percent returns into cumulative growth factors
cumulative_returns = (adjusted_x_test / 100 + 1).cumprod()

# One figure per strategy, each drawn against the benchmark curve
for column in cumulative_returns.columns:
    if column == 'Adj Close':
        continue  # the benchmark is the reference line, not a strategy

    benchmark_trace = go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns['Adj Close'],
        mode='lines',
        name='Benchmark',
    )
    strategy_trace = go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns[column],
        mode='lines',
        name=f'{column} (Adjusted)',
    )
    fig = go.Figure(data=[benchmark_trace, strategy_trace])

    fig.update_layout(
        title=f'Benchmark vs {column} (Adjusted)',
        xaxis_title='Date',
        yaxis_title='Cumulative Return',
        legend_title='Legend',
        template='plotly_dark'
    )
    fig.show()

In [None]:
# Same CAGR helper on the volatility-scaled testing frame.
# Rebinds cagr_adj, which the training-period cell also assigns.
cagr_adj = calculate_overall_cagrs(adjusted_x_test, 'W')
# Bare expression so the notebook displays the result.
cagr_adj

In [None]:
# Same Sharpe-ratio helper on the volatility-scaled testing frame.
# Rebinds sharpe_ratio_adj, which the training-period cell also assigns.
sharpe_ratio_adj = calculate_overall_sharpe_ratios(adjusted_x_test, 'W')
# Displays the type of the result (an inspection aid; the cell shows no values).
type(sharpe_ratio_adj)

In [None]:
# Removing the 'Adj Close' entry from the series and sorting by value
sr_adj_filtered = sharpe_ratio_adj.drop(['Adj Close', 'Decision Tree']).sort_values()

# Creating the horizontal bar chart
plt.figure(figsize=(10, 7))
plt.barh(sr_adj_filtered.index, sr_adj_filtered.values, color='skyblue')

# Adding the values at the end of the bars
for index, value in enumerate(sr_adj_filtered.values):
    plt.text(value, index, f'{value:.2f}', va='center')

# Adding the dotted line for 'Adj Close' value
plt.axvline(x=sharpe_ratio_adj['Adj Close'], color='red', linestyle='--')

# Removing the top and right spines (axes)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Turning off gridlines
plt.grid(False)

# Labels and Title
plt.xlabel('Sharpe Ratio')
plt.ylabel('Models')
plt.title('Sharpe Ratio of Different Models')

# Displaying the plot
plt.show()


In [None]:
# Cumulative growth factors for the volatility-scaled testing-period frame.
cumulative_returns_df = calculate_cumulative_returns(adjusted_x_test)

In [None]:
# Cumulative growth factors for the volatility-scaled training-period frame.
cumulative_returns_df_train = calculate_cumulative_returns(adjusted_x_train)

In [None]:
import matplotlib.pyplot as plt

# Classification strategies to compare with the benchmark
columns = ['RandomForest',  'Logistic Regn', 'NN_Clf']

fig, ax = plt.subplots(figsize=(12, 8))
dates = cumulative_returns_df.index

# Benchmark first, in a fixed colour, then one line per selected strategy
ax.plot(dates, cumulative_returns_df['Adj Close'], label='Benchmark', color='blue')
for column in columns:
    ax.plot(dates, cumulative_returns_df[column], label=column)

# Title, axis labels and legend
ax.set(title='Classification Models', xlabel='Date', ylabel='Value')
ax.legend()

# Tidy the layout, switch gridlines off and render
plt.tight_layout()
ax.grid(False)
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Regression strategies to compare with the benchmark
cols = ['NN', 'CART', 'RFRegn', 'Ridge', 'Regn']

# NOTE(review): this figure reads cumulative_returns_df_train (training
# period), while the classification figure reads cumulative_returns_df
# (testing period) -- confirm that the difference is intended.
fig, ax = plt.subplots(figsize=(12, 8))
dates = cumulative_returns_df_train.index

# Benchmark first, in a fixed colour, then one line per selected strategy
ax.plot(dates, cumulative_returns_df_train['Adj Close'], label='Benchmark', color='blue')
for column in cols:
    ax.plot(dates, cumulative_returns_df_train[column], label=column)

# Title, axis labels and legend
ax.set(title='Regression Models', xlabel='Date', ylabel='Value')
ax.legend()

# Tidy the layout, switch gridlines off and render
plt.tight_layout()
ax.grid(False)
plt.show()


In [None]:
# Pairwise correlation between the testing-period strategy return columns
return_corr = plot_df.corr()

# Annotated heatmap with the colour scale pinned to [-0.5, 1]
fig, ax = plt.subplots(figsize=(8, 6))  # Adjust the figure size as needed
sns.heatmap(return_corr, annot=True, cmap='coolwarm', vmin=-0.5, vmax=1, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()