In [None]:
# Colab only: attach the user's Google Drive so the CSV read further down
# is reachable from the notebook's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# module imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import random
import os
# import mplfinance as mpf

In [None]:
# Load the combined index history from the mounted Drive into `df`.
df = pd.read_csv(filepath_or_buffer="/content/drive/MyDrive/combined_data_indice.csv")

In [None]:
# Count of missing values per column (displayed as the cell output).
df.isnull().sum()

Date                  0
Index_Code            0
Index_Name            0
Daily_Index        7124
Previous_Close     7124
Daily_Variation    7124
Highest_Index      7124
Lowest_Index       7124
Opening_Index      7124
dtype: int64

In [None]:
# Intraday move: closing level minus opening level.
df['price_change'] = df['Daily_Index'].sub(df['Opening_Index'])


In [None]:
# Day-over-day percentage return of the closing level.
# `df` stacks several indices (see `Index_Name`), so the change is computed
# inside each index; a frame-wide pct_change would compare the first close of
# one index with the last close of the index stored just before it.
df['returns'] = df.groupby('Index_Name')['Daily_Index'].pct_change()


In [None]:
# Midpoint between the closing and opening levels.
df['average_price'] = df['Daily_Index'].add(df['Opening_Index']).div(2)


In [None]:
# Intraday range: session high minus session low.
df['price_range'] = df['Highest_Index'].sub(df['Lowest_Index'])


In [None]:
# First difference of `Daily_Variation` (note: despite the column name, the
# source column is the daily variation, not a traded volume).
# Differenced per index so the first row of one index is not compared with
# the last row of the index stored before it in the stacked frame.
df['volume_change'] = df.groupby('Index_Name')['Daily_Variation'].diff()


In [None]:


# Correlation between the closing level and `Daily_Variation`, computed once
# over the whole frame; the single scalar is broadcast to every row.
df['price_volume_correlation'] = pd.Series.corr(df['Daily_Index'], df['Daily_Variation'])


In [None]:
# Correlation between `returns` and `Daily_Variation` over the whole frame;
# the resulting scalar is written into every row of the new column.
df['returns_volume_correlation'] = pd.Series.corr(df['returns'], df['Daily_Variation'])


In [None]:

# Covariance between the closing level and `Daily_Variation` over the whole
# frame; one scalar, repeated on every row.
df['price_volume_covariance'] = pd.Series.cov(df['Daily_Index'], df['Daily_Variation'])


In [None]:
# Covariance between `returns` and `Daily_Variation` over the whole frame;
# one scalar, repeated on every row.
df['returns_volume_covariance'] = pd.Series.cov(df['returns'], df['Daily_Variation'])


In [None]:
# 5-row simple moving average of the closing level, computed per index so a
# window never mixes closes from two different indices of the stacked frame.
# The first 4 rows of every index are NaN.
df['moving_average_5'] = df.groupby('Index_Name')['Daily_Index'].transform(
    lambda closes: closes.rolling(window=5).mean()
)


In [None]:
# 10-row simple moving average of the closing level, computed per index so a
# window never mixes closes from two different indices of the stacked frame.
# The first 9 rows of every index are NaN.
df['moving_average_10'] = df.groupby('Index_Name')['Daily_Index'].transform(
    lambda closes: closes.rolling(window=10).mean()
)


In [None]:
# 20-row simple moving average of the closing level, computed per index so a
# window never mixes closes from two different indices of the stacked frame.
# The first 19 rows of every index are NaN.
df['moving_average_20'] = df.groupby('Index_Name')['Daily_Index'].transform(
    lambda closes: closes.rolling(window=20).mean()
)


In [None]:
# Exponential moving average (span=5) of the closing level.
# Restarted for every index: a frame-wide ewm would seed each index with the
# decayed history of whichever index precedes it in the stacked frame.
df['exponential_moving_average_5'] = df.groupby('Index_Name')['Daily_Index'].transform(
    lambda closes: closes.ewm(span=5).mean()
)


In [None]:
# Exponential moving average (span=20) of the closing level.
# Restarted for every index: a frame-wide ewm would seed each index with the
# decayed history of whichever index precedes it in the stacked frame.
df['exponential_moving_average_20'] = df.groupby('Index_Name')['Daily_Index'].transform(
    lambda closes: closes.ewm(span=20).mean()
)


In [None]:
# MACD line = EMA(span=12) - EMA(span=26) of the closing level.
# Both EMAs are restarted for every index so the smoothing state of one
# index does not leak into the next one in the stacked frame.
closes_by_index = df.groupby('Index_Name')['Daily_Index']
df['exponential_moving_average_12'] = closes_by_index.transform(
    lambda closes: closes.ewm(span=12).mean()
)
df['exponential_moving_average_26'] = closes_by_index.transform(
    lambda closes: closes.ewm(span=26).mean()
)
df['macd'] = df['exponential_moving_average_12'] - df['exponential_moving_average_26']


In [None]:
# Signal line: 9-row rolling mean of the MACD line, computed per index so the
# window never spans two different indices of the stacked frame.
# NOTE(review): the conventional MACD signal is a 9-period EMA; the simple
# rolling mean used by this notebook is kept as-is — confirm it is intended.
df['macd_signal'] = df.groupby('Index_Name')['macd'].transform(
    lambda macd_line: macd_line.rolling(window=9).mean()
)


In [None]:
# Histogram: distance between the MACD line and its signal line.
df['macd_histogram'] = df['macd'].sub(df['macd_signal'])


In [None]:
def _rsi(closes, window=14):
    """Relative Strength Index of one index's closing levels.

    Uses simple rolling means of the positive and negative day-over-day
    moves (same formula as before): RSI = 100 - 100 / (1 + avg_gain / avg_loss).

    Parameters
    ----------
    closes : pandas.Series
        Closing levels of a single index, in chronological order.
    window : int, default 14
        Number of rows in each rolling mean.

    Returns
    -------
    pandas.Series
        RSI values aligned with `closes`; NaN until a full window is available.
    """
    delta = closes.diff()
    # Gains keep the upward moves, losses keep the (sign-flipped) downward moves.
    avg_gain = delta.clip(lower=0).rolling(window=window).mean()
    avg_loss = (-delta).clip(lower=0).rolling(window=window).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))


# Computed per index: a frame-wide diff/rolling would treat the jump from one
# index's last close to the next index's first close as a real price move.
rsi = df.groupby('Index_Name')['Daily_Index'].transform(_rsi)
df['rsi'] = rsi


In [None]:
# Preview the first two rows, including the engineered columns.
df.iloc[:2]

Unnamed: 0,Date,Index_Code,Index_Name,Daily_Index,Previous_Close,Daily_Variation,Highest_Index,Lowest_Index,Opening_Index,price_change,...,moving_average_10,moving_average_20,exponential_moving_average_5,exponential_moving_average_20,exponential_moving_average_12,exponential_moving_average_26,macd,macd_signal,macd_histogram,rsi
0,2008-05-30,905001,TUNINDEX,2957.56,2950.88,0.23,2958.27,2943.14,2946.62,10.94,...,,,2957.56,2957.56,2957.56,2957.56,0.0,,,
1,2008-06-02,905001,TUNINDEX,2961.87,2957.56,0.15,2970.14,2951.67,2962.93,-1.06,...,,,2960.146,2959.82275,2959.894583,2959.797885,0.096699,,,


In [None]:
# Display the column labels currently present on `df`.
df.columns

Index(['Date', 'Index_Code', 'Index_Name', 'Daily_Index', 'Previous_Close',
       'Daily_Variation', 'Highest_Index', 'Lowest_Index', 'Opening_Index',
       'price_change', 'returns', 'average_price', 'price_range',
       'volume_change', 'price_volume_correlation',
       'returns_volume_correlation', 'price_volume_covariance',
       'returns_volume_covariance', 'moving_average_5', 'moving_average_10',
       'moving_average_20', 'exponential_moving_average_5',
       'exponential_moving_average_20', 'exponential_moving_average_12',
       'exponential_moving_average_26', 'macd', 'macd_signal',
       'macd_histogram', 'rsi'],
      dtype='object')

In [None]:
# Shape statistics of the closing level, one value per index.
grouped_data = df.groupby('Index_Name')['Daily_Index']
skewness_per_asset = grouped_data.skew()
# `kurt` is the same pandas method as `kurtosis`.
kurtosis_per_asset = grouped_data.apply(pd.Series.kurt)


In [None]:

# Copy each index's skewness / kurtosis onto all of that index's rows.
for stat_column, per_index_stat in (
    ('skewness', skewness_per_asset),
    ('kurtosis', kurtosis_per_asset),
):
    df[stat_column] = df['Index_Name'].map(per_index_stat)


In [None]:
# Mean and standard deviation of `returns`, one value per index.
returns_by_index = df.groupby('Index_Name')['returns']
mean_daily_returns = returns_by_index.mean()
std_daily_returns = returns_by_index.std()


In [None]:
# Per-index Sharpe ratio: (mean return - risk-free rate) / std of returns,
# then attached to every row of the matching index.
risk_free_rate = 0.0  # Define the risk-free rate
sharpe_ratio = mean_daily_returns.sub(risk_free_rate).div(std_daily_returns)
df = pd.merge(
    df,
    sharpe_ratio.rename('sharpe_ratio'),
    how='left',
    on='Index_Name',
)


In [None]:
# Per-index volatility: standard deviation of `returns`, attached to every
# row of the matching index.
volatility = df.groupby('Index_Name')['returns'].std()
df = pd.merge(
    df,
    volatility.rename('volatility'),
    how='left',
    on='Index_Name',
)


In [None]:
# Per-index "liquidity": mean of `Daily_Variation`, attached to every row of
# the matching index.
liquidity = df.groupby('Index_Name')['Daily_Variation'].mean()
df = pd.merge(
    df,
    liquidity.rename('liquidity'),
    how='left',
    on='Index_Name',
)


In [None]:
# Keep only rows where every column is populated (this also removes the
# warm-up rows of the rolling indicators). `df` is rebound to the result.
df = df.dropna(axis=0, how='any')


In [None]:
# Parse the `Date` column into pandas datetimes.
df["Date"] = df["Date"].pipe(pd.to_datetime)

In [None]:
# Print the dtype and non-null count of every column of `df`.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 46372 entries, 19 to 53640
Data columns (total 34 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   Date                           46372 non-null  datetime64[ns]
 1   Index_Code                     46372 non-null  int64         
 2   Index_Name                     46372 non-null  object        
 3   Daily_Index                    46372 non-null  float64       
 4   Previous_Close                 46372 non-null  float64       
 5   Daily_Variation                46372 non-null  float64       
 6   Highest_Index                  46372 non-null  float64       
 7   Lowest_Index                   46372 non-null  float64       
 8   Opening_Index                  46372 non-null  float64       
 9   price_change                   46372 non-null  float64       
 10  returns                        46372 non-null  float64       
 11  average_price      

In [None]:
# Display the column labels of `df` after the per-index statistics were merged in.
df.columns

Index(['Date', 'Index_Code', 'Index_Name', 'Daily_Index', 'Previous_Close',
       'Daily_Variation', 'Highest_Index', 'Lowest_Index', 'Opening_Index',
       'price_change', 'returns', 'average_price', 'price_range',
       'volume_change', 'price_volume_correlation',
       'returns_volume_correlation', 'price_volume_covariance',
       'returns_volume_covariance', 'moving_average_5', 'moving_average_10',
       'moving_average_20', 'exponential_moving_average_5',
       'exponential_moving_average_20', 'exponential_moving_average_12',
       'exponential_moving_average_26', 'macd', 'macd_signal',
       'macd_histogram', 'rsi', 'skewness', 'kurtosis', 'sharpe_ratio',
       'volatility', 'liquidity'],
      dtype='object')

In [None]:
# Persist the engineered frame (row index included) to the working directory.
df.to_csv(path_or_buf='dfindice.csv', index=True)

In [None]:
# Columns kept for modelling. The groups are concatenated in order, so the
# resulting list order is also the column order of the selected frame.
selected_features = (
    # identifiers
    ['Date', 'Index_Name']
    # raw levels
    + ['Daily_Index', 'Previous_Close', 'Highest_Index', 'Lowest_Index', 'Opening_Index']
    # simple derived columns
    + ['average_price', 'volume_change', 'returns_volume_correlation']
    # trend indicators
    + ['moving_average_5', 'moving_average_10', 'moving_average_20']
    # momentum indicators
    + ['macd', 'rsi']
    # per-index summary statistics
    + ['skewness', 'kurtosis', 'sharpe_ratio', 'volatility', 'liquidity']
)

In [None]:
# Restrict `df` to the modelling columns, in `selected_features` order.
dftoscale = df.loc[:, selected_features]

In [None]:
# Distinct index names, in order of first appearance.
unique_values = pd.unique(dftoscale['Index_Name'])

In [None]:
# Publish one frame per index as a module-level variable named
# `data_<Index_Name>` (spaces in the name become underscores).
index_name_column = dftoscale['Index_Name']
for name in index_name_column.unique():
    variable_name = 'data_' + name.replace(' ', '_')
    globals()[variable_name] = dftoscale[index_name_column == name]

In [None]:
# Every `data_<Index_Name>` frame holds a single index, so its `Index_Name`
# column is dropped. Running this cell a second time raises, because the
# column is already gone.
for name in dftoscale['Index_Name'].unique():
    variable_name = 'data_' + name.replace(' ', '_')
    per_index_frame = globals()[variable_name]
    globals()[variable_name] = per_index_frame.drop(columns=["Index_Name"])

In [None]:
# Promote `Date` to the row index of every per-index frame; from here on
# `Date` is no longer a regular column of those frames.
for name in dftoscale['Index_Name'].unique():
    df_name = 'data_' + name.replace(' ', '_')
    if df_name not in globals():
        print(f"DataFrame {df_name} not found in globals().")
        continue
    globals()[df_name] = globals()[df_name].set_index("Date")

In [None]:
from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()

# for name in unique_values:
#     df_name = 'data_' + name.replace(' ', '_')
#     if df_name in globals():
#         # Randomly split the data into train and test sets
#         train_data, test_data = train_test_split(globals()[df_name], test_size=0.2, random_state=42)

#         # Scale the features in the train set
#         scaled_train_features = scaler.fit_transform(train_data)
#         scaled_train_df = pd.DataFrame(scaled_train_features, columns=train_data.columns, index=train_data.index)

#         # Scale the features in the test set using the scaler fitted on the train set
#         scaled_test_features = scaler.transform(test_data)
#         scaled_test_df = pd.DataFrame(scaled_test_features, columns=test_data.columns, index=test_data.index)

#         # Store the scaled train and test sets in new DataFrames with prefixes "scaled_train_" and "scaled_test_"
#         globals()['scaled_train_' + df_name] = scaled_train_df
#         globals()['scaled_test_' + df_name] = scaled_test_df
#     else:
#         print(f"DataFrame {df_name} not found in globals().")


In [None]:
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
from keras.optimizers import SGD
import math
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler


In [None]:
# Function to calculate RMSE
def return_rmse(test_set, predicted_stock_price):
    """Root-mean-square error between actual and predicted values.

    Both inputs are converted to flat float arrays and compared position by
    position. Flattening matters: subtracting an ``(n,)`` target from an
    ``(n, 1)`` prediction would otherwise broadcast to an ``(n, n)`` matrix
    and return a meaningless error without raising. It also lets plain lists
    be passed, and makes pandas inputs compare by position rather than by
    index label.

    Parameters
    ----------
    test_set : array-like
        Actual values.
    predicted_stock_price : array-like
        Predicted values, with the same number of elements as `test_set`.

    Returns
    -------
    numpy.float64
        The RMSE.

    Raises
    ------
    ValueError
        Raised by numpy when the two flattened inputs have incompatible lengths.
    """
    actual = np.asarray(test_set, dtype=float).ravel()
    predicted = np.asarray(predicted_stock_price, dtype=float).ravel()
    rmse = np.sqrt(np.mean((predicted - actual) ** 2))
    return rmse


In [None]:
# import plotly.graph_objects as go
# from keras.optimizers import RMSprop

# # Function to plot predictions using Plotly
# def plot_predictions_plotly(test_set, predicted_stock_price, company_name):
#     fig = go.Figure()
#     fig.add_trace(go.Scatter(x=np.arange(len(test_set)), y=test_set, mode='lines', name='Actual Stock Price'))
#     fig.add_trace(go.Scatter(x=np.arange(len(test_set)), y=predicted_stock_price, mode='lines', name='Predicted Stock Price'))
#     fig.update_layout(title=f"Actual vs Predicted Stock Price for {company_name}",
#                       xaxis_title="Time",
#                       yaxis_title="Stock Price")
#     fig.show()


# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import MinMaxScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dropout, Dense

# # Function to train LSTM model for a single company
# def train_model_for_company(name):
#     print(f"Training model for {name}")
#     df_name = 'data_' + name.replace(' ', '_')
#     if df_name in globals():
#         # Extract features and target
#         features = globals()[df_name].values
#         target = globals()[df_name]['Daily_Index'].values

#         # Scale the features
#         scaler = MinMaxScaler()
#         scaled_features = scaler.fit_transform(features)

#         # Reshape the data for LSTM input
#         X = []
#         y = []
#         for i in range(len(scaled_features) - 60):
#             X.append(scaled_features[i:i+60])
#             y.append(target[i+60])
#         X, y = np.array(X), np.array(y)

#         # Check if the training set is non-empty
#         if len(X) == 0:
#             print(f"Skipping training for {name} due to an empty training set.")
#             return

#         # Split the data into train and test sets
#         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=42)

#         # Define the LSTM model
#         model = Sequential([
#             LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
#             Dropout(0.2),
#             LSTM(units=50, return_sequences=True),
#             Dropout(0.2),
#             LSTM(units=50),
#             Dropout(0.2),
#             Dense(units=1)
#         ])

#         # Compile the model
#         model.compile(optimizer=RMSprop(), loss='mean_squared_error')

#         # Fit the model to the training data
#         model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1)  # Increase epochs for better training

#         # Make predictions on the test data
#         y_pred = model.predict(X_test)
#         plot_predictions_plotly(y_test, y_pred, name)

#         # Evaluate the model
#         rmse = np.sqrt(np.mean((y_test - y_pred)**2))
#         print(f"RMSE for {name}: {rmse}")

#         # Continue with other operations like plotting, storing models, etc.
#     else:
#         print(f"DataFrame {df_name} not found in globals().")

# # Loop over each company name and train the model sequentially
# trained_models = {}
# for name in unique_values:
#     train_model_for_company(name)

# print("All models trained successfully.")


In [None]:
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import MinMaxScaler
# from xgboost import XGBRegressor

# # Function to train XGBoost model for a single company
# def train_model_for_company(name):
#     print(f"Training model for {name}")
#     df_name = 'data_' + name.replace(' ', '_')
#     if df_name in globals():
#         # Extract features and target
#         features = globals()[df_name].values
#         target = globals()[df_name]['Daily_Index'].values

#         # Scale the features
#         scaler = MinMaxScaler()
#         scaled_features = scaler.fit_transform(features)

#         # Reshape the data for XGBoost input
#         X = []
#         y = []
#         for i in range(len(scaled_features) - 60):
#             X.append(scaled_features[i:i+60])
#             y.append(target[i+60])
#         X, y = np.array(X), np.array(y)

#         # Check if the training set is non-empty
#         if len(X) == 0:
#             print(f"Skipping training for {name} due to an empty training set.")
#             return

#         # Split the data into train and test sets
#         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=42)

#         # Define the XGBoost model
#         model = XGBRegressor(objective='reg:squarederror', n_estimators=100, max_depth=5, learning_rate=0.1)

#         # Fit the model to the training data
#         model.fit(X_train.reshape(X_train.shape[0], -1), y_train, eval_metric='rmse')

#         # Make predictions on the test data
#         y_pred = model.predict(X_test.reshape(X_test.shape[0], -1))

#         # Evaluate the model
#         rmse = np.sqrt(np.mean((y_test - y_pred)**2))
#         print(f"RMSE for {name}: {rmse}")

#         # Continue with other operations like plotting, storing models, etc.
#     else:
#         print(f"DataFrame {df_name} not found in globals().")

# # Loop over each company name and train the model sequentially
# trained_models = {}
# for name in unique_values:
#     train_model_for_company(name)

# print("All models trained successfully.")


In [None]:
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

def train_model_for_company_with_ARIMA(name, order=(5, 1, 0), train_fraction=0.8):
    """Walk-forward ARIMA evaluation of one index's closing level.

    Looks up the module-level frame ``data_<name>`` (spaces in `name` replaced
    by underscores), splits its ``Daily_Index`` column chronologically, and
    produces one-step-ahead forecasts over the test part: before each
    forecast an ARIMA model is refitted on every observation seen so far,
    then the true value is appended to the history.

    Parameters
    ----------
    name : str
        Index name as found in ``Index_Name``.
    order : tuple, default (5, 1, 0)
        ARIMA ``(p, d, q)`` order used for every refit. The default is the
        order the forecasts were already produced with.
    train_fraction : float, default 0.8
        Leading share of the series used as the initial history.

    Returns
    -------
    float or None
        RMSE of the one-step-ahead forecasts over the test part, or ``None``
        when the frame is missing or the split leaves either part empty.
    """
    print(f"Training ARIMA model for {name}")
    df_name = 'data_' + name.replace(' ', '_')
    if df_name not in globals():
        print(f"DataFrame {df_name} not found in globals().")
        return None

    # Work on a plain float array: the frame is indexed by date, and looking
    # up `series[t]` with an integer on such a Series relies on a deprecated
    # positional fallback.
    target = globals()[df_name]['Daily_Index'].to_numpy(dtype=float)

    # Chronological split (no shuffling).
    train_size = int(len(target) * train_fraction)
    train, test = target[:train_size], target[train_size:]
    if len(train) == 0 or len(test) == 0:
        print(f"Skipping {name}: not enough observations for a train/test split.")
        return None

    # The previous version also fitted an ARIMA(20,1,0) model on the training
    # part here and never used it; that fit is removed.
    history = list(train)
    predictions = []
    for obs in test:
        model_fit = ARIMA(history, order=order).fit()
        predictions.append(model_fit.forecast()[0])
        history.append(obs)

    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(test, predictions))
    print(f"RMSE for {name}: {rmse}")
    return rmse


In [None]:
# Display the dtype of every column of the TUNINDEX frame.
data_TUNINDEX.dtypes

Daily_Index                   float64
Previous_Close                float64
Highest_Index                 float64
Lowest_Index                  float64
Opening_Index                 float64
average_price                 float64
volume_change                 float64
returns_volume_correlation    float64
moving_average_5              float64
moving_average_10             float64
moving_average_20             float64
macd                          float64
rsi                           float64
skewness                      float64
kurtosis                      float64
sharpe_ratio                  float64
volatility                    float64
liquidity                     float64
dtype: object

In [None]:
# Run the ARIMA evaluation once per index, in `unique_values` order.
# `trained_models` is created empty and nothing in this cell fills it.
trained_models = dict()
for name in unique_values:
    train_model_for_company_with_ARIMA(name)

# Printed once, after the last index has been processed.
print("All models trained successfully.")

Training ARIMA model for TUNINDEX


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNINDEX: 31.520242557989594
Training ARIMA model for INDSF


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDSF: 30.27850952209858
Training ARIMA model for INDBQ


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDBQ: 30.901161568195842
Training ARIMA model for INSFI


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INSFI: 36.95311761024569
Training ARIMA model for INDSC


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDSC: 18.53194003074808
Training ARIMA model for INBCO


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INBCO: 60.698961519118164
Training ARIMA model for INAUE


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INAUE: 24.674218048951307
Training ARIMA model for INDIN


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDIN: 12.693528548907357
Training ARIMA model for INBMC


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INBMC: 8.772009375156573
Training ARIMA model for INDMB


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDMB: 40.51064090590274
Training ARIMA model for INDDI


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDDI: 30.092449729398187
Training ARIMA model for INDAS


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INDAS: 95.60593844187842
Training ARIMA model for INAAB


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INAAB: 86.76862060458924
Training ARIMA model for TUNINDEX20


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNINDEX20: 16.98545675991035
Training ARIMA model for INPMP


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for INPMP: 26.073229581525137
Training ARIMA model for TUNFIN


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNFIN: 11.541425552607436
Training ARIMA model for TUNBANQ


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNBANQ: 11.510172863694024
Training ARIMA model for TUNASS


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNASS: 58.81616865040501
Training ARIMA model for TUNSEFI


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNSEFI: 16.35774096229559
Training ARIMA model for TUNSAC


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNSAC: 27.028054390298543
Training ARIMA model for TUNDIS


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNDIS: 42.7782477710863
Training ARIMA model for TUNCONS


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNCONS: 36.403405703798065
Training ARIMA model for TUNALIM


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNALIM: 42.36236185697052
Training ARIMA model for TUNMENAG


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNMENAG: 21.1130768314796
Training ARIMA model for TUNIND


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


RMSE for TUNIND: 13.57916722947214
Training ARIMA model for TUNBATIM


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


RMSE for TUNBATIM: 8.651450734999393
Training ARIMA model for TUNBASE


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUNBASE: 19.731149919185636
Training ARIMA model for STES FINANCIERES


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


RMSE for STES FINANCIERES: 27.967949465925496
Training ARIMA model for PX1
RMSE for PX1: 29.12904791762782
Training ARIMA model for TUN20


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


RMSE for TUN20: 13.419716814718154
All models trained successfully.


In [None]:
import plotly.graph_objects as go

# Line plot of the TUNINDEX closing level over time.
# `Date` was promoted to the row index of every per-index frame (the
# `set_index("Date")` cell), so `data_TUNINDEX['Date']` raises KeyError;
# the dates are read from the index instead.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=data_TUNINDEX.index,
        y=data_TUNINDEX['Daily_Index'],
        mode='lines',
        name='Tunidex',
    )
)

# Customize layout
fig.update_layout(title='Closing Price of Tunidex',
                  xaxis_title='Date',
                  yaxis_title='Closing Price',
                  showlegend=True)

# Show plot
fig.show()

In [None]:
# FIXME: this cell originally read `plot_predictions_plotly( , data_TUNINDEX)`,
# which is a SyntaxError (empty first argument). In addition,
# `plot_predictions_plotly` only exists inside a commented-out cell, whose
# signature is (test_set, predicted_stock_price, company_name), and the ARIMA
# cell does not keep its predictions. The call is disabled until the helper is
# restored and actual/predicted series are available.
# plot_predictions_plotly(test_set, predicted_stock_price, "TUNINDEX")
