In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor

# Load the dataset
file_path = '/Users/tanishq/Desktop/Projects/bank stock prices/master_stock_prices.csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Preprocessing
# Convert 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'])

# Fill missing values in lagged features using forward fill
data['Price Lag 1'] = data['Price Lag 1'].fillna(method='ffill')
data['Change % Lag 1'] = data['Change % Lag 1'].fillna(method='ffill')

# Clean and convert numeric columns
data['DFM Index'] = data['DFM Index'].str.replace(',', '').astype(float)
data['Brent Oil Price (AED)'] = data['Brent Oil Price (AED)'].astype(float)

# Encode the 'Bank Name' column
label_encoder = LabelEncoder()
data['Bank Name Encoded'] = label_encoder.fit_transform(data['Bank Name'])

data.to_csv('/Users/tanishq/Desktop/Projects/bank stock prices/master_final.csv', index=False)





  data['Price Lag 1'] = data['Price Lag 1'].fillna(method='ffill')
  data['Change % Lag 1'] = data['Change % Lag 1'].fillna(method='ffill')


In [2]:
# Define numeric columns
numeric_columns = [
    'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %', 'Price Lag 1',
    'Change % Lag 1', 'Price MA 5', 'Price MA 10', 'Price per Vol',
    'DFM Index', 'USD to AED Exchange Rate', 'Brent Oil Price (AED)',
    'GDP_Quarterly', 'Overnight'
]

# Normalize numeric features
scaler = MinMaxScaler()
data[numeric_columns] = scaler.fit_transform(data[numeric_columns])

# Fill remaining missing values with column means
data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].mean())


In [3]:
# Function to forecast prices
def forecast_prices(data, numeric_columns, forecast_days=30, forecast_months=24):
    # Train a Random Forest model for each bank
    adjusted_predictions_daily = {}
    adjusted_predictions_monthly = {}
    for bank in data['Bank Name'].unique():
        bank_data = data[data['Bank Name'] == bank]
        X = bank_data[numeric_columns]
        y = bank_data['Price']
        
        # Time-based train-test split (80% train, 20% test)
        train_size = int(0.8 * len(bank_data))
        X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
        
        # Train the Random Forest model
        rf_model = RandomForestRegressor(random_state=42)
        rf_model.fit(X_train, y_train)
        
        # Predict daily prices for the next `forecast_days`
        last_data = bank_data.iloc[-1].copy()
        future_daily = pd.DataFrame(columns=numeric_columns)
        for day in range(forecast_days):
            features = pd.DataFrame([last_data[numeric_columns]], columns=numeric_columns)
            scaled_features = scaler.transform(features)
            predicted_price = rf_model.predict(scaled_features)[0]
            predicted_price += np.random.normal(scale=0.005)  # Add randomness
            
            # Update lagged values
            last_data['Price Lag 1'] = last_data['Price']
            last_data['Price'] = predicted_price
            last_data['Change % Lag 1'] = (predicted_price - last_data['Price Lag 1']) / last_data['Price Lag 1']
            
            if 'Price MA 5' in numeric_columns:
                last_data['Price MA 5'] = (last_data['Price MA 5'] * 4 + predicted_price) / 5
            
            # Append to future_daily
            future_daily = pd.concat(
                [future_daily, pd.DataFrame([last_data[numeric_columns]], columns=numeric_columns)],
                ignore_index=True
            )
        adjusted_predictions_daily[bank] = future_daily['Price']
        
        # Predict monthly prices for the next `forecast_months`
        future_monthly = pd.DataFrame(columns=numeric_columns)
        for month in range(forecast_months):
            features = pd.DataFrame([last_data[numeric_columns]], columns=numeric_columns)
            scaled_features = scaler.transform(features)
            predicted_price = rf_model.predict(scaled_features)[0]
            predicted_price += np.random.normal(scale=0.005)
            
            # Update lagged values
            last_data['Price Lag 1'] = last_data['Price']
            last_data['Price'] = predicted_price
            last_data['Change % Lag 1'] = (predicted_price - last_data['Price Lag 1']) / last_data['Price Lag 1']
            
            if 'Price MA 5' in numeric_columns:
                last_data['Price MA 5'] = (last_data['Price MA 5'] * 4 + predicted_price) / 5
            
            # Append to future_monthly
            future_monthly = pd.concat(
                [future_monthly, pd.DataFrame([last_data[numeric_columns]], columns=numeric_columns)],
                ignore_index=True
            )
        adjusted_predictions_monthly[bank] = future_monthly['Price']
    
    # Convert predictions to DataFrame
    adjusted_forecast_daily_df = pd.DataFrame(adjusted_predictions_daily)
    adjusted_forecast_daily_df.index.name = 'Future Day'
    
    adjusted_forecast_monthly_df = pd.DataFrame(adjusted_predictions_monthly)
    adjusted_forecast_monthly_df.index.name = 'Future Month'
    
    return adjusted_forecast_daily_df, adjusted_forecast_monthly_df

In [4]:
# Generate forecasts
daily_forecast, monthly_forecast = forecast_prices(data, numeric_columns, forecast_days=30, forecast_months=24)

# Restore original prices for both forecasts
def restore_original_prices(normalized_prices, scaler, column_index):
    min_val = scaler.data_min_[column_index]
    max_val = scaler.data_max_[column_index]
    return normalized_prices * (max_val - min_val) + min_val

restored_daily_forecast = daily_forecast.copy()
restored_monthly_forecast = monthly_forecast.copy()

for bank in restored_daily_forecast.columns:
    restored_daily_forecast[bank] = restore_original_prices(
        restored_daily_forecast[bank].values, scaler, numeric_columns.index('Price')
    )

for bank in restored_monthly_forecast.columns:
    restored_monthly_forecast[bank] = restore_original_prices(
        restored_monthly_forecast[bank].values, scaler, numeric_columns.index('Price')
    )


  future_daily = pd.concat(
  future_monthly = pd.concat(
  future_daily = pd.concat(
  future_monthly = pd.concat(
  future_daily = pd.concat(
  future_monthly = pd.concat(
  future_daily = pd.concat(
  future_monthly = pd.concat(


In [5]:
# Current prices for recalibration
current_prices = {"ADCB": 10.3, "DIB": 7.05, "ENBD": 21.15, "FAB": 13.58}

# Recalibrate predictions to align with today's prices
adjusted_daily_forecast_scaled = restored_daily_forecast.copy()
adjusted_monthly_forecast_scaled = restored_monthly_forecast.copy()

for bank, current_price in current_prices.items():
    scaling_factor = current_price / restored_daily_forecast[bank].iloc[0]
    adjusted_daily_forecast_scaled[bank] *= scaling_factor

    scaling_factor = current_price / restored_monthly_forecast[bank].iloc[0]
    adjusted_monthly_forecast_scaled[bank] *= scaling_factor

# Display forecasts
print("Daily Forecast:")
print(adjusted_daily_forecast_scaled.head())
print("\nMonthly Forecast:")
print(adjusted_monthly_forecast_scaled.head())

Daily Forecast:
                 ADCB       DIB       ENBD        FAB
Future Day                                           
0           10.300000  7.050000  21.150000  13.580000
1           10.626135  6.709523  22.362955  13.485311
2           10.198979  6.862584  21.997864  13.530117
3           10.391287  5.998203  21.548099  13.303263
4           10.091277  6.936939  21.128006  13.466504

Monthly Forecast:
                   ADCB       DIB       ENBD        FAB
Future Month                                           
0             10.300000  7.050000  21.150000  13.580000
1             10.361959  7.106741  21.022990  13.641032
2              9.896876  7.265058  21.516094  13.835358
3              9.714834  6.887950  21.150394  13.742086
4              9.738094  7.015445  22.122876  13.567179


In [6]:
adjusted_daily_forecast_scaled

Unnamed: 0_level_0,ADCB,DIB,ENBD,FAB
Future Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10.3,7.05,21.15,13.58
1,10.626135,6.709523,22.362955,13.485311
2,10.198979,6.862584,21.997864,13.530117
3,10.391287,5.998203,21.548099,13.303263
4,10.091277,6.936939,21.128006,13.466504
5,10.420136,6.575189,21.056479,13.612574
6,10.236528,6.756452,21.771232,13.468557
7,10.702811,6.892184,21.843182,13.306181
8,10.791804,6.809919,22.021885,13.546988
9,10.422173,6.780444,21.691699,13.51268


In [7]:
adjusted_monthly_forecast_scaled

Unnamed: 0_level_0,ADCB,DIB,ENBD,FAB
Future Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10.3,7.05,21.15,13.58
1,10.361959,7.106741,21.02299,13.641032
2,9.896876,7.265058,21.516094,13.835358
3,9.714834,6.88795,21.150394,13.742086
4,9.738094,7.015445,22.122876,13.567179
5,9.510181,7.027231,21.378546,13.786167
6,10.359932,7.384472,22.169082,13.281588
7,10.142848,6.749583,21.381049,13.645253
8,9.983033,6.932769,21.652472,13.78411
9,9.933524,7.268647,21.139551,13.764581


In [8]:
from datetime import timedelta

# Generate future dates for daily forecasts
last_date = data['Date'].max()  # Get the last available date in the dataset
future_daily_dates = [last_date + timedelta(days=i) for i in range(1, len(adjusted_daily_forecast_scaled) + 1)]

# Generate future dates for monthly forecasts
future_monthly_dates = [
    last_date + timedelta(days=30 * i) for i in range(1, len(adjusted_monthly_forecast_scaled) + 1)
]

# Convert forecasts into DataFrames with dates
daily_forecast_with_dates = adjusted_daily_forecast_scaled.copy()
daily_forecast_with_dates.insert(0, 'Date', future_daily_dates)

monthly_forecast_with_dates = adjusted_monthly_forecast_scaled.copy()
monthly_forecast_with_dates.insert(0, 'Date', future_monthly_dates)

# Combine daily and monthly forecasts
daily_forecast_with_dates['Forecast Type'] = 'Daily'
monthly_forecast_with_dates['Forecast Type'] = 'Monthly'

combined_forecasts = pd.concat([daily_forecast_with_dates, monthly_forecast_with_dates], ignore_index=True)

# Save to CSV
output_path = '/Users/tanishq/Desktop/Projects/bank stock prices/combined_forecasts.csv'  # Replace with desired path
combined_forecasts.to_csv(output_path, index=False)

# Display a preview of the combined dataset
print("Combined Forecasts:")
print(combined_forecasts.head())


Combined Forecasts:
        Date       ADCB       DIB       ENBD        FAB Forecast Type
0 2024-12-17  10.300000  7.050000  21.150000  13.580000         Daily
1 2024-12-18  10.626135  6.709523  22.362955  13.485311         Daily
2 2024-12-19  10.198979  6.862584  21.997864  13.530117         Daily
3 2024-12-20  10.391287  5.998203  21.548099  13.303263         Daily
4 2024-12-21  10.091277  6.936939  21.128006  13.466504         Daily


In [9]:
combined_forecasts

Unnamed: 0,Date,ADCB,DIB,ENBD,FAB,Forecast Type
0,2024-12-17,10.3,7.05,21.15,13.58,Daily
1,2024-12-18,10.626135,6.709523,22.362955,13.485311,Daily
2,2024-12-19,10.198979,6.862584,21.997864,13.530117,Daily
3,2024-12-20,10.391287,5.998203,21.548099,13.303263,Daily
4,2024-12-21,10.091277,6.936939,21.128006,13.466504,Daily
5,2024-12-22,10.420136,6.575189,21.056479,13.612574,Daily
6,2024-12-23,10.236528,6.756452,21.771232,13.468557,Daily
7,2024-12-24,10.702811,6.892184,21.843182,13.306181,Daily
8,2024-12-25,10.791804,6.809919,22.021885,13.546988,Daily
9,2024-12-26,10.422173,6.780444,21.691699,13.51268,Daily


In [10]:
# Restore the original scale for predictions
def restore_original_scale(scaled_prices, scaler, column_name):
    """
    Restores the original scale of predictions or prices using the MinMaxScaler.
    """
    column_index = numeric_columns.index(column_name)
    min_val = scaler.data_min_[column_index]
    max_val = scaler.data_max_[column_index]
    return scaled_prices * (max_val - min_val) + min_val

# Restore daily and monthly forecasts to their original scale
restored_daily_forecast = adjusted_daily_forecast_scaled.copy()
restored_monthly_forecast = adjusted_monthly_forecast_scaled.copy()

for bank in restored_daily_forecast.columns[:-1]:  # Exclude the 'Date' column
    restored_daily_forecast[bank] = restore_original_scale(
        restored_daily_forecast[bank], scaler, 'Price'
    )
    
for bank in restored_monthly_forecast.columns[:-1]:  # Exclude the 'Date' column
    restored_monthly_forecast[bank] = restore_original_scale(
        restored_monthly_forecast[bank], scaler, 'Price'
    )

# Generate future dates for daily and monthly forecasts
future_daily_dates = pd.date_range(
    start=data['Date'].max() + pd.Timedelta(days=1), 
    periods=len(restored_daily_forecast)
)
future_monthly_dates = pd.date_range(
    start=future_daily_dates[-1] + pd.Timedelta(days=1), 
    periods=len(restored_monthly_forecast), 
    freq='MS'  # Monthly Start frequency
)

# Add dates to the forecasts
restored_daily_forecast['Date'] = future_daily_dates
restored_monthly_forecast['Date'] = future_monthly_dates

# Add a "Prediction Type" column
restored_daily_forecast['Prediction Type'] = 'Daily'
restored_monthly_forecast['Prediction Type'] = 'Monthly'

# Combine daily and monthly forecasts
combined_forecast = pd.concat([restored_daily_forecast, restored_monthly_forecast], ignore_index=True)

# Reshape the combined forecast to long format
forecast_long = combined_forecast.melt(
    id_vars=['Date', 'Prediction Type'], 
    var_name='Bank Name', 
    value_name='Predicted Price'
)

# Prepare the actual data for merging
actual_data = data[['Date', 'Bank Name', 'Price']].rename(columns={'Price': 'Actual Price'})

# Merge actual data with the combined forecast
combined_data = pd.merge(actual_data, forecast_long, on=['Date', 'Bank Name'], how='outer')

# Sort by Date and Bank Name for better readability
combined_data = combined_data.sort_values(by=['Date', 'Bank Name'])

# Save the combined dataset
combined_data.to_csv('/Users/tanishq/Desktop/Projects/bank_stock_prices_combined_actual_predicted.csv', index=False)

# Display the combined dataset
combined_data.head()


Unnamed: 0,Date,Bank Name,Actual Price,Prediction Type,Predicted Price
0,2014-12-17,ADCB,0.141491,,
1,2014-12-17,DIB,0.05478,,
2,2014-12-17,ENBD,0.150096,,
3,2014-12-17,FAB,0.326004,,
4,2014-12-18,ADCB,0.184512,,


In [11]:
combined_data

Unnamed: 0,Date,Bank Name,Actual Price,Prediction Type,Predicted Price
0,2014-12-17,ADCB,0.141491,,
1,2014-12-17,DIB,0.054780,,
2,2014-12-17,ENBD,0.150096,,
3,2014-12-17,FAB,0.326004,,
4,2014-12-18,ADCB,0.184512,,
...,...,...,...,...,...
10144,2026-12-01,FAB,,Monthly,13.665176
10145,2027-01-01,ADCB,,Monthly,215.232927
10146,2027-01-01,DIB,,Monthly,149.067551
10147,2027-01-01,ENBD,,Monthly,456.561792
