In [1]:
# DataFrame
import pandas as pd

# Numerical Python
import numpy as np

# Machine learning
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Technical Analysis Library
import talib as ta

# Plotly
import plotly.express as px
from plotly.subplots import make_subplots

In [2]:
pd.options.plotting.backend = "plotly" # Use plotly as the plotting backend

In [40]:
data = pd.read_csv('merged_stock_gdp_inflation.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Prepare the dataset
data['Monthly_Return'] = data['Close'].pct_change()
data.dropna(inplace=True)

# Extract the features and target variable
X = data.drop(columns=['Monthly_Return'])
y = data['Monthly_Return']

In [41]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

In [42]:
prediction_start_date = "2023-05-30"
prediction_end_date = "2023-12-31"
prediction_dates = pd.date_range(start=prediction_start_date, end=prediction_end_date, freq='M')
prediction_data = pd.DataFrame(index=prediction_dates, columns=X.columns)

latest_data = data.iloc[-1]
prediction_data.iloc[0] = latest_data.drop(['Monthly_Return'])

# Predict the monthly returns for June to September 2023
for i in range(1, len(prediction_data)):
    prediction = rf_model.predict(prediction_data.iloc[i-1].values.reshape(1, -1))
    prediction_data.iloc[i] = prediction

# Convert predicted returns to actual prices
for i in range(1, len(prediction_data)):
    prediction_data.iloc[i] = prediction_data.iloc[i-1] * (1 + prediction_data.iloc[i])




In [43]:
# Combine the prediction data with the original data (Adj Close prices)
prediction_data = pd.concat([data, prediction_data], axis=0)
prediction_data['Monthly_Return'] = prediction_data['Close'].pct_change()
prediction_data.index

DatetimeIndex(['2014-02-28', '2014-03-31', '2014-04-30', '2014-05-31',
               '2014-06-30', '2014-07-31', '2014-08-31', '2014-09-30',
               '2014-10-31', '2014-11-30',
               ...
               '2023-03-31', '2023-04-30', '2023-05-31', '2023-06-30',
               '2023-07-31', '2023-08-31', '2023-09-30', '2023-10-31',
               '2023-11-30', '2023-12-31'],
              dtype='datetime64[ns]', length=119, freq=None)

In [47]:
prediction_data.plot(y=['Close'])

In [44]:
prediction_data.to_csv('prediction_data.csv')

Unnamed: 0,Open,Close,Inflation,GDP,Monthly_Return
2014-02-28,6041.77002,6015.299805,4.2,1.5,
2014-03-31,6424.850098,6407.52002,4.1,1.5,0.065204
2014-04-30,6429.560059,6514.720215,3.9,1.5,0.016730
2014-05-31,6707.910156,6742.970215,4.1,1.4,0.035036
2014-06-30,6647.919922,6710.399902,4.5,1.4,-0.004830
...,...,...,...,...,...
2023-08-31,5067.327736,5071.739438,5.902799,1.553368,-0.112365
2023-09-30,4497.938857,4501.854839,5.239533,1.378824,-0.112365
2023-10-31,3992.529202,3996.005166,4.650794,1.223893,-0.112365
2023-11-30,3543.909762,3546.995151,4.128209,1.086371,-0.112365
