In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import warnings
warnings.simplefilter(action = 'ignore')

In [11]:
amzn = pd.read_csv("dataset/AMZN_2006-01-01_to_2018-01-01.csv")
googl = pd.read_csv("dataset/GOOGL_2006-01-01_to_2018-01-01.csv")
ibm = pd.read_csv("dataset/IBM_2006-01-01_to_2018-01-01.csv")
msft = pd.read_csv("dataset/MSFT_2006-01-01_to_2018-01-01.csv")
aapl = pd.read_csv("dataset/AAPL_2006-01-01_to_2018-01-01.csv")
intc = pd.read_csv("dataset/INTC_2006-01-01_to_2018-01-01.csv")
vz = pd.read_csv("dataset/VZ_2006-01-01_to_2018-01-01.csv")
amd = pd.read_csv("dataset/AMD.csv")
nvda = pd.read_csv("dataset/nvidia_all_time_daily_stock_prices.csv")
aaba = pd.read_csv("dataset/AABA_2006-01-01_to_2018-01-01.csv")

In [12]:
amd['Date'] = pd.to_datetime(amd['Date'])
nvda['Date'] = pd.to_datetime(nvda['Date'])
start_date = pd.to_datetime('2006-01-03')
end_date = pd.to_datetime('2017-12-29')
amd_crop = amd[(amd['Date'] >= start_date) & (amd['Date'] <= end_date)]
amd_crop = amd_crop.reset_index(drop=True)
amd_crop['Close'] = amd_crop['Close'].round(2)
nvda_crop = nvda[(nvda['Date'] >= start_date) & (nvda['Date'] <= end_date)]
nvda_crop = nvda_crop.reset_index(drop = True)
nvda_crop['Close'] = nvda_crop['Close'].round(2)

In [13]:
data = pd.concat([amzn[['Date','Close']], googl["Close"], msft["Close"], aapl["Close"], intc['Close'], 
                  nvda_crop['Close'], ibm["Close"], vz['Close'], aaba['Close'], amd_crop['Close']], axis = 1)
data.columns = ['Date', 'amzn', 'googl', 'msft', 'aapl', 'intc','nvda', 'ibm', 'vz', 'aaba', 'amd']
data = data.drop(data.index[-1])
data['Date'] = pd.to_datetime(data['Date'])
cols = list(data)[1:11]

train_data = data[:-100]
test_data = data[-100:]
test_data.head(5)

Unnamed: 0,Date,amzn,googl,msft,aapl,intc,nvda,ibm,vz,aaba,amd
2919,2017-08-09,982.01,940.08,72.47,161.06,36.59,42.58,142.11,48.6,59.77,13.11
2920,2017-08-10,956.92,923.59,71.41,155.32,36.14,43.03,141.77,48.22,58.03,12.83
2921,2017-08-11,967.99,930.09,72.5,157.48,35.87,41.19,141.84,48.02,57.92,12.12
2922,2017-08-14,983.3,938.93,73.59,159.85,36.34,38.99,141.84,48.06,58.87,12.23
2923,2017-08-15,982.74,938.08,73.22,161.6,36.0,42.1,142.32,48.78,59.99,12.76


In [14]:
train_date = train_data['Date']
test_date = test_data['Date']
train_data = train_data[cols].astype(float)
test_data = test_data[cols].astype(float)
np.shape(train_data)

train_data.info()
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2919 entries, 0 to 2918
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   amzn    2919 non-null   float64
 1   googl   2919 non-null   float64
 2   msft    2919 non-null   float64
 3   aapl    2919 non-null   float64
 4   intc    2919 non-null   float64
 5   nvda    2919 non-null   float64
 6   ibm     2919 non-null   float64
 7   vz      2919 non-null   float64
 8   aaba    2919 non-null   float64
 9   amd     2919 non-null   float64
dtypes: float64(10)
memory usage: 228.2 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 2919 to 3018
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   amzn    100 non-null    float64
 1   googl   100 non-null    float64
 2   msft    100 non-null    float64
 3   aapl    100 non-null    float64
 4   intc    100 non-null    float64
 5   nvda    100 non-null    float64
 

In [15]:
x_train, y_train = train_data[:-100], train_data[-100:]
x_test, y_test = train_data, test_data

In [16]:
model = RandomForestRegressor()

In [18]:
model.fit(train_data, train_data)

In [19]:
y_pred = model.predict(test_data)

In [29]:
y_pred = pd.DataFrame(y_pred, columns = ['amzn_pred', 'googl_pred', 'msft_pred', 'aapl_pred', 'intc_pred','nvda_pred', 'ibm_pred', 'vz_pred', 'aaba_pred', 'amd_pred'])

In [30]:
 y_test.head(5)

Unnamed: 0,amzn,googl,msft,aapl,intc,nvda,ibm,vz,aaba,amd
2919,982.01,940.08,72.47,161.06,36.59,42.58,142.11,48.6,59.77,13.11
2920,956.92,923.59,71.41,155.32,36.14,43.03,141.77,48.22,58.03,12.83
2921,967.99,930.09,72.5,157.48,35.87,41.19,141.84,48.02,57.92,12.12
2922,983.3,938.93,73.59,159.85,36.34,38.99,141.84,48.06,58.87,12.23
2923,982.74,938.08,73.22,161.6,36.0,42.1,142.32,48.78,59.99,12.76


In [31]:
y_pred.head(5)

Unnamed: 0,amzn_pred,googl_pred,msft_pred,aapl_pred,intc_pred,nvda_pred,ibm_pred,vz_pred,aaba_pred,amd_pred
0,986.9751,943.2511,72.0504,155.4482,36.0612,40.9931,145.8331,48.0612,58.3438,13.3281
1,959.7349,933.9713,69.337,147.8238,35.2617,33.7396,153.6377,45.9194,52.9529,12.8165
2,971.2772,939.4717,70.1801,148.8348,35.1895,36.7089,151.4472,46.3368,54.9401,12.8836
3,988.7333,945.8897,72.1907,153.9379,35.9361,41.0349,145.8056,47.9182,58.1573,13.343
4,987.0492,944.3539,72.0487,155.5383,35.9863,41.093,146.085,48.0755,58.3668,13.2969


In [32]:
import math
from statistics import mean

In [36]:
stocks = ['amzn', 'googl', 'msft', 'aapl', 'intc', 'nvda', 'ibm', 'vz', 'aaba', 'amd']
pmse_values = []
mean_values = []
std_values = []

for stock in stocks:
    # Calculate mean
    mean_value = y_test[stock].head(10).mean()
    mean_values.append(mean_value)
    
    # Calculate PMSE
    pmse_value = math.sqrt(mean_squared_error(y_pred[f'{stock}_pred'], y_test[stock]))
    pmse_values.append(pmse_value)
    
    # Calculate standard deviation
    std_value = y_test[stock].head(10).std()
    std_values.append(std_value)

# Calculate PMSE / Mean
pmse_to_mean = [pmse / mean for pmse, mean in zip(pmse_values, mean_values)]

# Calculate PMSE / Std
pmse_to_std = [pmse / std for pmse, std in zip(pmse_values, std_values)]

# Create DataFrame
error = {
    'Stock': stocks,
    'Mean': [round(mean, 2) for mean in mean_values],
    'PMSE': [round(pmse, 2) for pmse in pmse_values],
    'PMSE / Mean': [round(pmse_mean, 2) for pmse_mean in pmse_to_mean],
    'Std': [round(std, 2) for std in std_values],
    'PMSE / Std': [round(pmse_std, 2) for pmse_std in pmse_to_std]
}

Error = pd.DataFrame(error)
Error.head(10)

Unnamed: 0,Stock,Mean,PMSE,PMSE / Mean,Std,PMSE / Std
0,amzn,969.04,83.39,0.09,11.68,7.14
1,googl,933.02,41.79,0.04,8.25,5.07
2,msft,72.7,7.99,0.11,0.7,11.46
3,aapl,158.86,14.61,0.09,2.07,7.06
4,intc,35.65,7.09,0.2,0.66,10.68
5,nvda,41.15,9.23,0.22,1.27,7.25
6,ibm,141.52,6.29,0.04,0.94,6.71
7,vz,48.23,4.15,0.09,0.34,12.31
8,aaba,60.95,12.46,0.2,2.53,4.93
9,amd,12.55,1.8,0.14,0.38,4.78
