In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn import metrics
import pandas_datareader.data as web
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Data preparation

In [None]:
# Download the raw series with pandas_datareader.
# Stock prices come from the 'yahoo' source; currency and index series come from 'fred'.
# NOTE(review): no start/end dates are passed, so the date range is whatever
# pandas_datareader uses by default — confirm this is the intended window.
stk_tickers = ['MSFT', 'IBM', 'GOOGL']      # equities
ccy_tickers = ['DEXJPUS', 'DEXUSUK']        # FRED currency series
idx_tickers = ['SP500', 'DJIA', 'VIXCLS']   # FRED index series

stk_data = web.DataReader(name=stk_tickers, data_source='yahoo')
ccy_data = web.DataReader(name=ccy_tickers, data_source='fred')
idx_data = web.DataReader(name=idx_tickers, data_source='fred')

In [None]:
# Pick the series used downstream:
#   base -> MSFT adjusted close (the series the target is derived from)
#   X1   -> adjusted close of the other two stocks (GOOGL, IBM)
#   X2   -> currency series, X3 -> index series (used as downloaded)
base = stk_data.loc[:, ('Adj Close', 'MSFT')]
X1 = stk_data.loc[:, ('Adj Close', ('GOOGL', 'IBM'))]
X2, X3 = ccy_data, idx_data

# Print each feature group for a quick visual check.
for feature_group in (X1, X2, X3):
    print(feature_group)

In [None]:
# Standardise X1, X2 and X3 column-wise.
# One scaler object is re-fitted for each group in turn, so after this cell it
# holds the statistics of X3 only. The results are plain NumPy arrays; the date
# index is re-attached in the next cell.
standard_scaler = preprocessing.StandardScaler()
sX1, sX2, sX3 = (
    standard_scaler.fit_transform(raw_group.values)
    for raw_group in (X1, X2, X3)
)
print(type(sX1))
sX1

In [None]:
# Put the scaled arrays back into DataFrames that carry the original
# index and column labels of the corresponding raw frames.
sX1, sX2, sX3 = (
    pd.DataFrame(data=scaled_values, index=raw_group.index, columns=raw_group.columns)
    for scaled_values, raw_group in ((sX1, X1), (sX2, X2), (sX3, X3))
)
sX1

In [None]:
# Display the 'Adj Close' columns of X1 (the GOOGL and IBM prices selected above).
X1['Adj Close']

In [None]:
# Build the target and the lagged-difference features from the MSFT 'Adj Close' series.
return_period = 1  # forecast horizon: predict 1 day ahead

# Target: the MSFT price `return_period` days in the future.
Y = base.shift(-return_period)

# Backward-looking price differences: value at day t is price[t] - price[t - k].
# The previous version appended .shift(-k), which turned each feature into
# price[t + k] - price[t]. That is information from the future, overlapping the
# target itself (look-ahead leakage), so the shift has been removed.
X4_3DT = base.diff(3 * return_period)
X4_6DT = base.diff(6 * return_period)
X4_12DT = base.diff(12 * return_period)
X4 = pd.concat([X4_3DT, X4_6DT, X4_12DT], axis=1)
X4.columns = ['MSFT_3DT', 'MSFT_6DT', 'MSFT_12DT']
# Standardise the lag features (this re-fits the shared scaler on X4).
X4 = pd.DataFrame(standard_scaler.fit_transform(X4.values), index=X4.index, columns=X4.columns)

# Forming Dataset: target in the first column, all features after it.
X = pd.concat([sX1, sX2, sX3, X4], axis=1)
dataset = pd.concat([Y, X], axis=1)
print(type(dataset))


In [None]:
# Keep only rows where the target and every feature are present.
dataset = dataset.dropna()
dataset

In [None]:
# Summarise the cleaned dataset (column dtypes and non-null counts).
dataset.info()

In [None]:
# Separate the target (first column) from the features (remaining columns),
# replacing the date index with a plain 0..n-1 index in both.
target_column = dataset.iloc[:, 0].reset_index(drop=True)
feature_columns = dataset.iloc[:, 1:].reset_index(drop=True)
Y = target_column.to_frame()
X = pd.DataFrame(feature_columns)
print(Y)
print(X)

In [None]:
# Pairwise correlation matrix of the feature columns.
X.corr()

In [None]:
# Flag redundant features: a column is dropped when its correlation with any
# later column exceeds 0.9 in absolute value.
# Taking abs() matters because a strongly negative correlation (e.g. -0.95)
# is just as redundant as a strongly positive one, but `corr > 0.9` misses it.
# np.tril(..., -1) keeps the strictly lower triangle so each pair is checked once
# and the diagonal (always 1.0) is ignored.
lower = pd.DataFrame(np.tril(X.corr().abs(), -1), columns=X.columns)
to_drop = [column for column in lower.columns if (lower[column] > 0.9).any()]
to_drop


In [None]:
# Remove the highly correlated columns identified in `to_drop`.
X = X.drop(columns=to_drop)


In [None]:
# Display the feature matrix after the correlated columns were dropped.
X

# Train/test split

In [None]:
# Ordered (unshuffled) 70/30 split: the first 70% of rows form the training set
# and the remaining rows form the test set.
from math import ceil, floor

train_size = floor(0.7 * len(X))
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
Y_train, Y_test = Y.iloc[:train_size], Y.iloc[train_size:]

print(train_size)


## Kfold

In [None]:
# Cross-validation scheme: 4 shuffled folds with a fixed seed, so every
# cross_val_score call below sees the same partition of the training rows.
num_fold, seed = 4, 500
kfold = KFold(
    n_splits=num_fold,
    shuffle=True,
    random_state=seed,
)

## SVR train

In [None]:
import warnings
# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Model selection
# The `normalize` argument was removed from LinearRegression in scikit-learn 1.2
# and passing it raises a TypeError there. Its old default was False, so simply
# omitting it gives the same model on every version.
model_LM = LinearRegression(copy_X=True, fit_intercept=True, n_jobs=4)

# SVR regularisation strengths to compare (try at least 3 values).
lst = [0.1, 1, 10, 100]

# Mean cross-validation score per C value, one list per model (used by the bar chart).
AVG_Linear_Model = []
AVG_SVR_linear = []
AVG_SVR_rbf = []
AVG_SVR_poly = []

# SVR expects a 1-D target; Y_train is a single-column DataFrame.
y_train_1d = Y_train.values.ravel()

# Linear regression has no C parameter and `kfold` uses a fixed seed, so its
# scores are identical for every C. Compute them once instead of per iteration.
score_LM = cross_val_score(model_LM, X_train, Y_train, cv=kfold)

for c_val in lst:
    svr_lin = SVR(kernel='linear', C=c_val)
    svr_rbf = SVR(kernel='rbf', C=c_val, gamma=0.01)
    svr_poly = SVR(kernel='poly', C=c_val, degree=2)

    # Cross-validation score of each SVR variant (default regressor scoring).
    score_lin = cross_val_score(svr_lin, X_train, y_train_1d, cv=kfold)
    score_rbf = cross_val_score(svr_rbf, X_train, y_train_1d, cv=kfold)
    score_poly = cross_val_score(svr_poly, X_train, y_train_1d, cv=kfold)

    # Per-fold scores and their means for this C value.
    score = pd.DataFrame({'Linear Model': score_LM, 'SVR_linear': score_lin,
                          'SVR_rbf': score_rbf, 'SVR_poly': score_poly})
    score_mean = pd.DataFrame({'AVG Linear Model': [score_LM.mean()],
                               'AVG SVR_linear': [score_lin.mean()],
                               'AVG SVR_rbf': [score_rbf.mean()],
                               'AVG SVR_poly': [score_poly.mean()]})
    print(score)
    print(score_mean)

    # Collect the means so they can be plotted against C afterwards.
    AVG_Linear_Model.append(score_LM.mean())
    AVG_SVR_linear.append(score_lin.mean())
    AVG_SVR_rbf.append(score_rbf.mean())
    AVG_SVR_poly.append(score_poly.mean())


In [None]:
# Grouped bar chart: mean CV score of every model for each C value.
# Tick labels are derived from `lst` so they cannot drift out of sync with the
# values that were actually evaluated.
cvall = [f'c {c}' for c in lst]
bar_width = 0.2
avg_scores = [
    ('avg_linear', AVG_Linear_Model),
    ('avg_svr_linear', AVG_SVR_linear),
    ('avg_svr_rbf', AVG_SVR_rbf),
    ('avg_svr_poly', AVG_SVR_poly),
]

plt.figure(figsize=(15, 8))
X_axis = np.arange(len(AVG_Linear_Model))
for position, (label, values) in enumerate(avg_scores):
    plt.bar(X_axis + position * bar_width, values, bar_width, label=label)

# Centre each tick under its group of bars rather than under the first bar.
plt.xticks(X_axis + bar_width * (len(avg_scores) - 1) / 2, cvall)
plt.xlabel('SVR C value')
plt.ylabel('mean cross-validation score')

plt.legend()
plt.show()

## Score

In [None]:
# Per-fold scores left over from the cross-validation loop
# (for the SVR models these belong to the last C value that was tried).
for fold_scores in (score_LM, score_lin, score_rbf, score_poly):
    print(fold_scores)

In [None]:
# Validation score comparison across the k folds.
# The score_* arrays are whatever the cross-validation loop left behind, i.e.
# the SVR columns reflect the last C value evaluated there.
score = pd.DataFrame({'Linear Model': score_LM, 'SVR_linear': score_lin,
                      'SVR_rbf': score_rbf, 'SVR_poly': score_poly})
# The last column was labelled 'AVG SVC_poly'; the model is an SVR.
score_mean = pd.DataFrame({'AVG Linear Model': [score_LM.mean()],
                           'AVG SVR_linear': [score_lin.mean()],
                           'AVG SVR_rbf': [score_rbf.mean()],
                           'AVG SVR_poly': [score_poly.mean()]})
print(score)
print(score_mean)

In [None]:
# Fit the linear model on the training rows and score it on the held-out rows.
model_LM.fit(X_train, Y_train)
LM_pred = model_LM.predict(X_test)

# Test-set mean squared error and R^2 of the linear model.
LM_MSE = metrics.mean_squared_error(y_true=Y_test, y_pred=LM_pred)
LM_r2 = metrics.r2_score(y_true=Y_test, y_pred=LM_pred)
for test_metric in (LM_MSE, LM_r2):
    print(test_metric)

# Fit and evaluate all models

In [None]:
# Build the three SVR variants with C = 10, then fit every model on the
# training rows and predict the test rows.
c_val = 10
svr_lin = SVR(C=c_val, kernel='linear')
svr_rbf = SVR(C=c_val, kernel='rbf', gamma=0.01)
svr_poly = SVR(C=c_val, kernel='poly', degree=2)

model_LM.fit(X_train, Y_train)
LM_pred = model_LM.predict(X_test)

svr_lin.fit(X_train, Y_train)
svr_lin_pred = svr_lin.predict(X_test)

svr_rbf.fit(X_train, Y_train)
svr_rbf_pred = svr_rbf.predict(X_test)

svr_poly.fit(X_train, Y_train)
svr_poly_pred = svr_poly.predict(X_test)

In [None]:
# List the feature columns available in the test set (used to pick plot axes below).
X_test.columns

In [None]:
# Linear-model predictions plotted against the DEXJPUS feature of the test set.
# The scatter needs a label: plt.legend() without any labelled artist only
# produces an empty legend (and a warning when warnings are enabled).
plt.scatter(X_test["DEXJPUS"], LM_pred, c='magenta', label='Linear model prediction')
plt.title('X_test["DEXJPUS"] compare with Linear model')
plt.legend()
plt.show()

In [None]:
# Linear-model predictions plotted against the VIXCLS feature of the test set.
vix_test = X_test["VIXCLS"]
plt.scatter(x=vix_test, y=LM_pred, c='coral')
plt.title('X_test["VIXCLS"] compare with Linear model')
plt.show()


In [None]:
# Linear-kernel SVR predictions plotted against the VIXCLS feature of the test set.
vix_test = X_test["VIXCLS"]
plt.scatter(x=vix_test, y=svr_lin_pred, c='coral')
plt.title('X_test["VIXCLS"] compare with SVR linear')
plt.show()

In [None]:
# Polynomial-kernel SVR predictions plotted against the VIXCLS feature of the test set.
vix_test = X_test["VIXCLS"]
plt.scatter(x=vix_test, y=svr_poly_pred, c='coral')
plt.title('X_test["VIXCLS"] compare with SVR poly')
plt.show()


In [None]:
# RBF-kernel SVR predictions plotted against the VIXCLS feature of the test set.
vix_test = X_test["VIXCLS"]
plt.scatter(x=vix_test, y=svr_rbf_pred, c='coral')
plt.title('X_test["VIXCLS"] compare with SVR rbf')
plt.show()

In [None]:
# Test-set metrics for every fitted model, in the order:
# linear model, SVR linear, SVR rbf, SVR poly.
all_predictions = (LM_pred, svr_lin_pred, svr_rbf_pred, svr_poly_pred)

# Mean squared error of each model on the test set.
LM_MSE, svr_lin_MSE, svr_rbf_MSE, svr_poly_MSE = (
    metrics.mean_squared_error(Y_test, predicted) for predicted in all_predictions
)

# R^2 of each model on the test set (plotted in the next cell).
LM_r2, svr_lin_r2, svr_rbf_r2, svr_poly_r2 = (
    metrics.r2_score(Y_test, predicted) for predicted in all_predictions
)

# Bar chart comparing the test MSE of the four models.
mse_labels = ["LM_MSE", "svr_lin_MSE", "svr_rbf_MSE", "svr_poly_MSE"]
mse_values = [LM_MSE, svr_lin_MSE, svr_rbf_MSE, svr_poly_MSE]
plt.bar(mse_labels, mse_values)
plt.show()

In [None]:
# Bar chart comparing the test-set R^2 of the four models.
r2_labels = ["LM_r2", "svr_lin_r2", "svr_rbf_r2", "svr_poly_r2"]
r2_values = [LM_r2, svr_lin_r2, svr_rbf_r2, svr_poly_r2]
plt.bar(r2_labels, r2_values)
plt.show()