In [726]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as pdr
import seaborn as sns
import talib 
from sklearn.preprocessing import MinMaxScaler
from sklearn import tree
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from mlxtend.feature_selection import SequentialFeatureSelector as SFS



In [727]:
startdate = '2016-01-1'
enddate= '2021-12-31'
scaler = MinMaxScaler()

In [728]:
dji = pdr.get_data_yahoo('^DJI', start=startdate, end=enddate)
dji_norm = pd.DataFrame(scaler.fit_transform(dji), columns=dji.columns)
n225 = pdr.get_data_yahoo("^N225", start=startdate, end=enddate)
n225_norm = pd.DataFrame(scaler.fit_transform(n225), columns=n225.columns)
hsi = pdr.get_data_yahoo("^HSI", start=startdate, end=enddate)
hsi_norm = pd.DataFrame(scaler.fit_transform(hsi), columns=hsi.columns)
sse = pdr.get_data_yahoo("000001.SS", start=startdate, end=enddate)
sse_norm = pd.DataFrame(scaler.fit_transform(sse), columns=sse.columns)


<h1>Dow Jones Industrial Average</h1>

<h2>Feature Selection</h2>

In [729]:
dji['Future Close'] = dji['Close'].shift(-1)
dji

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Future Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-12-31,17590.660156,17421.160156,17590.660156,17425.029297,93690000,17425.029297,17148.939453
2016-01-04,17405.480469,16957.630859,17405.480469,17148.939453,148060000,17148.939453,17158.660156
2016-01-05,17195.839844,17038.609375,17147.500000,17158.660156,105750000,17158.660156,16906.509766
2016-01-06,17154.830078,16817.619141,17154.830078,16906.509766,120250000,16906.509766,16514.099609
2016-01-07,16888.359375,16463.630859,16888.359375,16514.099609,176240000,16514.099609,16346.450195
...,...,...,...,...,...,...,...
2021-11-16,36316.609375,36076.179688,36076.179688,36142.218750,308290000,36142.218750,35931.050781
2021-11-17,36159.699219,35909.480469,36159.699219,35931.050781,358310000,35931.050781,35870.949219
2021-11-18,35952.628906,35654.390625,35901.691406,35870.949219,435080000,35870.949219,35601.980469
2021-11-19,35879.089844,35555.371094,35879.089844,35601.980469,408840000,35601.980469,35619.250000


In [730]:
def computeTarget(present_price, future_price):
    score = (( future_price - present_price ) / present_price ) * 100
    target = []
    for i in score:
        if i > 0:
            target.append(1)
        else:
            target.append(0)
    return target
# 2 = up, 1 = down, 0 = sideway

In [731]:

target = computeTarget(dji['Close'], dji['Future Close'])

dji['Target'] = target
dji = dji.dropna()

In [732]:
close = dji['Close']
high = dji['High']
low = dji['Low']
open = dji['Open']
volume  = dji['Volume']

In [733]:
dji_ti = pd.DataFrame()
dji_ti['SMA20'] = talib.SMA(close, timeperiod=20)
dji_ti['SMA50'] = talib.SMA(close, timeperiod=50)
dji_ti['SMA200'] = talib.SMA(close, timeperiod=200)
dji_ti['EMA'] = talib.EMA(close, timeperiod=14)
dji_ti['OBV'] = talib.OBV(close, volume)
dji_ti['RSI14'] = talib.RSI(close, timeperiod=14)
dji_ti['ADX'] = talib.ADX(high, low, close, timeperiod=14)
dji_ti['ADXR'] = talib.ADXR(high, low, close, timeperiod=14)
dji_ti['aroondown'], ti['aroonup'] = talib.AROON(high, low, timeperiod=14)
dji_ti['APO'] = talib.APO(close, fastperiod=12, slowperiod=26, matype=0)
dji_ti['AROONOSC'] = talib.AROONOSC(high, low, timeperiod=14)
dji_ti['BOP'] = talib.BOP(open, high, low, close)
dji_ti['CCI'] = talib.CCI(high, low, close, timeperiod=14)
dji_ti['CMO'] = talib.CMO(close, timeperiod=14)
dji_ti['DX'] = talib.DX(high, low, close, timeperiod=14)
dji_ti['macd'],ti['macdsignal'],ti['macdhist'] = talib.MACD(close, fastperiod = 12, slowperiod=26, signalperiod=9)
dji_ti['MFI'] = talib.MFI(high, low, close, volume, timeperiod=14)
dji_ti['MINUS_DI'] = talib.MINUS_DI(high, low, close, timeperiod=14)
dji_ti['MINUS_DM'] = talib.MINUS_DM(high, low, timeperiod=14)
dji_ti['MOM'] = talib.MOM(close, timeperiod=14)
dji_ti['PLUS_DI'] = talib.PLUS_DI(high, low, close, timeperiod=14)
dji_ti['PLUS_DM'] = talib.PLUS_DM(high, low, timeperiod=14)
dji_ti['PPO'] = talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)
dji_ti['ROC'] = talib.ROC(close, timeperiod=10)
dji_ti['ROCP'] = talib.ROCP(close, timeperiod=10)
dji_ti['ROCR'] = talib.ROCR(close, timeperiod=10)
dji_ti['ROCR100'] = talib.ROCR100(close, timeperiod=10)
dji_ti['slowk'], ti['slowd'] = talib.STOCH(high, low, close, fastk_period=5, slowk_period=3, slowd_period=3, slowd_matype=0)
dji_ti['fastk'], ti['fastd'] = talib.STOCHF(high, low, close, fastk_period=5, fastd_period=3, fastd_matype=0)
dji_ti['TRIX'] = talib.TRIX(close, timeperiod=30)
dji_ti['ULTOSC'] = talib.ULTOSC(high, low, close, timeperiod1=7, timeperiod2=14, timeperiod3=28)
dji_ti['WILLR'] = talib.WILLR(high, low, close, timeperiod=14)
dji_ti['Target'] = dji['Target']
dji_ti

Unnamed: 0_level_0,SMA20,SMA50,SMA200,EMA,OBV,RSI14,ADX,ADXR,aroondown,APO,...,ROC,ROCP,ROCR,ROCR100,slowk,fastk,TRIX,ULTOSC,WILLR,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-12-31,,,,,9.369000e+07,,,,,,...,,,,,,,,,,0
2016-01-04,,,,,-5.437000e+07,,,,,,...,,,,,,,,,,1
2016-01-05,,,,,5.138000e+07,,,,,,...,,,,,,,,,,0
2016-01-06,,,,,-6.887000e+07,,,,,,...,,,,,,,,,,0
2016-01-07,,,,,-2.451100e+08,,,,,,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-15,35920.142969,35120.292734,34133.517100,35983.146326,4.037648e+10,59.727010,23.895160,23.179295,7.142857,452.909655,...,0.483405,0.004834,1.004834,100.483405,21.227432,34.318504,0.048812,47.777185,-44.478834,1
2021-11-16,35954.388477,35141.137109,34160.790791,36004.355982,4.068477e+10,60.821001,23.921669,22.839573,0.000000,416.484150,...,0.248497,0.002485,1.002485,100.248497,38.460274,52.614944,0.051693,45.702226,-41.507722,0
2021-11-17,35970.474023,35159.136719,34186.828047,35994.581955,4.032646e+10,54.656278,23.134882,22.278214,0.000000,358.198743,...,-0.626500,-0.006265,0.993735,99.373500,30.743867,5.298153,0.054180,40.768597,-68.059884,0
2021-11-18,35983.867578,35178.968125,34210.903496,35978.097590,3.989138e+10,53.009423,21.616344,21.578782,0.000000,285.630283,...,-0.701140,-0.007011,0.992989,99.298860,30.205023,32.701973,0.056254,43.144530,-76.237339,0


In [734]:
# plt.figure(figsize=(12,10))
# cor = ti.corr()
# sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
# plt.show()

In [735]:
# cor_target = abs(cor["Target"])
# features = cor_target[cor_target>0.05]
# features

In [751]:
dji_ti = dji_ti.dropna()
X = dji_ti.drop("Target", axis=1)  # Feature Matrix
y = dji_ti["Target"]               # Target Variable
y

Date
2016-10-14    0
2016-10-17    1
2016-10-18    1
2016-10-19    0
2016-10-20    0
             ..
2021-11-15    1
2021-11-16    0
2021-11-17    0
2021-11-18    0
2021-11-19    1
Name: Target, Length: 1285, dtype: int64

In [752]:
X = X.fillna(0)

In [753]:
#X.isna().sum()

In [754]:
model = LinearRegression()

#Initializing RFE model
rfe = RFE(model, n_features_to_select=7)

#Transforming data using RFE
X_rfe = rfe.fit_transform(X,y)  

#Fitting the data to model
model.fit(X_rfe,y)

print(rfe.support_)
print(rfe.ranking_)

[False False False False False  True False False False False False  True
 False  True False False False False False False  True False False  True
 False False  True False False  True False False]
[16 15 24 22 26  1  9  8  7 13 12  1 11  1 17 14 25  3 19 23  1 18  4  1
 21 20  1  6 10  1  2  5]


In [755]:
# no. of features
#
nof_list = np.arange(1, 13)            
high_score = 0

# Variable to store the optimum features
#
nof = 0           
score_list = []

for n in range(len(nof_list)):
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.9, random_state = 0)

    model = LinearRegression()
    rfe = RFE(model, n_features_to_select=nof_list[n])
    X_train_rfe = rfe.fit_transform(X_train,y_train)
    X_test_rfe = rfe.transform(X_test)
    model.fit(X_train_rfe,y_train)

    score = model.score(X_test_rfe,y_test)
    score_list.append(score)

    if(score > high_score):
        high_score = score
        nof = nof_list[n]

print("Optimum number of features: %d" % nof)
print("Score with %d features: %f" % (nof, high_score))

Optimum number of features: 0
Score with 0 features: 0.000000


In [756]:
cols = list(X.columns)
model = LinearRegression()

# Initializing RFE model
rfe = RFE(model, n_features_to_select=10)             

# Transforming data using RFE
X_rfe = rfe.fit_transform(X,y)  

# Fitting the data to model
model.fit(X_rfe,y)              
temp = pd.Series(rfe.support_, index = cols)
selected_features_rfe = temp[temp==True].index

print(selected_features_rfe)

Index(['RSI14', 'BOP', 'CMO', 'MINUS_DI', 'PLUS_DI', 'PPO', 'ROC', 'ROCR100',
       'TRIX', 'ULTOSC'],
      dtype='object')


In [777]:
# dji_ti = dji_ti.dropna()
# X = dji_ti.drop("Target", axis=1)  # Feature Matrix
# y = dji_ti["Target"]               # Target Variable
# #Split the features and target data
# #Define Sequential Forward Selection (sfs)
# sfs = SFS(LinearRegression(),
#            k_features=5,
#            forward=True,
#            floating=False,
#            scoring = 'r2',
#            cv = 0)
# #Use SFS to select the top 5 features 
# sfs.fit(X, y)

# #Create a dataframe for the SFS results 
# df_SFS_results = pd.DataFrame(sfs.subsets_).transpose()
# df_SFS_results


<h2>จากการทำ Feature Selection ควรจะใช้ 'RSI14', 'BOP', 'CMO', 'MINUS_DI', 'PLUS_DI', 'PPO', 'ROC', 'ROCR100','TRIX', 'ULTOSC' ในการสร้าง model</h2>

In [769]:
from sklearn.model_selection import train_test_split
X = dji_ti.loc[:, ['RSI14', 'BOP', 'CMO', 'MINUS_DI', 'PLUS_DI', 'PPO', 'ROC', 'ROCR100','TRIX', 'ULTOSC']].values
#X = dji_ti.loc[:, ['CCI', 'DX', 'MINUS_DI', 'PLUS_DM', 'slowd']].values
y = dji_ti.loc[:, ['Target']].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [770]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [771]:
# Visualize the data
#
def visualize(title, xlabel, ylabel, classifier, X_test, y_test):
    X_set, y_set = X_test, y_test
    X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                         np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
    plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
                 alpha = 0.75, cmap = ListedColormap(('lightblue', 'cornsilk')))
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())

    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], label = j)

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

<h2>Decision Tree</h2>

In [772]:
# Decision Tree Classifier: Create and Train the model
#
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [773]:
# Prediction
#
y_pred = dtc.predict(X_test)  #Accuracy
y_pred

array([1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0])

In [774]:
# Look at the actual y_test
#
y_test

array([[0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
    

In [775]:
# See the accuracy
#
from sklearn import metrics
print('Accuracy Score:', metrics.accuracy_score(y_test, y_pred))

Accuracy Score: 0.5058365758754864


In [765]:
# See the confusion matrix
#
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[ 1,  4,  6],
       [ 8, 51, 52],
       [ 2, 55, 61]])

<h2>Random Forest</h2>

<h2>Logistic Regression</h2>

<h2>XGBoost</h2>

<h2>Linear Regression</h2>

<h2>Gauccian Process Regress</h2>

<h1>Nikkei 225 Stock Average</h1>

<h2>Feature Selection</h2>

In [787]:
n225['Future Close'] = n225['Close'].shift(-1)
n225

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Future Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-04,18951.119141,18394.429688,18818.580078,18450.980469,140200000.0,18450.980469,18374.000000
2016-01-05,18547.380859,18327.519531,18398.759766,18374.000000,132300000.0,18374.000000,18191.320312
2016-01-06,18469.380859,18064.300781,18410.570312,18191.320312,142200000.0,18191.320312,17767.339844
2016-01-07,18172.039062,17767.339844,18139.769531,17767.339844,168000000.0,17767.339844,17697.960938
2016-01-08,17975.310547,17509.640625,17562.230469,17697.960938,184300000.0,17697.960938,17218.960938
...,...,...,...,...,...,...,...
2021-11-16,29960.929688,29681.250000,29749.710938,29808.119141,62000000.0,29808.119141,29688.330078
2021-11-17,29909.970703,29623.789062,29906.679688,29688.330078,63900000.0,29688.330078,29598.660156
2021-11-18,29715.949219,29402.570312,29597.929688,29598.660156,65400000.0,29598.660156,29745.869141
2021-11-19,29768.539062,29589.189453,29641.050781,29745.869141,65600000.0,29745.869141,29774.109375


In [788]:
target = computeTarget(n225['Close'], n225['Future Close'])

n225['Target'] = target
n225 = n225.dropna()

In [789]:
close = n225['Close']
high = n225['High']
low = n225['Low']
open = n225['Open']
volume  = n225['Volume']

In [791]:
n225_ti = pd.DataFrame()
n225_ti['SMA20'] = talib.SMA(close, timeperiod=20)
n225_ti['SMA50'] = talib.SMA(close, timeperiod=50)
n225_ti['SMA200'] = talib.SMA(close, timeperiod=200)
n225_ti['EMA'] = talib.EMA(close, timeperiod=14)
n225_ti['OBV'] = talib.OBV(close, volume)
n225_ti['RSI14'] = talib.RSI(close, timeperiod=14)
n225_ti['ADX'] = talib.ADX(high, low, close, timeperiod=14)
n225_ti['ADXR'] = talib.ADXR(high, low, close, timeperiod=14)
n225_ti['aroondown'], ti['aroonup'] = talib.AROON(high, low, timeperiod=14)
n225_ti['APO'] = talib.APO(close, fastperiod=12, slowperiod=26, matype=0)
n225_ti['AROONOSC'] = talib.AROONOSC(high, low, timeperiod=14)
n225_ti['BOP'] = talib.BOP(open, high, low, close)
n225_ti['CCI'] = talib.CCI(high, low, close, timeperiod=14)
n225_ti['CMO'] = talib.CMO(close, timeperiod=14)
n225_ti['DX'] = talib.DX(high, low, close, timeperiod=14)
n225_ti['macd'],ti['macdsignal'],ti['macdhist'] = talib.MACD(close, fastperiod = 12, slowperiod=26, signalperiod=9)
n225_ti['MFI'] = talib.MFI(high, low, close, volume, timeperiod=14)
n225_ti['MINUS_DI'] = talib.MINUS_DI(high, low, close, timeperiod=14)
n225_ti['MINUS_DM'] = talib.MINUS_DM(high, low, timeperiod=14)
n225_ti['MOM'] = talib.MOM(close, timeperiod=14)
n225_ti['PLUS_DI'] = talib.PLUS_DI(high, low, close, timeperiod=14)
n225_ti['PLUS_DM'] = talib.PLUS_DM(high, low, timeperiod=14)
n225_ti['PPO'] = talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)
n225_ti['ROC'] = talib.ROC(close, timeperiod=10)
n225_ti['ROCP'] = talib.ROCP(close, timeperiod=10)
n225_ti['ROCR'] = talib.ROCR(close, timeperiod=10)
n225_ti['ROCR100'] = talib.ROCR100(close, timeperiod=10)
n225_ti['slowk'], ti['slowd'] = talib.STOCH(high, low, close, fastk_period=5, slowk_period=3, slowd_period=3, slowd_matype=0)
n225_ti['fastk'], ti['fastd'] = talib.STOCHF(high, low, close, fastk_period=5, fastd_period=3, fastd_matype=0)
n225_ti['TRIX'] = talib.TRIX(close, timeperiod=30)
n225_ti['ULTOSC'] = talib.ULTOSC(high, low, close, timeperiod1=7, timeperiod2=14, timeperiod3=28)
n225_ti['WILLR'] = talib.WILLR(high, low, close, timeperiod=14)
n225_ti['Target'] = n225['Target']
n225_ti

Unnamed: 0_level_0,SMA20,SMA50,SMA200,EMA,OBV,RSI14,ADX,ADXR,aroondown,APO,...,ROC,ROCP,ROCR,ROCR100,slowk,fastk,TRIX,ULTOSC,WILLR,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-04,,,,,1.402000e+08,,,,,,...,,,,,,,,,,0
2016-01-05,,,,,7.900000e+06,,,,,,...,,,,,,,,,,0
2016-01-06,,,,,-1.343000e+08,,,,,,...,,,,,,,,,,0
2016-01-07,,,,,-3.023000e+08,,,,,,...,,,,,,,,,,0
2016-01-08,,,,,-4.866000e+08,,,,,,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-15,29233.262109,29297.366875,28901.341738,29352.537538,5.710900e+09,58.851897,11.824174,15.413168,0.000000,357.955729,...,3.059983,0.030600,1.030600,103.059983,66.522879,89.647116,0.034467,49.140580,-7.398881,1
2021-11-16,29272.395020,29322.659062,28907.225088,29413.281752,5.772900e+09,59.249153,11.990726,14.860945,21.428571,372.630747,...,0.543187,0.005432,1.005432,100.543187,84.425284,83.405483,0.035941,49.417832,-10.284253,0
2021-11-17,29296.035547,29333.863477,28911.555293,29449.954862,5.709000e+09,56.983099,11.979636,14.369150,14.285714,393.159868,...,0.567156,0.005672,1.005672,100.567156,81.149848,70.396946,0.037510,49.491904,-18.346138,0
2021-11-18,29313.191016,29332.638867,28916.817695,29469.782235,5.643600e+09,55.278742,11.348443,13.825177,7.142857,336.507537,...,-0.656866,-0.006569,0.993431,99.343134,63.762055,37.483737,0.039044,56.947511,-24.380985,1


In [793]:
n225_ti = n225_ti.dropna()
X = n225_ti.drop("Target", axis=1)  # Feature Matrix
y = n225_ti["Target"]               # Target Variable

In [794]:
model = LinearRegression()

#Initializing RFE model
rfe = RFE(model, n_features_to_select=7)

#Transforming data using RFE
X_rfe = rfe.fit_transform(X,y)  

#Fitting the data to model
model.fit(X_rfe,y)

print(rfe.support_)
print(rfe.ranking_)

[False False False False False False False  True False False False  True
 False  True False False False  True False False False False False  True
 False False False False False  True  True False]
[17 19 25 16 26  7  2  1 11 12 14  1 13  1  8 15 18  1 24 22  5 23  3  1
 20 21  6 10  9  1  1  4]


In [795]:
# no. of features
#
nof_list = np.arange(1, 13)            
high_score = 0

# Variable to store the optimum features
#
nof = 0           
score_list = []

for n in range(len(nof_list)):
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.9, random_state = 0)

    model = LinearRegression()
    rfe = RFE(model, n_features_to_select=nof_list[n])
    X_train_rfe = rfe.fit_transform(X_train,y_train)
    X_test_rfe = rfe.transform(X_test)
    model.fit(X_train_rfe,y_train)

    score = model.score(X_test_rfe,y_test)
    score_list.append(score)

    if(score > high_score):
        high_score = score
        nof = nof_list[n]

print("Optimum number of features: %d" % nof)
print("Score with %d features: %f" % (nof, high_score))

Optimum number of features: 0
Score with 0 features: 0.000000


In [796]:
cols = list(X.columns)
model = LinearRegression()

# Initializing RFE model
rfe = RFE(model, n_features_to_select=10)             

# Transforming data using RFE
X_rfe = rfe.fit_transform(X,y)  

# Fitting the data to model
model.fit(X_rfe,y)              
temp = pd.Series(rfe.support_, index = cols)
selected_features_rfe = temp[temp==True].index

print(selected_features_rfe)

Index(['ADX', 'ADXR', 'BOP', 'CMO', 'MINUS_DI', 'PPO', 'ROC', 'TRIX', 'ULTOSC',
       'WILLR'],
      dtype='object')


<h2>จากการทำ Feature Selection ควรจะใช้ 'ADX', 'ADXR', 'BOP', 'CMO', 'MINUS_DI', 'PPO', 'ROC', 'TRIX', 'ULTOSC',
       'WILLR' ในการสร้าง model</h2>

<h2>Decision Tree</h2>

<h2>Random Forest</h2>

<h2>Logistic Regression</h2>

<h2>XGBoost</h2>

<h2>Linear Regress</h2>

<h2>Cauccian Process Regression</h2>

<h1>Heng Seng Index</h1>

<h2>Feature Selection</h2>

<h2>Decision Tree</h2>

<h2>Random Forest</h2>

<h2>Logistic Regression</h2>

<h2>XGBoost</h2>

<h2>Linear Regression</h2>

<h2>Gauccian Process Regression</h2>

<h1>SSE Composite Index</h1>

<h2>Feature Selection</h2>

<h2>Decision Tree</h2>

<h2>Random Forest</h2>

<h2>Logistic Regression</h2>

<h2>XGBoost</h2>

<h2>Linear Regression</h2>

<h2>Cauccian Process Regression</h2>