## 訓練資料集&CNN Model

In [35]:
import numpy as np
import pandas as pd
from keras.utils import np_utils 
from tqdm import tqdm
import os
from random import shuffle
from keras.models import Model
from matplotlib import pyplot
from numpy import expand_dims
import matplotlib.pyplot as plt 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import optimizers
from keras.regularizers import l2
from keras.layers import BatchNormalization
from keras import regularizers

def load_cnn_train_data():
    os.environ["CUDA_VISIBLE_DEVICES"]="0"

    #10大權值股
    stock_codes = [2330,2317,2412,6505,1301,1303,2454,2882,2881,3008]
    numbers = len(stock_codes)*570

    x_train = []
    y_train = []
    for i in tqdm(stock_codes):
        stock_code = str(i)

        for i in range(570):
            df = pd.read_csv('data/x/'+stock_code+'_'+str(i)+'_x.csv')
            df = df.drop(['date'], axis = 1)

            y = df['y'].T.to_numpy().tolist()
            y_train.append(y[0])

            df = df.drop(['y'], axis = 1)
            df = df.drop(['floating'], axis = 1)

            x = df.T.to_numpy().tolist()
            x_train.append(x)

    x_train = np.array(x_train)
    x_train = x_train.reshape(numbers,20,30,1).astype('float32') 
    x_train = (x_train+1)/2

    y_train = np.array(y_train).astype('float32') 
    y_train = (y_train+1)
    y_train_onehot = np_utils.to_categorical(y_train,3)

    return x_train, y_train_onehot

def cnn_model():
    model = Sequential()
    model.add(Conv2D(filters = 128 ,kernel_size = (20,3), padding = 'same', input_shape = (20, 30, 1), activation = 'relu'))
    #model.add(Conv2D(filters = 128,kernel_size = (20,3), padding = 'same', activation = 'relu'))
    model.add(Dropout(0.5)) 
    model.add(Flatten())                                    
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5)) 
    model.add(Dense(3,activation='softmax'))
    model.summary()
    return model

In [36]:
x_train, y_train_onehot = load_cnn_train_data()
model = cnn_model()

100%|██████████| 10/10 [00:22<00:00,  2.26s/it]


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 20, 30, 128)       7808      
_________________________________________________________________
dropout_5 (Dropout)          (None, 20, 30, 128)       0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 76800)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               9830528   
_________________________________________________________________
dropout_6 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 387       
Total params: 9,838,723
Trainable params: 9,838,723
Non-trainable params: 0
____________________________________________

In [38]:
from keras.callbacks import EarlyStopping

adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=2)

model.compile(loss='categorical_crossentropy',optimizer= adam,metrics=['accuracy']) 
train_history=model.fit(x = x_train, y = y_train_onehot, validation_split = 0.2, epochs = 100 ,batch_size = 128, verbose = 2, callbacks=[early_stop])

Train on 4560 samples, validate on 1140 samples
Epoch 1/100
 - 7s - loss: 1.0708 - accuracy: 0.4868 - val_loss: 0.7086 - val_accuracy: 0.6877
Epoch 2/100
 - 7s - loss: 0.6980 - accuracy: 0.6627 - val_loss: 0.6659 - val_accuracy: 0.7000
Epoch 3/100
 - 7s - loss: 0.6286 - accuracy: 0.7134 - val_loss: 0.6007 - val_accuracy: 0.7254
Epoch 4/100
 - 7s - loss: 0.5909 - accuracy: 0.7355 - val_loss: 0.5973 - val_accuracy: 0.7272
Epoch 5/100
 - 7s - loss: 0.5801 - accuracy: 0.7353 - val_loss: 0.5655 - val_accuracy: 0.7439
Epoch 6/100
 - 7s - loss: 0.5542 - accuracy: 0.7454 - val_loss: 0.5897 - val_accuracy: 0.7395
Epoch 7/100
 - 7s - loss: 0.5552 - accuracy: 0.7489 - val_loss: 0.5934 - val_accuracy: 0.7421
Epoch 8/100
 - 7s - loss: 0.5253 - accuracy: 0.7763 - val_loss: 0.5852 - val_accuracy: 0.7430
Epoch 9/100
 - 7s - loss: 0.5231 - accuracy: 0.7684 - val_loss: 0.5911 - val_accuracy: 0.7404
Epoch 10/100
 - 7s - loss: 0.5155 - accuracy: 0.7689 - val_loss: 0.5785 - val_accuracy: 0.7465
Epoch 11/10

## 測試資料集
stock_code參數調整預測試的股票代碼

In [50]:
stock_codes = [2330]
numbers = len(stock_codes)*570

x_test = []
y_test = []
for i in tqdm(stock_codes):
    stock_code = str(i)
    
    for i in range(570):
        df = pd.read_csv('data/x/'+stock_code+'_'+str(i)+'_x.csv')
        df = df.drop(['date'], axis = 1)
        
        y = df['y'].T.to_numpy().tolist()
        y_test.append(y[0])
        
        df = df.drop(['y'], axis = 1)
        df = df.drop(['floating'], axis = 1)
        
        x = df.T.to_numpy().tolist()
        x_test.append(x)
    
x_test = np.array(x_test)
x_test = x_test.reshape(numbers,20,30,1).astype('float32') 
x_test = (x_test+1)/2

y_test = np.array(y_test).astype('float32') 
y_test = (y_test+1)
y_test_onehot = np_utils.to_categorical(y_test,3)

100%|██████████| 1/1 [00:02<00:00,  2.25s/it]


## 評估

In [55]:
from keras.models import load_model

scores = model.evaluate(x_test, y_test_onehot)   
print('acc :', scores[1])
prediction = model.predict_classes(x_test) .astype('float32')
pd.crosstab(y_test, prediction, rownames=['label'],colnames=['predict'])

acc : 0.840350866317749


predict,0.0,1.0,2.0
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,86,15,0
1.0,7,187,11
2.0,0,58,206


In [75]:
class Directional_Symmetry:
    def __init__(self, prediction, real):
        self.prediction = prediction-1
        self.real = real - 1
        self.my_dc()
        self.dc()

    def my_dc(self):
        total = 0
        for i in range(len(self.real)):
            if self.real[i] * self.prediction[i] == 0:
                if self.real[i] == 0 and self.prediction[i] == 0:
                    tmp = 1
                else:
                    tmp = 0
            else:
                tmp = 1
            total += tmp
        result = total/(len(self.real))*100
        self.my_dc_results = result

    def dc(self):
        total = 0
        for i in range(len(self.real)):
            if self.prediction[i] * self.real[i] > 0:
                tmp = 1
            else:
                tmp = 0
            total += tmp
        result = total/(len(self.real))*100
        self.dc_results = result

ds = Directional_Symmetry(prediction,y_test)
print('directional symmetry(包含不漲跌類別):', ds.my_dc_results,'%')
print('directional symmetry(不漲跌類別皆為錯):', ds.dc_results,'%')


directional symmetry(包含不漲跌類別): 84.03508771929825 %
directional symmetry(不漲跌類別皆為錯): 51.2280701754386 %
