# 데이터 획득 및 설정

In [38]:
import sqlalchemy as db
import datetime
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
import os
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf
import random
from tensorflow.keras.layers import Dropout, BatchNormalization, Activation
from keras.callbacks import ModelCheckpoint

In [39]:
# Connect to the MJTradierDB MySQL database.
# The SQLAlchemy URL (including user name and password) is read from the
# MJTRADIER_DB_URL environment variable so that credentials are never stored
# in the notebook, e.g. 'mysql://<user>:<password>@<host>/MJTradierDB'.
# NOTE(review): the credentials that used to be hard-coded here should be
# rotated, since they have been committed in plain text.
db_url = os.environ.get('MJTRADIER_DB_URL')
if not db_url:
    raise RuntimeError(
        'Set the MJTRADIER_DB_URL environment variable to the SQLAlchemy URL '
        'of the MJTradierDB database before running this notebook.'
    )
engine = create_engine(db_url)
conn = engine.connect()

In [40]:
# Load the whole 'buyReports' table into a DataFrame through the open connection.
br = pd.read_sql_table('buyReports', conn)

In [41]:
# Filtering: keep only the rows where isAllBuyed and isAllSelled are both 1
# and nBuyVolume is positive.
br = br[( br['isAllBuyed'] == 1) & ( br['isAllSelled'] == 1) & (br['nBuyVolume'] > 0)]

In [42]:
feature_names =  [   
        'nBuyStrategyIdx',
        'nRqTime' , 
        'fStartGap' ,
        'fPowerWithOutGap' , 
        'fPower' , 
        'fPlusCnt07' , 
        'fMinusCnt07' , 
        'fPlusCnt09' , 
        'fMinusCnt09' ,
        'fPowerJar' , 
        'fOnlyDownPowerJar' , 
        'fOnlyUpPowerJar' , 
        'nTradeCnt' , 
        'nChegyulCnt' , 
        'nHogaCnt' , 
        'nNoMoveCnt' , 
        'nFewSpeedCnt' ,
        'nMissCnt' , 
        'lTotalTradeVolume' , 
        'lTotalBuyVolume' , 
        'lTotalSellVolume' ,
        'nAccumUpDownCount' ,
        'fAccumUpPower' , 
        'fAccumDownPower' ,
        'lTotalTradePrice' , 
        'lTotalBuyPrice' , 
        'lTotalSellPrice' , 
        'lMarketCap' , 
        'nAccumCountRanking' , 
        'nMarketCapRanking' , 
        'nPowerRanking' , 
        'nTotalBuyPriceRanking' , 
        'nTotalBuyVolumeRanking' ,
        'nTotalTradePriceRanking' ,
        'nTotalTradeVolumeRanking' ,
        'nTotalRank' , 
        'nMinuteTotalRank' , 
        'nMinuteTradePriceRanking' ,
        'nMinuteTradeVolumeRanking' , 
        'nMinuteBuyPriceRanking' , 
        'nMinuteBuyVolumeRanking' ,
        'nMinutePowerRanking' , 
        'nMinuteCountRanking' ,
        'nMinuteUpDownRanking' ,
        'nFakeBuyCnt' , 
        'nFakeAssistantCnt' ,
        'nFakeResistCnt' , 
        'nPriceUpCnt' , 
        'nPriceDownCnt' ,
        'nTotalFakeCnt' ,
        'nTotalFakeMinuteCnt' ,
        'nUpCandleCnt' , 
        'nDownCandleCnt' ,
        'nUpTailCnt' , 
        'nDownTailCnt' ,
        'nShootingCnt' ,
        'nCandleTwoOverRealCnt' ,
        'nCandleTwoOverRealNoLeafCnt' , 
        'fSpeedCur' , 
        'fHogaSpeedCur' ,
        'fTradeCur' , 
        'fPureTradeCur' , 
        'fPureBuyCur' , 
        'fHogaRatioCur' ,  
        'fSharePerHoga' , 
        'fSharePerTrade' ,
        'fHogaPerTrade' , 
        'fTradePerPure' , 
        'fMaDownFsVal' , 
        'fMa20mVal' , 
        'fMa1hVal' ,
        'fMa2hVal' ,
        'fMaxMaDownFsVal' ,
        'fMaxMa20mVal' ,
        'fMaxMa1hVal' ,
        'fMaxMa2hVal' ,
        'nMaxMaDownFsTime' ,
        'nMaxMa20mTime' ,
        'nMaxMa1hTime' ,
        'nMaxMa2hTime' ,
        'nDownCntMa20m' ,
        'nDownCntMa1h' ,
        'nDownCntMa2h' ,
        'nUpCntMa20m' ,
        'nUpCntMa1h' ,
        'nUpCntMa2h' ,
        'fMSlope' ,
        'fISlope' ,
        'fTSlope' ,
        'fHSlope' ,
        'fRSlope' ,
        'fDSlope' ,
        'fMAngle' ,
        'fIAngle' ,
        'fTAngle' ,
        'fHAngle' ,
        'fRAngle' ,
        'fDAngle' ,
        'nCrushCnt' ,
        'nCrushUpCnt' ,
        'nCrushDownCnt' ,
        'nCrushSpecialDownCnt' 
]
feature_size = len(feature_names)

In [43]:
# Some double values are larger than the float32 range and end up as inf.
# Because that can cause errors, the affected columns get an upper bound.
BILLION = 1000000000
for capped_col in ('fSharePerHoga', 'fHogaPerTrade', 'fSharePerTrade', 'fTradePerPure'):
    too_large = br[capped_col] > BILLION
    br.loc[too_large, capped_col] = BILLION

# Feature frame: only the columns listed in feature_names, in that order.
X = br[feature_names]

In [44]:
# Per-column scaling statistics shared by the scaling helpers.
# They all stay None until setScaler() assigns them.
min_s = None      # column minimums
max_s = None      # column maximums
mean_s = None     # column means
std_s = None      # column standard deviations
zero_s = None     # all-zero vector, one entry per column
median_s = None   # column medians
iqr3_s = None     # column 0.75 quantiles
iqr1_s = None     # column 0.25 quantiles

# Names of the scaling methods understood by fitMyScaler() / WriteScaleData().
MINMAX = 'MinMax'
ROBUST = 'Robust'
STANDARD = 'Standard'

def setScaler(p_data):
    """Compute per-column scaling statistics and publish them as module globals.

    The data is converted to a float32 array and, for every column, the
    following are stored: minimum/maximum (MinMax scaling), mean/standard
    deviation plus an all-zero vector (Standard scaling), and the median with
    the 0.25 and 0.75 quantiles (Robust scaling).

    Args:
        p_data: two-dimensional tabular object exposing
            ``to_numpy(dtype=...)`` (a pandas DataFrame in this notebook).

    Returns:
        None. The results are written to ``min_s``, ``max_s``, ``mean_s``,
        ``std_s``, ``zero_s``, ``median_s``, ``iqr3_s`` and ``iqr1_s``.
    """
    global min_s, max_s, mean_s, std_s, zero_s, median_s, iqr3_s, iqr1_s

    values = p_data.to_numpy(dtype=np.float32)
    n_columns = values.shape[1]

    # Statistics for MinMax scaling
    min_s, max_s = np.min(values, axis=0), np.max(values, axis=0)

    # Statistics for Standard scaling (zero_s is the "nothing to subtract" vector)
    mean_s = np.mean(values, axis=0)
    std_s = np.std(values, axis=0)
    zero_s = np.zeros(n_columns, dtype=np.float32)

    # Statistics for Robust scaling
    median_s = np.median(values, axis=0)
    iqr1_s = np.quantile(values, q=0.25, axis=0)
    iqr3_s = np.quantile(values, q=0.75, axis=0)
    


In [45]:
# In testing, the Normalizer scaling method did not work well.
# Collect the per-column scaling statistics from the feature frame.
setScaler(X)

In [46]:
def fitMyScaler(p_data, scale_method='MinMax'):
    """Scale every column of ``p_data`` with the statistics stored by setScaler().

    Each value is transformed as ``(value - d0) / (d1 - d2)`` where, per column:

    * ``'MinMax'``   -> d0 = min,    d1 = max,           d2 = min
    * ``'Standard'`` -> d0 = mean,   d1 = std,           d2 = 0
    * ``'Robust'``   -> d0 = median, d1 = 0.75 quantile, d2 = 0.25 quantile

    When ``d1 - d2`` is zero for a column the divisor falls back to
    ``max - min``; if that is zero, infinite or NaN as well, the divisor is 1.

    The computation is vectorised and never writes into the array obtained
    from ``p_data``, so the caller's data is left untouched.

    Args:
        p_data: two-dimensional tabular object exposing
            ``to_numpy(dtype=...)`` (a pandas DataFrame in this notebook).
        scale_method: ``'MinMax'``, ``'Standard'`` or ``'Robust'``.

    Returns:
        A new float32 ``numpy.ndarray`` with the same shape as ``p_data``, or
        ``None`` (after printing a message) when ``scale_method`` is unknown.

    Raises:
        IndexError: if the stored statistics cover fewer columns than
            ``p_data`` has.
    """
    np_data = p_data.to_numpy(dtype=np.float32)
    col_num = np_data.shape[1]

    if scale_method == 'MinMax':
        d0_s, d1_s, d2_s = min_s, max_s, min_s
    elif scale_method == 'Standard':
        d0_s, d1_s, d2_s = mean_s, std_s, zero_s
    elif scale_method == 'Robust':
        d0_s, d1_s, d2_s = median_s, iqr3_s, iqr1_s
    else:
        print('해당하는 스케일함수가 없습니다.')
        return

    if len(d0_s) < col_num:
        raise IndexError('scaling statistics cover fewer columns than the data')

    # Only the first col_num statistics are used, like the per-column lookup
    # of the original element-wise implementation.
    center = np.asarray(d0_s)[:col_num]
    denom = np.asarray(d1_s)[:col_num] - np.asarray(d2_s)[:col_num]

    # Fallback divisor for columns whose primary divisor is exactly zero.
    with np.errstate(invalid='ignore'):
        fallback = np.asarray(max_s)[:col_num] - np.asarray(min_s)[:col_num]
    fallback[(fallback == 0) | ~np.isfinite(fallback)] = 1

    zero_denom = denom == 0
    denom[zero_denom] = fallback[zero_denom]

    # Broadcasting applies the per-column center/divisor to every row at once.
    scaled = (np_data - center) / denom
    return scaled.astype(np.float32, copy=False)

In [47]:
def WriteScaleData(table, feature_names, scale_method, model_name, pandas_data):
    """Insert the scaling parameters of every feature into ``table``.

    One row is written per feature with the columns ``dTime`` (current time),
    ``sScaleMethod``, ``sVariableName``, ``sModelName``, ``fD0``, ``fD1``,
    ``fD2`` and ``nSeq`` (position of the feature in ``feature_names``).
    D0/D1/D2 come from the statistics stored by setScaler():

    * ``'MinMax'``   -> min, max, min
    * ``'Standard'`` -> mean, std, 0
    * ``'Robust'``   -> median, 0.75 quantile, 0.25 quantile

    When D1 and D2 are equal for a feature, max and min are stored instead.

    All rows are sent in a single multi-row insert through the module-level
    ``conn`` so a failure cannot leave a half-written parameter set behind,
    and NumPy scalars are converted to plain Python floats so the values can
    be bound by any DB driver.

    Args:
        table: SQLAlchemy ``Table`` to insert into.
        feature_names: ordered feature names; index ``i`` must match column
            ``i`` of the data given to setScaler().
        scale_method: ``'MinMax'``, ``'Standard'`` or ``'Robust'``.
        model_name: value stored in ``sModelName``.
        pandas_data: unused; kept so existing callers keep working.

    Returns:
        None. Errors are printed instead of raised, as before.
    """
    try:
        today = datetime.datetime.today()

        if scale_method == 'MinMax':
            d0_s, d1_s, d2_s = min_s, max_s, min_s
        elif scale_method == 'Standard':
            d0_s, d1_s, d2_s = mean_s, std_s, zero_s
        elif scale_method == 'Robust':
            d0_s, d1_s, d2_s = median_s, iqr3_s, iqr1_s
        else:
            print('해당하는 스케일함수가 없습니다.')
            return

        rows = []
        for idx, col in enumerate(feature_names):
            d0, d1, d2 = d0_s[idx], d1_s[idx], d2_s[idx]

            # A zero range cannot be used as a divisor; store max/min instead.
            if d2 - d1 == 0:
                d1, d2 = max_s[idx], min_s[idx]

            rows.append({
                'dTime': today,
                'sScaleMethod': scale_method,
                'sVariableName': col,
                'sModelName': model_name,
                'fD0': float(d0),
                'fD1': float(d1),
                'fD2': float(d2),
                'nSeq': idx,
            })

        # An empty parameter list would make execute() run a bare INSERT.
        if rows:
            result_proxy = conn.execute(db.insert(table), rows)
            result_proxy.close()
        print('put scale to ', model_name, ' ends')
    except Exception as ex:
        # Best-effort notebook helper: report the problem and carry on.
        print(ex)
        return


In [48]:
# Scale the feature frame with the 'Robust' method.
# NOTE(review): X is rebound from the DataFrame to the ndarray returned by
# fitMyScaler(), so running this cell a second time without rebuilding X
# fails, because an ndarray has no to_numpy().
scale_method = ROBUST
X = fitMyScaler(X, scale_method)

print(type(X))
print(X)

<class 'numpy.ndarray'>
[[-0.6805556   0.00704231 -0.21659644 ...  0.          0.
   0.        ]
 [-0.8472222   1.3594652  -0.67948395 ...  0.          0.
   0.        ]
 [-0.8333333   1.3624549  -0.67948395 ...  0.          0.
   0.        ]
 ...
 [ 0.5555556   1.0131629  -0.75302684 ...  2.          1.
   0.        ]
 [ 0.5694444   1.0131629  -0.75302684 ...  2.          1.
   0.        ]
 [ 0.5972222   1.0131629  -0.75302684 ...  2.          1.
   0.        ]]


In [49]:
# Binary label: 1 when fProfit is at least `crit`, 0 when it is below `crit`.
# NOTE(review): a row whose fProfit is NaN matches neither comparison and is
# left without a label.
crit = 0.01
target_col_name = 'fProfit'
br.loc[br[target_col_name] >= crit, 'target'] = 1
br.loc[br[target_col_name] < crit, 'target'] = 0
y = br['target']

# Labels as a plain ndarray, matching the ndarray feature matrix.
y = y.to_numpy()

In [50]:
br['target'].unique()

array([0., 1.])

In [51]:
# Derive an integer seed from a fresh random number; the small constant keeps
# the division away from zero. The bare name on the last line displays it.
random_seed = int(1 / (random.random() + 0.00000001) * 100)
random_seed

274

In [52]:
# Hold out 25% of the rows as the test set, seeded with random_seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed)

In [53]:
print('X_train : ', X_train.shape)
print('y_train : ', y_train.shape)
print('X_test  : ', X_test.shape)
print('y_test  : ', y_test.shape)

X_train :  (52317, 102)
y_train :  (52317,)
X_test  :  (17439, 102)
y_test  :  (17439,)


In [54]:
nInputDim = feature_size
nOutputDim = 1

In [55]:
# Basic: fully connected binary classifier (ReLU hidden layers, sigmoid output).
# `shape` must be a tuple; `(nInputDim)` is just the int itself, so the
# trailing comma is needed to build a 1-tuple.
main_input = Input(shape=(nInputDim,), name='input')
x = main_input
# Widths of the hidden layers, in order from the input side to the output side.
for units in (1024, 1024, 512, 512, 512, 512, 256, 256, 256, 128, 256, 128, 256):
    x = Dense(units, activation='relu')(x)
main_output = Dense(nOutputDim, activation='sigmoid', name='output')(x)

In [20]:
# # Dropout
# # 테스트 상 BatchNormalization은 좋지 않다
# main_input = Input(shape=(nInputDim), name='input')
# x = Dense(256, activation='relu')(main_input)
# x = Dense(512, activation='relu')(x)
# x = Dropout(.1)(x)
# x = Dense(1024, activation='relu')(x)
# x = Dropout(.2)(x)
# x = Dense(1024, activation='relu')(x)
# x = Dropout(.2)(x)
# x = Dense(512, activation='relu')(x)
# x = Dropout(.1)(x)
# x = Dense(256, activation='relu')(x)
# x = Dense(256, activation='relu')(x)
# x = Dense(256, activation='relu')(x)
# x = Dense(512, activation='relu')(x)
# main_output = Dense(nOutputDim, activation='sigmoid', name='output')(x)

In [56]:
model = Model(inputs=main_input, outputs=main_output)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 102)]             0         
                                                                 
 dense_9 (Dense)             (None, 1024)              105472    
                                                                 
 dense_10 (Dense)            (None, 1024)              1049600   
                                                                 
 dense_11 (Dense)            (None, 512)               524800    
                                                                 
 dense_12 (Dense)            (None, 512)               262656    
                                                                 
 dense_13 (Dense)            (None, 512)               262656    
                                                                 
 dense_14 (Dense)            (None, 512)               262656    

In [None]:
EPOCH = 5
BATCH_SIZE = 150

call_back_dir = './checkpoint/'
call_back_filename = 'checkpoint-epoch-{}-batch-{}-trial-001.h5'.format(EPOCH, BATCH_SIZE)
file_name = call_back_dir + call_back_filename
checkpoint = ModelCheckpoint(file_name,            # path of the checkpoint file
                             monitor='val_loss',   # save whenever val_loss improves
                             verbose=1,            # print a log line when saving
                             save_best_only=True,  # keep only the best model
                             mode='auto'           # infer min/max from the monitored metric
                            )

# fit() needs a compiled model.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# val_loss only exists when validation data is supplied; without it the
# checkpoint callback never saves. Part of the training data is held out here.
# To use an explicit validation set instead, replace validation_split with
# validation_data=(x_valid, y_valid).
history = model.fit(X_train, y_train,
      validation_split=0.2,
      epochs=EPOCH,
      batch_size=BATCH_SIZE,
      callbacks=[checkpoint], # checkpoint callback
     )

In [57]:
# Compile with binary cross-entropy loss, the Adam optimizer and accuracy as metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on the training split only (no validation data is passed).
history = model.fit(X_train, y_train, epochs=EPOCH, batch_size=BATCH_SIZE)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [23]:
_, accuracy = model.evaluate(X_test, y_test)
print('accuracy : ', accuracy * 100)

accuracy :  84.59200859069824


In [24]:
y_pred = model.predict(X_test)



In [28]:
# Class counts in the test labels.
one = 0
zero = 0

# ac / fl: rows predicted 1 whose label is 1 / is not 1.
# d_ac / d_fl: rows predicted 0 whose label is 0 / is not 0.
ac = 0
fl = 0
d_ac = 0
d_fl = 0

# Score thresholds: above suc_crit counts as "predict 1",
# below fl_crit counts as "predict 0".
suc_crit = 0.9
fl_crit = 0.5

for i in range(y_pred.shape[0]):
    label = y_test[i]
    score = y_pred[i]
    label_is_one = label == 1.0
    label_is_zero = label == 0.0

    if label_is_one:
        one += 1
    elif label_is_zero:
        zero += 1

    if score > suc_crit:
        if label_is_one:
            ac += 1
        else:
            fl += 1

    if score < fl_crit:
        if label_is_zero:
            d_ac += 1
        else:
            d_fl += 1

# Totals used as divisors; a zero total is replaced by 1 to avoid dividing by zero.
total = one + zero
total_div = 1 if total == 0 else total
pred0_total = d_ac + d_fl
pred0_div = 1 if pred0_total == 0 else pred0_total
pred1_total = ac + fl
pred1_div = 1 if pred1_total == 0 else pred1_total

print('기준명 : ', target_col_name, ', 기준값 : ', crit)
print('총량 : ', total)
print('0 : ', zero, ', 비율 : ', (zero / total_div) * 100, '(%)')
print('1 : ', one, ', 비율 : ', (one / total_div) * 100, '(%)', end='\n\n')

print('============ predict 0 =============')
print('총 횟수 : ', pred0_total, ',  타겟기준 : ', fl_crit)
print('실제 0 : ', d_ac)
print('실제 1 : ', d_fl)
print('정답비율 : ', (d_ac / pred0_div) * 100, '(%)', end='\n\n')

print('============ predict 1 =============')
print('총 횟수 : ', pred1_total, ', 타겟기준 : ', suc_crit)
print('실제 1 : ', ac)
print('실제 0 : ', fl)
print('정답비율 : ', (ac / pred1_div) * 100, '(%)', end='\n\n')

기준명 :  fProfit , 기준값 :  0.01
총량 :  17439
0 :  13492 , 비율 :  77.36682149205804 (%)
1 :  3947 , 비율 :  22.63317850794197 (%)

총 횟수 :  13891 ,  타겟기준 :  0.5
실제 0 :  12348
실제 1 :  1543
정답비율 :  88.8920884025628 (%)

총 횟수 :  2600 , 타겟기준 :  0.9
실제 1 :  1992
실제 0 :  608
정답비율 :  76.61538461538461 (%)



In [29]:
# Create the ONNX file and insert the scale parameters into the DB

import os
import subprocess
import sys

# NOTE(review): model_name is empty here; fill it in before running, otherwise
# the files are written as './h5/.h5' and './onnx/.onnx'.
model_name = ''
h5_path = './h5/'
onnx_path = './onnx/'
tmp_model_path = './model_tmp/'
save_model_name = model_name +'.h5'
output_onnx_file_name = model_name + '.onnx'

# Make sure the output folders exist before anything is written into them.
for out_dir in (h5_path, onnx_path):
    os.makedirs(out_dir, exist_ok=True)

model.save(h5_path + save_model_name)

# h5 to pb
model_convert = tf.keras.models.load_model(h5_path + save_model_name, compile=False)
model_convert.save(tmp_model_path, save_format="tf")

# pb to onnx
# An argument list (no shell) is safe for paths with spaces, sys.executable
# runs the converter with the interpreter of this session, and check=True
# stops the cell when the conversion fails, so no scale rows are written for
# an ONNX file that does not exist.
subprocess.run(
    [
        sys.executable, '-m', 'tf2onnx.convert',
        '--saved-model', tmp_model_path,
        '--output', onnx_path + output_onnx_file_name,
        '--opset', '13',
    ],
    check=True,
)

# Reflect the target table; autoload_with alone triggers the reflection.
metadata = db.MetaData()
table = db.Table('scaleDatasDict', metadata, autoload_with=engine)

WriteScaleData(table=table, feature_names=feature_names, scale_method=scale_method,
 model_name=output_onnx_file_name, pandas_data=br[feature_names])

INFO:tensorflow:Assets written to: ./model_tmp/assets
put scale to  fProfit_10_Droupout2.onnx  ends


In [None]:
# 단일 테스트??

In [32]:
# Load several previously saved .h5 models from ./h5/ (without compiling them).
t_h5_path = './h5/'
t_model_name = ['fProfit_10_Robust_100_c', 'fProfit_10_Robust_c', 'fProfit_10_Dropout','fProfit_10_Droupout2']
t_save_model_name = [name + '.h5' for  name in t_model_name]

# models[k] corresponds to t_model_name[k].
models = []
for i in t_save_model_name:
    model_tmp = tf.keras.models.load_model(t_h5_path + i, compile=False)
    models.append( model_tmp)

In [35]:
# Score the test set with each of the four loaded models.
y_pred1 = models[0].predict(X_test)
y_pred2 = models[1].predict(X_test)
y_pred3 = models[2].predict(X_test)
y_pred4 = models[3].predict(X_test)



In [37]:
# Class counts in the test labels.
one = 0
zero = 0

# ac / fl: rows every model predicts as 1 whose label is 1 / is not 1.
# d_ac / d_fl: rows every model predicts as 0 whose label is 0 / is not 0.
ac = 0
fl = 0
d_ac = 0
d_fl = 0

suc_crit = 0.5
fl_crit = 0.5

len_y = y_test.shape[0]
# Predictions of the models that have to agree.
y_pred = [y_pred1, y_pred4]

for i in range(len_y):
    label = y_test[i]
    label_is_one = label == 1.0
    label_is_zero = label == 0.0

    if label_is_one:
        one += 1
    elif label_is_zero:
        zero += 1

    # PREDICT 0: every model scores this row below fl_crit
    pass_0 = all([pred[i][0] < fl_crit for pred in y_pred])
    if pass_0:
        if label_is_zero:
            d_ac += 1
        else:
            d_fl += 1

    # PREDICT 1: every model scores this row above suc_crit
    pass_1 = all([pred[i][0] > suc_crit for pred in y_pred])
    if pass_1:
        if label_is_one:
            ac += 1
        else:
            fl += 1

# Totals used as divisors; a zero total is replaced by 1 to avoid dividing by zero.
total = one + zero
total_div = 1 if total == 0 else total
pred0_total = d_ac + d_fl
pred0_div = 1 if pred0_total == 0 else pred0_total
pred1_total = ac + fl
pred1_div = 1 if pred1_total == 0 else pred1_total

print('기준명 : ', target_col_name, ', 기준값 : ', crit)
print('총량 : ', total)
print('0 : ', zero, ', 비율 : ', (zero / total_div) * 100, '(%)')
print('1 : ', one, ', 비율 : ', (one / total_div) * 100, '(%)', end='\n\n')

print('============ predict 0 =============')
print('총 횟수 : ', pred0_total, ',  타겟기준 : ', fl_crit)
print('실제 0 : ', d_ac)
print('실제 1 : ', d_fl)
print('정답비율 : ', (d_ac / pred0_div) * 100, '(%)', end='\n\n')

print('============ predict 1 =============')
print('총 횟수 : ', pred1_total, ', 타겟기준 : ', suc_crit)
print('실제 1 : ', ac)
print('실제 0 : ', fl)
print('정답비율 : ', (ac / pred1_div) * 100, '(%)', end='\n\n')

기준명 :  fProfit , 기준값 :  0.01
총량 :  17439
0 :  13492 , 비율 :  77.36682149205804 (%)
1 :  3947 , 비율 :  22.63317850794197 (%)

총 횟수 :  12780 ,  타겟기준 :  0.5
실제 0 :  12150
실제 1 :  630
정답비율 :  95.07042253521126 (%)

총 횟수 :  2304 , 타겟기준 :  0.5
실제 1 :  2170
실제 0 :  134
정답비율 :  94.18402777777779 (%)



In [57]:
# A single all-zero sample with 102 values, laid out as a one-row DataFrame
# (the transpose turns the 102-row column into one row with 102 columns).
li = [0] * 102
p = pd.DataFrame(li).T
p

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Scale the all-zero sample with the 'Robust' method and display the result.
s = fitMyScaler(p, ROBUST)
s

In [None]:
# Predict on the scaled sample.
# NOTE(review): model_s is not defined anywhere in the visible part of this
# notebook - confirm which model this cell is meant to use.
model_s.predict(s)