# 라이브러리 및 함수 불러오기

In [1]:
import warnings
warnings.filterwarnings('ignore')
import os
import time
import pandas as pd
import numpy as np
import datetime
import pickle
import pymysql
from sqlalchemy import create_engine
from tqdm import tqdm
import random


# model
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier 
from lightgbm import LGBMClassifier
from sklearn.neural_network import MLPClassifier
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score, roc_auc_score

# scaling
from sklearn.preprocessing import MinMaxScaler # 정규화

In [3]:
#con = pymysql.connect()
#cursor = con.cursor()

In [4]:
def load_file(TYPE,trading, days,target):
    with open('../data/{}_{}_{}_{}.pickle'.format(TYPE,trading, days,target), 'rb') as f:
        data = pickle.load(f)
    return data

In [5]:
def set_random_seed(seed=42):
  np.random.seed(seed)
  random.seed(seed)

In [6]:
set_random_seed()

In [7]:
def order_request(trading_val, days,label,predY,lst_code2date,TYPE):
    lst_output = []
    
    for (code,date),y in zip(lst_code2date, predY):
        if y[1] >= 0.9:
            lst_output.append([code, date, 'buy','all'])
            lst_output.append([code, date+"n", 'sell', 'all'])
        elif y[1] >= 0.7:
            lst_output.append([code, date, 'buy','r90'])
            lst_output.append([code, date+"n", 'sell', 'all'])
        elif y[1] >= 0.5:
            lst_output.append([code, date, 'buy', 'r50'])
            lst_output.append([code, date+"n", 'sell', 'all'])
        elif y[1] >= 0.3:
            lst_output.append([code, date, 'buy', 'r30'])
            lst_output.append([code, date+"n", 'sell', 'all'])
        
    lst_output.sort(key = lambda x: x[1]) # date 기준으로 주문 요청 결과 정렬

    OF = open('../data/order_request_{}_{}_{}_{}.txt'.format(trading_val, days,label,TYPE),'w')
    for row in lst_output:
        OF.write('\t'.join(map(str, row)) + '\n')
    OF.close()

In [8]:
def make_rate(trading_val, days,label,con):
    start_money = 10000000 # 초기 현금 1천만원
    money = start_money
    dic_code2num ={}  # 보유 종목

    IF = open('../data/order_request_{}_{}_{}_val.txt'.format(trading_val, days,label))
    for i, line in tqdm(enumerate(IF)): #주문 일지를 한 줄 읽어 옴
        code, date, request, amount = line.strip().split("\t")

        ##############################################################################################
        sql_query = '''
                    SELECT *
                    FROM stock_{}
                    WHERE Date
                    BETWEEN '2021-01-01' AND '2021-07-01'
                    '''.format(code)
        stock = pd.read_sql(sql = sql_query, con = con)
        lst_stock = stock.values.tolist()

        for idx, row in enumerate(lst_stock):
            Date = row[0].strftime('%Y%m%d')        
            if date.endswith('n'):
                if date.rstrip('n') == Date:
                    sell_close = lst_stock[idx+1][4]
            elif date == Date:
                buy_close = lst_stock[idx][4]

        ##############################################################################################

        if request == 'buy': 
            if amount.startswith('r'):
                request_money = money * float(amount.lstrip("r")) / 100
            elif amount == 'all':
                request_money = money
            elif amount.isdigit():
                request_money = int(amount)
        
            else:
                raise Exception('Not permitted option')
            request_money = min(request_money, money)
            buy_num = int(request_money / buy_close)
            money -= buy_num * buy_close  # 현재 금액(money)을 실제 매수액을 뺀 만큼 업데이트
            if code not in dic_code2num:
                dic_code2num[code] = 0
            dic_code2num[code] += buy_num # 보유 종목 데이터에 구매 종목(code)를 매수 개수 만큼 증가
        if request == 'sell': # sell인 경우
            if amount == 'all':
                sell_num = dic_code2num[code]
   
            else:
                raise Exception('Not permitted option')            
            money += sell_num * sell_close
            dic_code2num[code] -= sell_num
            if dic_code2num[code] == 0:
                del dic_code2num[code]
    IF.close()            

    if dic_code2num != {}: # 매매가 종료되었는데 보유 종목이 있으면
        raise Exception('Not empty stock') 

    print("Final earning rate : {} %".format(str((money-start_money) / start_money * 100)))

In [9]:
def make_rate_test(trading_val, days,label,con):
    start_money = 10000000 # 초기 현금 1천만원
    money = start_money
    dic_code2num ={}  # 보유 종목

    IF = open('../data/order_request_{}_{}_{}_test.txt'.format(trading_val, days,label))
    for i, line in tqdm(enumerate(IF)): #주문 일지를 한 줄 읽어 옴
        code, date, request, amount = line.strip().split("\t")

        ##############################################################################################
        sql_query = '''
                    SELECT *
                    FROM stock_{}
                    WHERE Date
                    BETWEEN '2021-07-01' AND '2021-12-31'
                    '''.format(code)
        stock = pd.read_sql(sql = sql_query, con = con)
        lst_stock = stock.values.tolist()

        for idx, row in enumerate(lst_stock):
            Date = row[0].strftime('%Y%m%d')        
            if date.endswith('n'):
                if date.rstrip('n') == Date:
                    sell_close = lst_stock[idx+1][4]
            elif date == Date:
                buy_close = lst_stock[idx][4]

        ##############################################################################################

        if request == 'buy': 
            if amount.startswith('r'):
                request_money = money * float(amount.lstrip("r")) / 100
            elif amount == 'all':
                request_money = money
            elif amount.isdigit():
                request_money = int(amount)
        
            else:
                raise Exception('Not permitted option')
            request_money = min(request_money, money)
            buy_num = int(request_money / buy_close)
            money -= buy_num * buy_close  # 현재 금액(money)을 실제 매수액을 뺀 만큼 업데이트
            if code not in dic_code2num:
                dic_code2num[code] = 0
            dic_code2num[code] += buy_num # 보유 종목 데이터에 구매 종목(code)를 매수 개수 만큼 증가
        if request == 'sell': # sell인 경우
            if amount == 'all':
                sell_num = dic_code2num[code]
   
            else:
                raise Exception('Not permitted option')            
            money += sell_num * sell_close
            dic_code2num[code] -= sell_num
            if dic_code2num[code] == 0:
                del dic_code2num[code]
    IF.close()            

    if dic_code2num != {}: # 매매가 종료되었는데 보유 종목이 있으면
        raise Exception('Not empty stock') 

    print("Final earning rate : {} %".format(str((money-start_money) / start_money * 100)))

# Base-line
1. 모델 : LR
2. 거래대금 : 1조 <br>
3. 거래일 : 10일
4. 오늘 대비 다음 날 종가 상승률 임계값 : 0.02 


In [30]:
train = load_file('train',10000,10,0.02)
val = load_file('val',10000,10,0.02)
test = load_file('test',10000,10,0.02)

In [31]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(int, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [32]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(int, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [33]:
trainX.shape , valX.shape

((350, 50), (187, 50))

# 모델 성능

## LR

In [51]:
LR = LogisticRegression()
LR.fit(trainX, trainY)
predY = LR.predict_proba(valX) 
predY2 = LR.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))

Accuracy :  0.8663
ROC :  0.8234


In [52]:
for y in predY:
    if y[1] > 0.7:
        print(y)

[0.29387045 0.70612955]
[0.28675539 0.71324461]
[0.2048895 0.7951105]


In [36]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [38]:
make_rate(10000,10,0.02,con)

224it [00:02, 102.92it/s]

Final earning rate : -54.36991 %





## GB

In [53]:
GB = GradientBoostingClassifier()
GB.fit(trainX, trainY)
predY = GB.predict_proba(valX) 
predY2 = GB.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))

Accuracy :  0.4866
ROC :  0.576


In [40]:
SUM = 0 

for y in predY:
    if y[1] > 0.7:
        SUM += 1
print(SUM)

72


In [41]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [43]:
make_rate(10000,10,0.02,con)

350it [00:03, 102.42it/s]

Final earning rate : -63.91111000000001 %





## GN

In [54]:
GN = GaussianNB()
GN.fit(trainX, trainY)
predY = GN.predict_proba(valX) 
predY2 = GN.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))

Accuracy :  0.5561
ROC :  0.6326


In [55]:
SUM = 0 

for y in predY:
    if y[1] > 0.7:
        SUM += 1
print(SUM)

95


In [56]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [58]:
make_rate(10000,10,0.02,con)

288it [00:03, 90.90it/s]

Final earning rate : -71.84048 %





## RF

In [41]:
RF = RandomForestClassifier(random_state = 42)
RF.fit(trainX, trainY)
predY = RF.predict_proba(valX) 
predY2 = RF.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))

Accuracy :  0.3316
ROC :  0.4581


In [42]:
SUM = 0 

for y in predY:
    if y[1] > 0.7:
        SUM += 1
print(SUM)

33


In [43]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [59]:
make_rate(10000,10,0.02,con)

288it [00:03, 92.51it/s]

Final earning rate : -71.84048 %





## MLP

In [60]:
MLP = MLPClassifier(random_state = 42)
MLP.fit(trainX, trainY)
predY = MLP.predict_proba(valX) 
predY2 = MLP.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))

Accuracy :  0.7647
ROC :  0.705


In [61]:
SUM = 0 

for y in predY:
    if y[1] > 0.7:
        SUM += 1
print(SUM)

46


In [62]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [63]:
make_rate(10000,10,0.02,con)

92it [00:00, 92.61it/s]

Final earning rate : -59.99539 %





## LGB

In [77]:
lgb = LGBMClassifier()
lgb.fit(trainX, trainY)
predY = lgb.predict_proba(valX) 
predY2 = lgb.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))

Accuracy :  0.738
ROC :  0.7446


In [79]:
SUM = 0 

for y in predY:
    if y[1] > 0.7:
        SUM += 1
print(SUM)


58


In [80]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [81]:
make_rate(10000,10,0.02,con)

250it [00:02, 91.16it/s]

Final earning rate : -75.0336 %





## XGB

In [82]:
xgb = XGBClassifier()
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),4))
print('ROC : ', round(roc_auc_score(valY,predY2),4))


Accuracy :  0.754
ROC :  0.7456


In [83]:
SUM = 0 

for y in predY:
    if y[1] > 0.7:
        SUM += 1
print(SUM)

55


In [84]:
order_request(10000, 10,0.02,predY,val_code2date,'val')

In [85]:
make_rate(10000,10,0.02,con)

198it [00:02, 88.87it/s]

Final earning rate : -70.76289 %





# 거래대금
## 1조
- 최종 모델 XGB

XGB

In [76]:
order_request(10000, 10,0.02, predY,val_code2date,'val')

In [86]:
make_rate(10000,10,0.02,con)

198it [00:02, 89.25it/s]

Final earning rate : -70.76289 %





## 1000억

In [88]:
train = load_file('train',1000,10,0.02)
val = load_file('val',1000,10,0.02)

In [89]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(int, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [90]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(int, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [91]:
xgb = XGBClassifier(random_state = 42)
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', round(accuracy_score(valY,predY2),2))
print('ROC : ',  round(roc_auc_score(valY,predY2),2))


Accuracy :  0.89
ROC :  0.89


In [92]:
order_request(1000,10,0.02,predY,val_code2date, 'val')

In [96]:
make_rate(1000,10,0.02,con)

5180it [00:57, 89.82it/s]

Final earning rate : -36.29476 %





## 100억

In [97]:
train = load_file('train',100,10,0.02)
val = load_file('val',100,10,0.02)

In [98]:
train.shape,val.shape,test.shape

((133796, 4), (35297, 4), (82, 4))

In [99]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(int, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [100]:
pd.Series(trainY).value_counts()

0    87820
1    45976
dtype: int64

In [101]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(int, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [102]:
xgb = XGBClassifier()
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))


Accuracy :  0.931637249624614
ROC :  0.9160808091362549


In [103]:
order_request(100,10,0.02,predY,val_code2date, 'val')

In [104]:
make_rate(100,10,0.02,con)

27614it [05:00, 92.01it/s]

Final earning rate : 22.0062 %





## 10억

In [105]:
train = load_file('train',10,10,0.02)
val = load_file('val',10,10,0.02)

In [106]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(int, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [107]:
pd.Series(trainY).value_counts()

0    384041
1    129056
dtype: int64

In [108]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(int, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [109]:
xgb = XGBClassifier(random_state = 42,eval_metric='mlogloss')
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))


Accuracy :  0.942790736282892
ROC :  0.890920500639063


In [110]:
order_request(10,10,0.02,predY,val_code2date, 'val')

In [111]:
make_rate(10,10,0.02,con)

60048it [10:51, 92.17it/s] 

Final earning rate : -18.536179999999998 %





# 스케일링

## 예측 성능
### 100억

In [112]:
def load_file(TYPE,trading, days,target):
    with open('../data/{}_{}_{}_{}.pickle'.format(TYPE,trading, days,target), 'rb') as f:
        data = pickle.load(f)
    return data

In [113]:
train = load_file('scaling_train',100,10,0.02)
val = load_file('scaling_val',100,10,0.02)

In [114]:
train.shape,val.shape

((133796, 4), (35297, 4))

In [115]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(float, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [116]:
trainY

array([1, 0, 0, ..., 0, 1, 1])

In [117]:
pd.Series(trainY).value_counts()

0    87820
1    45976
dtype: int64

In [118]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(float, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [119]:
pd.Series(valY).value_counts()

0    23047
1    12250
dtype: int64

In [120]:
xgb = XGBClassifier(random_state = 42,eval_metric='mlogloss')
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))

Accuracy :  0.881916310168003
ROC :  0.908945073200018


In [121]:
order_request(100,10,0.02,predY, val_code2date, 'val')

In [122]:
make_rate(100,10, 0.02,con)

33766it [05:46, 97.48it/s] 

Final earning rate : 1.8519500000000002 %





# 임계값 설정

In [13]:
train = load_file('scaling_train',100,10,0.05)
val = load_file('scaling_val',100,10,0.05)

In [14]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(float, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [15]:
pd.Series(trainY).value_counts()

0    110458
1     23338
dtype: int64

In [16]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(float, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [17]:
xgb = XGBClassifier(random_state = 42,eval_metric='mlogloss')
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))


Accuracy :  0.8361617134600674
ROC :  0.8980534035362213


In [144]:
order_request(100,10,0.05,predY,val_code2date, 'val')

In [145]:
make_rate(100,10,0.05,con)

25198it [04:13, 99.35it/s] 

Final earning rate : -13.65423 %





# 거래일 설정
## 5일
여기부터

In [18]:
train = load_file('scaling_train',100,5,0.02)
val = load_file('scaling_val',100,5,0.02)

In [19]:
train.shape,val.shape

((134795, 4), (37294, 4))

In [20]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(float, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [21]:
pd.Series(trainY).value_counts()

0    88474
1    46321
dtype: int64

In [22]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(float, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [23]:
xgb = XGBClassifier()
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))


Accuracy :  0.8808119268515042
ROC :  0.9078804684361732


In [24]:
order_request(100,5,0.02,predY,val_code2date, 'val')

In [133]:
make_rate(100,5,0.02,con)

35706it [06:05, 97.68it/s] 

Final earning rate : 13.13434 %





## 20일

In [25]:
train = load_file('train',100,20,0.02)
val = load_file('val',100,20,0.02)

In [27]:
train.shape,val.shape

((131660, 4), (31631, 4))

In [28]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(float, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [29]:
pd.Series(trainY).value_counts()

0    86484
1    45176
dtype: int64

In [30]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(float, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [31]:
xgb = XGBClassifier()
xgb.fit(trainX, trainY)
predY = xgb.predict_proba(valX) 
predY2 = xgb.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))


Accuracy :  0.9252948057285574
ROC :  0.9085361221149861


In [174]:
order_request(100,20,0.02,predY,val_code2date, 'val')

In [175]:
make_rate(100,20,0.02,con)

25076it [06:03, 69.00it/s]

Final earning rate : -5.54572 %





# 하이퍼 파라미터

In [12]:
train = load_file('train',100,5,0.02)
val = load_file('val',100,5,0.02)

In [13]:
lst_code2date = []
trainX = []
trainY = []

for line in train:
    code, date, x, y = line
    lst_code2date.append([code,date])
    trainX.append(list(map(float, x.split(','))))
    trainY.append(int(y))
    
trainX = np.array(trainX)
trainY = np.array(trainY)

In [14]:
val_code2date = []
valX = []
valY = []

for line in val:
    code, date, x, y = line
    val_code2date.append([code,date])
    valX.append(list(map(float, x.split(','))))
    valY.append(int(y))
valX = np.array(valX)
valY = np.array(valY)

In [15]:
train.shape, val.shape

((134795, 4), (37294, 4))

In [16]:
from sklearn.model_selection import GridSearchCV

xgb = XGBClassifier(random_state = 42,eval_metric='mlogloss')

params = {'max_depth' : [3,5], 'scale_pos_weight' : [1,3,5], 'learning_rate' : [0.01, 0.1],
         'n_estimators' : [100,300,500]}

In [17]:
#grid_cv = GridSearchCV(xgb, param_grid = params,  cv = 5,scoring ="roc_auc", refit=True)
#grid_cv.fit(trainX, trainY)

In [18]:
#print(grid_cv.best_params_)

In [28]:
xgb_model = XGBClassifier(learning_rate = 0.1 ,max_depth = 3 , n_estimators = 300, scale_pos_weight = 1 )
xgb_model.fit(trainX,trainY)
predY = xgb_model.predict_proba(valX) 
predY2 = xgb_model.predict(valX) 

print('Accuracy : ', accuracy_score(valY,predY2))
print('ROC : ', roc_auc_score(valY,predY2))


Accuracy :  0.8946479326433207
ROC :  0.916764204436671


In [20]:
order_request(100,5,0.02,predY,val_code2date, 'val')

In [14]:
make_rate(100,5,0.02,con)

33602it [14:00, 39.96it/s]

Final earning rate : 5.11447 %





# 최종 수익률

In [29]:
test = load_file('test',100,5,0.02)

In [30]:
val.shape, test.shape

((37294, 4), (30492, 4))

In [31]:
test_code2date = []
testX = []
testY = []

for line in test:
    code, date, x, y = line
    test_code2date.append([code,date])
    testX.append(list(map(float, x.split(','))))
    testY.append(int(y))
testX = np.array(testX)
testY = np.array(testY)

In [32]:
xgb_model = XGBClassifier(learning_rate = 0.1 ,max_depth = 3 , n_estimators = 300, scale_pos_weight = 1 )
xgb_model.fit(trainX,trainY)
predY = xgb_model.predict_proba(testX) 
predY2 = xgb_model.predict(testX) 

print('Accuracy : ', accuracy_score(testY,predY2))
print('ROC : ', roc_auc_score(testY,predY2))

Accuracy :  0.8922012331103241
ROC :  0.9180778341623876


In [33]:
order_request(100,20,0.02,predY,test_code2date, 'test')

In [56]:
make_rate_test(100,5,0.02,con)

Final earning rate : 3.25245 %
