In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import datetime
import pickle
from sklearn.linear_model import LogisticRegression
import pymysql

In [2]:
# Open the MySQL connection that the back-test cells pass to pd.read_sql.
# NOTE(review): no connection arguments are visible here — presumably they were
# removed before sharing. Supply host/user/password/database from environment
# variables or a secrets store instead of hard-coding them in the notebook.
con = pymysql.connect(
)

## 구축한 머신러닝 데이터 불러오기

거래대금 : 1조<br> 거래일 : 10일 <br> 종속변수 임계값 : 0.02

In [3]:
def load_file(TYPE, trading, days, target, data_dir='../../data'):
    """Load one pickled dataset split.

    The file name is built as ``{TYPE}_{trading}_{days}_{target}.pickle`` and
    is looked up inside ``data_dir``.

    Args:
        TYPE: Split name used as the file-name prefix (e.g. 'train', 'val', 'test').
        trading: Trading-value setting embedded in the file name.
        days: Day-count setting embedded in the file name.
        target: Target threshold embedded in the file name.
        data_dir: Directory that holds the pickle files. The default is the
            location this notebook has always read from.

    Returns:
        The object stored in the pickle file.

    Raises:
        FileNotFoundError: If no file exists for the given combination.
    """
    path = '{}/{}_{}_{}_{}.pickle'.format(data_dir, TYPE, trading, days, target)
    # SECURITY: unpickling can execute arbitrary code, so only load files that
    # were produced by this project.
    with open(path, 'rb') as f:
        return pickle.load(f)

In [38]:
# Load the three splits that were built with the same settings
# (trading value 10000, 10 days, target threshold 0.02).
train = load_file(TYPE='train', trading=10000, days=10, target=0.02)
val = load_file(TYPE='val', trading=10000, days=10, target=0.02)
test = load_file(TYPE='test', trading=10000, days=10, target=0.02)

### train

In [5]:
# Split every training record into its (code, date) key, feature vector and label.
lst_code2date, trainX, trainY = [], [], []

for code, date, x, y in train:
    lst_code2date.append([code, date])
    # x is a comma-separated string of integer features.
    trainX.append([int(value) for value in x.split(',')])
    trainY.append(int(y))

In [6]:
# Convert the Python lists into NumPy arrays for model fitting.
trainX, trainY = np.array(trainX), np.array(trainY)

In [7]:
# Sanity check: dimensions of the training feature array.
trainX.shape

(350, 50)

### val

In [24]:
# Split every validation record into its (code, date) key, feature vector and label.
# lst_code2date is rebound here, so from this cell on it holds the validation keys.
lst_code2date, valX, valY = [], [], []

for code, date, x, y in val:
    lst_code2date.append([code, date])
    # x is a comma-separated string of integer features.
    valX.append([int(value) for value in x.split(',')])
    valY.append(int(y))

valX, valY = np.array(valX), np.array(valY)

### test

In [52]:
# Split every test record into its (code, date) key, feature vector and label.
test_code2date, testX, testY = [], [], []

for code, date, x, y in test:
    test_code2date.append([code, date])
    # x is a comma-separated string of integer features.
    testX.append([int(value) for value in x.split(',')])
    testY.append(int(y))

testX, testY = np.array(testX), np.array(testY)

## Modeling

In [26]:
# Baseline model: logistic regression with a fixed seed, fitted on the training split.
LR = LogisticRegression(random_state = 42)
LR.fit(trainX, trainY)

LogisticRegression(random_state=42)

In [27]:
predY = LR.predict_proba(valX) # predict_proba returns the prediction as class probabilities
predY2 = LR.predict(valX) # predict returns the prediction as a binary label (1 or 0)

In [28]:
# Peek at the first five rows of predicted probabilities.
predY[:5]

array([[0.42621095, 0.57378905],
       [0.44811538, 0.55188462],
       [0.44331061, 0.55668939],
       [0.46710413, 0.53289587],
       [0.29387045, 0.70612955]])

In [13]:
# Share of all samples whose predicted label is non-zero, i.e. predicted as a 2% rise
round(np.count_nonzero(predY2) / len(predY2),2)

0.26

## 주문 요청 일지
다음 날 (D+1) 종가 2% 이상 상승 여부 예측 확률에 따라 주문 요청 일지를 작성한다.

In [29]:
# Map the predicted probability in column 1 of predY to an order size.
# Tiers are checked from the highest threshold down; below 0.3 no order is placed.
buy_tiers = [(0.9, 'all'), (0.7, 'r90'), (0.5, 'r50'), (0.3, 'r30')]

lst_output = []
for (code, date), y in zip(lst_code2date, predY):
    buy_amount = next((size for floor, size in buy_tiers if y[1] >= floor), None)
    if buy_amount is None:
        continue
    # Every buy is paired with a full sell keyed by the same date plus an "n" suffix.
    lst_output.append([code, date, 'buy', buy_amount])
    lst_output.append([code, date + "n", 'sell', 'all'])


In [30]:
# Preview the first ten order-log rows: [code, date, request, amount].
lst_output[:10]

[['048410', '20210223', 'buy', 'r50'],
 ['048410', '20210223n', 'sell', 'all'],
 ['048410', '20210226', 'buy', 'r50'],
 ['048410', '20210226n', 'sell', 'all'],
 ['024810', '20210209', 'buy', 'r50'],
 ['024810', '20210209n', 'sell', 'all'],
 ['024810', '20210215', 'buy', 'r50'],
 ['024810', '20210215n', 'sell', 'all'],
 ['024810', '20210217', 'buy', 'r90'],
 ['024810', '20210217n', 'sell', 'all']]

In [31]:
# Sort the order log by its date field. Plain string order places a sell entry
# ("YYYYMMDDn") after the same day's buy and before any later day's entries.
lst_output.sort(key = lambda x: x[1])

# Save the order log as tab-separated text. The with-block closes the file
# even if a write fails.
with open('../../data/order_request_baseline.txt','w') as OF:
    for row in lst_output:
        OF.write('\t'.join(map(str, row)) + '\n')

In [32]:
# Replay the saved order log against the price tables for the validation window
# and report the resulting return on the initial cash.
start_money = 10000000 # initial cash: 10 million KRW
money = start_money
dic_code2num ={}  # holdings: stock code -> number of shares held
dic_code2rows = {}  # price rows per stock code, so each table is queried only once

# The with-block closes the order log even when an order raises.
with open('../../data/order_request_baseline.txt','r') as order_log:
    for line in tqdm(order_log): # read the order log one line at a time
        code, date, request, amount = line.strip().split("\t")

        ##############################################################################################
        if code not in dic_code2rows:
            # A table name cannot be bound as a query parameter, so the code is
            # formatted into the SQL text; it comes from the order log written
            # by this notebook. ORDER BY makes "the next row" reliably the next
            # available date, which the sell-price lookup below depends on.
            sql_query = '''
                SELECT *
                FROM stock_{}
                WHERE Date
                BETWEEN '2021-01-01' AND '2021-07-01'
                ORDER BY Date
                '''.format(code)
            stock = pd.read_sql(sql = sql_query, con = con)
            dic_code2rows[code] = stock.values.tolist()
        lst_stock = dic_code2rows[code]

        # Reset both prices for every order so that a missing date can never
        # silently reuse the price found for an earlier order.
        buy_close = None
        sell_close = None
        is_sell_date = date.endswith('n')
        base_date = date.rstrip('n')

        # No early break: if a date appears twice the last match wins, as before.
        for idx, row in enumerate(lst_stock):
            # NOTE(review): column 0 is used as the date and column 4 as the
            # closing price — confirm against the stock_* table schema.
            if row[0].strftime('%Y%m%d') != base_date:
                continue
            if is_sell_date:
                # "<date>n" means: sell at the price of the row after <date>.
                if idx + 1 >= len(lst_stock):
                    raise ValueError(
                        'No price row after {} for stock {}'.format(base_date, code))
                sell_close = lst_stock[idx+1][4]
            else:
                buy_close = row[4]
        ##############################################################################################

        if request == 'buy':
            if amount.startswith('r'):
                # "rNN" requests NN percent of the current cash.
                request_money = money * float(amount.lstrip("r")) / 100
            elif amount == 'all':
                request_money = money
            elif amount.isdigit():
                request_money = int(amount)
            else:
                raise Exception('Not permitted option')
            request_money = min(request_money, money)
            if buy_close is None:
                raise ValueError(
                    'No buy price on {} for stock {}'.format(date, code))
            buy_num = int(request_money / buy_close)
            money -= buy_num * buy_close  # reduce cash by the amount actually spent
            # increase the holding of this code by the number of shares bought
            dic_code2num[code] = dic_code2num.get(code, 0) + buy_num
        elif request == 'sell':
            if amount == 'all':
                sell_num = dic_code2num[code]
            else:
                raise Exception('Not permitted option')
            if sell_close is None:
                raise ValueError(
                    'No sell price for {} of stock {}'.format(date, code))
            money += sell_num * sell_close
            dic_code2num[code] -= sell_num
            if dic_code2num[code] == 0:
                del dic_code2num[code]

if dic_code2num != {}: # trading has finished but some shares are still held
    raise Exception('Not empty stock')

print("Final earning rate : {} %".format(str((money-start_money) / start_money * 100)))

224it [00:06, 36.21it/s]

Final earning rate : -54.36991 %





In [44]:
# Re-create and re-fit the same logistic regression on the training split
# before it is applied to the test split.
LR = LogisticRegression(random_state = 42)
LR.fit(trainX, trainY)

LogisticRegression(random_state=42)

In [46]:
predY = LR.predict_proba(testX) # predict_proba returns the prediction as class probabilities
predY2 = LR.predict(testX) # predict returns the prediction as a binary label (1 or 0)

In [54]:
# Map the predicted probability in column 1 of predY to an order size.
# Tiers are checked from the highest threshold down; below 0.3 no order is placed.
buy_tiers = [(0.9, 'all'), (0.7, 'r90'), (0.5, 'r50'), (0.3, 'r30')]

lst_output = []
for (code, date), y in zip(test_code2date, predY):
    buy_amount = next((size for floor, size in buy_tiers if y[1] >= floor), None)
    if buy_amount is None:
        continue
    # Every buy is paired with a full sell keyed by the same date plus an "n" suffix.
    lst_output.append([code, date, 'buy', buy_amount])
    lst_output.append([code, date + "n", 'sell', 'all'])


In [55]:
# Preview the first ten order-log rows: [code, date, request, amount].
lst_output[:10]

[['222080', '20210930', 'buy', 'r50'],
 ['222080', '20210930n', 'sell', 'all'],
 ['035080', '20210714', 'buy', 'r50'],
 ['035080', '20210714n', 'sell', 'all'],
 ['028300', '20210830', 'buy', 'r50'],
 ['028300', '20210830n', 'sell', 'all'],
 ['064260', '20211118', 'buy', 'r50'],
 ['064260', '20211118n', 'sell', 'all'],
 ['064260', '20211119', 'buy', 'r50'],
 ['064260', '20211119n', 'sell', 'all']]

In [56]:
# Sort the order log by its date field. Plain string order places a sell entry
# ("YYYYMMDDn") after the same day's buy and before any later day's entries.
lst_output.sort(key = lambda x: x[1])

# Save the order log as tab-separated text, overwriting any existing file at
# this path. The with-block closes the file even if a write fails.
with open('../../data/order_request_baseline.txt','w') as OF:
    for row in lst_output:
        OF.write('\t'.join(map(str, row)) + '\n')

In [57]:
# Replay the saved order log against the price tables for the test window
# and report the resulting return on the initial cash.
start_money = 10000000 # initial cash: 10 million KRW
money = start_money
dic_code2num ={}  # holdings: stock code -> number of shares held
dic_code2rows = {}  # price rows per stock code, so each table is queried only once

# The with-block closes the order log even when an order raises.
with open('../../data/order_request_baseline.txt','r') as order_log:
    for line in tqdm(order_log): # read the order log one line at a time
        code, date, request, amount = line.strip().split("\t")

        ##############################################################################################
        if code not in dic_code2rows:
            # A table name cannot be bound as a query parameter, so the code is
            # formatted into the SQL text; it comes from the order log written
            # by this notebook. ORDER BY makes "the next row" reliably the next
            # available date, which the sell-price lookup below depends on.
            sql_query = '''
                SELECT *
                FROM stock_{}
                WHERE Date
                BETWEEN '2021-07-01' AND '2021-12-31'
                ORDER BY Date
                '''.format(code)
            stock = pd.read_sql(sql = sql_query, con = con)
            dic_code2rows[code] = stock.values.tolist()
        lst_stock = dic_code2rows[code]

        # Reset both prices for every order so that a missing date can never
        # silently reuse the price found for an earlier order.
        buy_close = None
        sell_close = None
        is_sell_date = date.endswith('n')
        base_date = date.rstrip('n')

        # No early break: if a date appears twice the last match wins, as before.
        for idx, row in enumerate(lst_stock):
            # NOTE(review): column 0 is used as the date and column 4 as the
            # closing price — confirm against the stock_* table schema.
            if row[0].strftime('%Y%m%d') != base_date:
                continue
            if is_sell_date:
                # "<date>n" means: sell at the price of the row after <date>.
                if idx + 1 >= len(lst_stock):
                    raise ValueError(
                        'No price row after {} for stock {}'.format(base_date, code))
                sell_close = lst_stock[idx+1][4]
            else:
                buy_close = row[4]
        ##############################################################################################

        if request == 'buy':
            if amount.startswith('r'):
                # "rNN" requests NN percent of the current cash.
                request_money = money * float(amount.lstrip("r")) / 100
            elif amount == 'all':
                request_money = money
            elif amount.isdigit():
                request_money = int(amount)
            else:
                raise Exception('Not permitted option')
            request_money = min(request_money, money)
            if buy_close is None:
                raise ValueError(
                    'No buy price on {} for stock {}'.format(date, code))
            buy_num = int(request_money / buy_close)
            money -= buy_num * buy_close  # reduce cash by the amount actually spent
            # increase the holding of this code by the number of shares bought
            dic_code2num[code] = dic_code2num.get(code, 0) + buy_num
        elif request == 'sell':
            if amount == 'all':
                sell_num = dic_code2num[code]
            else:
                raise Exception('Not permitted option')
            if sell_close is None:
                raise ValueError(
                    'No sell price for {} of stock {}'.format(date, code))
            money += sell_num * sell_close
            dic_code2num[code] -= sell_num
            if dic_code2num[code] == 0:
                del dic_code2num[code]

if dic_code2num != {}: # trading has finished but some shares are still held
    raise Exception('Not empty stock')

print("Final earning rate : {} %".format(str((money-start_money) / start_money * 100)))

116it [00:03, 35.26it/s]

Final earning rate : -15.69981 %



