## 주가 예측: 직전 거래일 예측 채점 및 다음 종가·등락 예측

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from pykrx import stock as st
import pickle

### 저장된 라벨·모델(pickle) 불러오기 및 예측 함수 정의

DataFrame : 2차원 데이터 -> table

In [2]:
def _load_pickle(path):
    """Return the object pickled at *path*, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load .sav files that come from a trusted source.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Label tables: stock_label is indexed below by its 'isin' / 'num' columns,
# date_label by its 's_date' / 'num' columns.
stock_label = _load_pickle('stock_label.sav')
date_label = _load_pickle('date_label.sav')
# Fitted estimators; only their .predict() method is used below.
close_model = _load_pickle('predict_close_model.sav')
updown_model = _load_pickle('predict_updown_model.sav')

def updateLabelEncoder(today):
    """Add the trading date(s) fetched for *today* to the date label table.

    Fetches the OHLCV rows of ticker "005930" for the single day *today*,
    appends every returned date that is not yet in the global ``date_label``
    table with the next unused label number, and re-pickles the table to
    ``date_label.sav``.

    Args:
        today: Date passed to ``st.get_market_ohlcv`` as both start and end
            (the notebook passes strings such as "20230710").

    Returns:
        None. Rebinds the global ``date_label`` when new dates were found and
        always rewrites ``date_label.sav``.
    """
    global date_label
    fetched = st.get_market_ohlcv(today, today, "005930").reset_index()

    known_dates = set(date_label['s_date'])
    next_label = max(date_label['num']) + 1
    new_rows = []
    for s_date in fetched['날짜'].astype(str):
        if s_date in known_dates:
            continue
        new_rows.append({'s_date': s_date, 'num': next_label})
        known_dates.add(s_date)
        # Advance the counter so that several new dates never share a label.
        next_label += 1

    if new_rows:
        date_label = pd.concat([date_label, pd.DataFrame(new_rows)], ignore_index=True)

    # Context manager guarantees the file is flushed and closed.
    with open('date_label.sav', 'wb') as fh:
        pickle.dump(date_label, fh)

def preprocessingNewData(today):
    """Fetch two recent trading days and turn them into model inputs and targets.

    After refreshing the date labels for *today*, the three newest entries of
    ``date_label`` are taken and the newest one is discarded. Market-wide OHLCV
    data is downloaded for the remaining two dates. Rows of the earlier date
    form the "score" set (with next-day targets), rows of the later date form
    the "predict" set.

    NOTE(review): the dates are taken from the tail of ``date_label``, not
    derived from *today*; *today* only influences which date gets registered.
    Confirm this is intended.

    Args:
        today: Date forwarded to ``updateLabelEncoder``.

    Returns:
        A list ``[score, r_price, r_updown, predict]``:
        score     -- features of the earlier date, 'isin' and 's_date' encoded
                     through ``stock_label`` / ``date_label``.
        r_price   -- next fetched close of the same ticker for each score row
                     (0 when the ticker has no row on the later date).
        r_updown  -- 1.0 when the next 'updown' value is positive, 0.0 when it
                     is negative, zero or missing.
        predict   -- features of the later date, encoded like ``score``.

    Raises:
        ValueError: if ``date_label`` holds fewer than two dates, so that no
            date is left after discarding the newest one.
    """
    updateLabelEncoder(today)
    columnMapper = {
        "티커": "isin",
        "시가": "open",
        "고가": "high",
        "저가": "low",
        "종가": "close",
        "거래량": "volume",
        "거래대금": "amount",
        "등락률": "updown",
    }

    # Newest three labelled dates without the newest one.
    date_set_dash = date_label.tail(3)['s_date'].to_list()[:-1]
    if not date_set_dash:
        raise ValueError("date_label needs at least two dates to build features")

    # Download each day once and concatenate a single time.
    frames = []
    for dashed in date_set_dash:
        daily = st.get_market_ohlcv(dashed.replace("-", "")).reset_index()
        daily["s_date"] = dashed
        frames.append(daily)
    result = pd.concat(frames, ignore_index=True)

    result = result.rename(columns=columnMapper)
    feature_cols = ['isin', 's_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'updown']
    result = result[feature_cols].sort_values(['isin', 's_date']).reset_index(drop=True)

    # Shift inside each ticker so a ticker that is missing on the later date
    # never picks up the values of the following ticker.
    per_ticker = result.groupby('isin', sort=False)
    next_close = per_ticker['close'].shift(-1)
    next_updown = per_ticker['updown'].shift(-1)
    result['r_price'] = next_close
    # sign(next_updown) mapped from {-1, +1} to {0, 1}; 0/0 yields NaN, which
    # the fillna below turns into 0.
    result['r_updown'] = 0.5 * (next_updown / abs(next_updown)) + 0.5
    result = result.fillna(0)

    result['isin'] = result['isin'].replace(stock_label.set_index('isin')['num'])

    latest = date_set_dash[-1]
    is_latest = result["s_date"] == latest
    score = result[~is_latest].reset_index(drop=True)
    predict = result[is_latest].reset_index(drop=True)

    date_codes = date_label.set_index('s_date')['num']
    score['s_date'] = score['s_date'].replace(date_codes)
    predict['s_date'] = predict['s_date'].replace(date_codes)

    # Targets are returned separately for the score set and discarded for the
    # predict set.
    r_price = score.pop("r_price")
    r_updown = score.pop("r_updown")
    predict = predict.drop(columns=["r_price", "r_updown"])

    return [score, r_price, r_updown, predict]

def predictToday(today):
    """Predict the next close and up/down signal from the later fetched day.

    Runs ``close_model`` and ``updown_model`` on the "predict" frame returned
    by ``preprocessingNewData(today)``.

    Args:
        today: Date forwarded to ``preprocessingNewData``.

    Returns:
        DataFrame with columns:
        isin, s_date -- decoded back through ``stock_label`` / ``date_label``.
        close        -- close taken from the input features.
        e_price      -- predicted close, rounded to an integer.
        e_updown     -- up/down model output converted to an integer.
        e_rate       -- percentage change from close to e_price, 2 decimals.
        h_updown     -- 1 when e_updown is 1 and e_rate is positive, else 0.
    """
    _, _, _, new_data = preprocessingNewData(today)
    e_price = pd.DataFrame(close_model.predict(new_data), columns=['e_price'])
    e_updown = pd.DataFrame(updown_model.predict(new_data), columns=['e_updown'])

    result = pd.concat([new_data, e_price, e_updown], axis=1)
    result['e_price'] = result['e_price'].round().astype(int)
    # NOTE(review): scoreYesterday converts e_updown with a plain round();
    # this formula rounds to one decimal and then truncates after adding 0.5.
    # Confirm which rule is intended.
    result['e_updown'] = (result['e_updown'].round(1) + 0.5).astype(int)
    result['e_rate'] = ((result['e_price'] - result['close']) / result['close'] * 100).round(2)

    price_says_up = (result['e_rate'] > 0).astype(int)
    result['h_updown'] = ((result['e_updown'] == 1) & (price_says_up == 1)).astype(int)

    result = result.drop(columns=['open', 'high', 'low', 'volume', 'amount', 'updown'])

    # Assign the decoded columns back: an in-place replace on result['col']
    # acts on a temporary Series and does not update the frame under pandas
    # Copy-on-Write.
    result['isin'] = result['isin'].replace(stock_label.set_index('num')['isin'])
    result['s_date'] = result['s_date'].replace(date_label.set_index('num')['s_date'])
    return result

def scoreYesterday(today):
    """Compare model predictions for the earlier fetched day with actual values.

    Runs ``close_model`` and ``updown_model`` on the "score" frame returned by
    ``preprocessingNewData(today)`` and measures them against the targets
    (``r_price``, ``r_updown``) returned alongside it.

    Args:
        today: Date forwarded to ``preprocessingNewData``.

    Returns:
        DataFrame with columns:
        isin, s_date -- decoded back through ``stock_label`` / ``date_label``.
        close        -- close taken from the input features.
        e_price      -- predicted close, rounded to an integer.
        r_price      -- target close.
        e_rate       -- percentage change from close to e_price, 2 decimals.
        r_rate       -- percentage change from close to r_price, 2 decimals.
        s_correct    -- 1 when the rounded up/down prediction equals r_updown.
        h_correct    -- 1 when s_correct is 1 and e_rate * r_rate is positive.
        error        -- absolute percentage error of e_price against r_price.
    """
    new_data, r_price, r_updown, _ = preprocessingNewData(today)
    e_price = pd.DataFrame(close_model.predict(new_data), columns=['e_price'])
    e_updown = pd.DataFrame(updown_model.predict(new_data), columns=['e_updown'])

    result = pd.concat([new_data, e_price, r_price, e_updown, r_updown], axis=1)
    result['e_price'] = result['e_price'].round().astype(int)
    result['e_updown'] = result['e_updown'].round().astype(int)
    result['e_rate'] = ((result['e_price'] - result['close']) / result['close'] * 100).round(2)
    result['r_rate'] = ((result['r_price'] - result['close']) / result['close'] * 100).round(2)
    result['s_correct'] = (result['e_updown'] == result['r_updown']).astype(int)

    # sign(e_rate * r_rate) mapped from {-1, +1} to {0, 1}; a zero product
    # gives 0/0 = NaN, which is counted as "not the same direction".
    rate_product = result['e_rate'] * result['r_rate']
    same_direction = ((rate_product / abs(rate_product) + 1) * 0.5).fillna(0).astype(int)
    result['h_correct'] = ((result['s_correct'] == 1) & (same_direction == 1)).astype(int)

    result['error'] = (abs((result['e_price'] - result['r_price']) / result['r_price']) * 100).round(2)
    result = result.drop(columns=['open', 'high', 'low', 'volume', 'amount', 'updown', "e_updown", "r_updown"])

    # Assign the decoded columns back: an in-place replace on result['col']
    # acts on a temporary Series and does not update the frame under pandas
    # Copy-on-Write.
    result['isin'] = result['isin'].replace(stock_label.set_index('num')['isin'])
    result['s_date'] = result['s_date'].replace(date_label.set_index('num')['s_date'])
    return result


In [3]:
# Register the run date in the date labels, then build the feature frames.
today_date = "20230710"
updateLabelEncoder(today_date)
score, r_price, r_updown, predict = preprocessingNewData(today_date)

In [4]:
# Rebind score / predict to the evaluation and prediction tables.
# NOTE(review): the two calls pass different dates -- confirm this is intended.
score = scoreYesterday(today="20230710")
predict = predictToday(today="20230707")

In [5]:
# Display the table returned by scoreYesterday in the previous cell.
score

Unnamed: 0,isin,s_date,close,e_price,r_price,e_rate,r_rate,s_correct,h_correct,error
0,000020,2023-07-06,10300,10307,10100.0,0.07,-1.94,1,0,2.05
1,000040,2023-07-06,574,596,554.0,3.83,-3.48,1,0,7.58
2,000050,2023-07-06,9700,9715,9840.0,0.15,1.44,0,0,1.27
3,000070,2023-07-06,72200,72149,71000.0,-0.07,-1.66,1,1,1.62
4,000075,2023-07-06,54100,54066,53100.0,-0.06,-1.85,1,1,1.82
...,...,...,...,...,...,...,...,...,...,...
948,457190,2023-07-06,312500,312282,310000.0,-0.07,-0.80,0,0,0.74
949,460850,2023-07-06,10230,10225,10120.0,-0.05,-1.08,1,1,1.04
950,460860,2023-07-06,9950,9936,10100.0,-0.14,1.51,0,0,1.62
951,900140,2023-07-06,2360,2355,2420.0,-0.21,2.54,0,0,2.69


In [6]:
# Display the table returned by predictToday in cell 4.
predict

Unnamed: 0,isin,s_date,close,e_price,e_updown,e_rate,h_updown
0,000020,2023-07-07,10100,10099,1,-0.01,0
1,000040,2023-07-07,554,561,1,1.26,1
2,000050,2023-07-07,9840,9861,0,0.21,0
3,000070,2023-07-07,71000,70961,1,-0.05,0
4,000075,2023-07-07,53100,53051,0,-0.09,0
...,...,...,...,...,...,...,...
948,457190,2023-07-07,310000,309733,0,-0.09,0
949,460850,2023-07-07,10120,10127,0,0.07,0
950,460860,2023-07-07,10100,10122,0,0.22,0
951,900140,2023-07-07,2420,2442,0,0.91,0


In [7]:
# Rows where h_updown is 1, ordered by expected rate, smallest first.
predict.loc[predict['h_updown'] == 1].sort_values(by='e_rate', ascending=True)

Unnamed: 0,isin,s_date,close,e_price,e_updown,e_rate,h_updown
280,005420,2023-07-07,56400,56405,1,0.01,1
613,035250,2023-07-07,16520,16521,1,0.01,1
296,005830,2023-07-07,72200,72206,1,0.01,1
772,114090,2023-07-07,14370,14372,1,0.01,1
241,004545,2023-07-07,14980,14982,1,0.01,1
...,...,...,...,...,...,...,...
83,001380,2023-07-07,1327,1338,1,0.83,1
1,000040,2023-07-07,554,561,1,1.26,1
775,118000,2023-07-07,514,523,1,1.75,1
93,001510,2023-07-07,663,676,1,1.96,1
