## 제목을 입력합니다

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from pykrx import stock as st
import pickle

### csv 불러오기 실습

DataFrame : 2차원 데이터 -> table

In [33]:
# Load the two fitted models and the two label tables from local pickle files.
# Each file is opened in a `with` block so its handle is closed right after
# unpickling instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these .sav files must come from a trusted source.
with open('predict_close_model_new.sav', 'rb') as model_file:
    close_model = pickle.load(model_file)
with open('predict_updown_model_new.sav', 'rb') as model_file:
    updown_model = pickle.load(model_file)
with open('date_label.sav', 'rb') as label_file:
    date_label = pickle.load(label_file)
with open('stock_label.sav', 'rb') as label_file:
    stock_label = pickle.load(label_file)

# 1. Update the date label table and save it back with pickle.
def updateLabelEncoder(today):
    """Register the trading date(s) returned for *today* in ``date_label``.

    Fetches the OHLCV rows of ticker "005930" for the single-day range
    ``today``..``today`` and appends every returned date that is not yet in
    ``date_label['s_date']``, giving each one the next unused ``num``. The
    module-level ``date_label`` is rebound to the extended table, which is
    then written to ``date_label.sav``.

    Args:
        today: Date string passed to ``st.get_market_ohlcv`` as both start
            and end date (the notebook passes e.g. ``"20230711"``).
    """
    global date_label
    next_label = max(date_label['num']) + 1
    known_dates = set(date_label['s_date'].values)
    todayDate = st.get_market_ohlcv(today, today, "005930").reset_index()

    new_rows = []
    for s_date in todayDate['날짜'].astype(str):
        if s_date not in known_dates:
            new_rows.append({'s_date': s_date, 'num': next_label})
            known_dates.add(s_date)
            # Advance the counter so several new dates never share a label.
            next_label += 1

    if new_rows:
        date_label = pd.concat([date_label, pd.DataFrame(new_rows)], ignore_index=True)

    # The table is written on every call, whether or not a date was added.
    with open('date_label.sav', 'wb') as label_file:
        pickle.dump(date_label, label_file)

# 2. Preprocess the previous-day and current-day quotes for the price and up/down models.
def preprocessingNewData(today):
    """Fetch the last two labelled trading days and build the model inputs.

    After refreshing ``date_label`` for *today*, the market-wide OHLCV table
    is downloaded for each of the last two dates in ``date_label``. The
    earlier date becomes the "score" set (its next-day close and direction
    are known from the later date); the later date becomes the "predict" set.

    Args:
        today: Date string forwarded to ``updateLabelEncoder``. The rows
            processed are always the last two dates of ``date_label``.

    Returns:
        A list ``[score, r_price, r_updown, predict]`` where

        * ``score``: features of the earlier date, with ``isin`` and
          ``s_date`` replaced by their numeric labels;
        * ``r_price``: next-day close of the same ticker for each score row
          (0 when the ticker has no row on the later date);
        * ``r_updown``: 1.0 when the next-day ``updown`` is positive, else 0.0;
        * ``predict``: features of the later date, encoded like ``score``.
    """
    updateLabelEncoder(today)
    columnMapper = {"티커":"isin", "시가" : "open", "고가" : "high", "저가" : "low", "종가" : "close", "거래량" : "volume", "거래대금" : "amount", "등락률" : "updown"}

    # pykrx is queried with the dashes removed; the dashed form is kept as s_date.
    date_set = date_label.tail(2)['s_date'].str.replace("-",'').to_list()
    date_set_dash = date_label.tail(2)['s_date'].to_list()

    # Collect the per-day frames and concatenate once.
    frames = []
    for date, s_date in zip(date_set, date_set_dash):
        new = st.get_market_ohlcv(date).reset_index()
        new["s_date"] = s_date
        frames.append(new)
    result = pd.concat(frames, ignore_index=True)

    result.rename(columnMapper, axis=1, inplace=True)
    result = (
        result[['isin', 's_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'updown']]
        .sort_values(['isin', 's_date'])
        .reset_index(drop=True)
    )

    # Shift within each ticker: a plain shift(-1) would hand a ticker that has
    # no row on the later date the close/updown of the *next ticker*.
    by_ticker = result.groupby('isin')
    result['r_price'] = by_ticker['close'].shift(-1)
    next_updown = by_ticker['updown'].shift(-1)
    # sign(next_updown) mapped from {-1, +1} to {0, 1}; a zero change gives
    # 0/0 = NaN, which the fillna below turns into 0.
    result['r_updown'] = 0.5 * (next_updown / next_updown.abs()) + 0.5
    result.fillna(0, inplace=True)

    result['isin'] = result['isin'].replace(stock_label.set_index('isin')['num'])
    latest_date = date_set_dash[-1]
    score = result[result["s_date"] != latest_date].reset_index(drop=True)
    predict = result[result["s_date"] == latest_date].reset_index(drop=True)

    date_to_num = date_label.set_index('s_date')['num']
    score['s_date'] = score['s_date'].replace(date_to_num)
    predict['s_date'] = predict['s_date'].replace(date_to_num)

    # Targets are split off the score set; the predict set has no known targets.
    r_price = score.pop("r_price")
    r_updown = score.pop("r_updown")
    predict.pop("r_price")
    predict.pop("r_updown")

    return [score, r_price, r_updown, predict]

# 3. Build the prediction result table (returned as a DataFrame).
def predictToday(today):
    """Predict the next close and a buy flag for the latest labelled date.

    Args:
        today: Date string forwarded to ``preprocessingNewData``.

    Returns:
        DataFrame with columns ``isin``, ``s_date``, ``close``, ``p_price``
        (predicted close, rounded to int), ``p_rate`` (predicted change in
        percent relative to ``close``) and ``predict`` (1 only when both the
        up/down model and the predicted price indicate a rise).
    """
    new_data = preprocessingNewData(today)[3]
    p_price = pd.DataFrame(close_model.predict(new_data), columns=['p_price'])
    e_updown = pd.DataFrame(updown_model.predict(new_data), columns=['e_updown'])

    result = pd.concat([new_data, p_price, e_updown], axis=1)
    result['p_price'] = result['p_price'].round().astype(int)
    # Round to one decimal, add 0.5 and truncate: for scores in [0, 1] this
    # yields 1 when the rounded score is >= 0.5, else 0.
    # NOTE(review): assumes updown_model outputs roughly in [0, 1] — confirm.
    result['e_updown'] = (result['e_updown'].round(1) + 0.5).astype(int)
    result['p_rate'] = ((result['p_price'] - result['close']) / result['close'] * 100).round(2)
    result['tmp_updown'] = (result['p_rate'] > 0).astype(int)
    result['predict'] = ((result['e_updown'] == 1) & (result['tmp_updown'] == 1)).astype(int)
    result = result.drop(columns=['open', 'high', 'low', 'volume', 'amount', 'updown', 'tmp_updown', 'e_updown'])

    # Decode the numeric labels back to ticker / date strings. The result is
    # assigned back because a chained `result[col].replace(..., inplace=True)`
    # does not update the frame under pandas Copy-on-Write.
    result['isin'] = result['isin'].replace(stock_label.set_index('num')['isin'])
    result['s_date'] = result['s_date'].replace(date_label.set_index('num')['s_date'])
    return result

# 4. Build the prediction evaluation table (returned as a DataFrame).
def scoreYesterday(today):
    """Evaluate the models on the earlier of the last two labelled dates.

    Args:
        today: Date string forwarded to ``preprocessingNewData``.

    Returns:
        DataFrame with columns ``isin``, ``s_date``, ``close``, ``p_price``,
        ``r_price``, ``p_rate``, ``r_rate``, ``predict``, ``correct`` and
        ``error``. ``p_*`` are predicted values and ``r_*`` the realised
        next-day ones. ``correct`` is 1 when ``predict`` equals the realised
        direction. ``error`` is the absolute percentage error of ``p_price``
        against ``r_price``.
    """
    new_data, r_price, r_updown, _ = preprocessingNewData(today)
    p_price = pd.DataFrame(close_model.predict(new_data), columns=['p_price'])
    e_updown = pd.DataFrame(updown_model.predict(new_data), columns=['e_updown'])

    result = pd.concat([new_data, p_price, r_price, e_updown, r_updown], axis=1)
    result['p_price'] = result['p_price'].round().astype(int)
    # Round to one decimal, add 0.5 and truncate: for scores in [0, 1] this
    # yields 1 when the rounded score is >= 0.5, else 0.
    # NOTE(review): assumes updown_model outputs roughly in [0, 1] — confirm.
    result['e_updown'] = (result['e_updown'].round(1) + 0.5).astype(int)
    result['p_rate'] = ((result['p_price'] - result['close']) / result['close'] * 100).round(2)
    result['r_rate'] = ((result['r_price'] - result['close']) / result['close'] * 100).round(2)
    result['tmp_updown'] = (result['p_rate'] > 0).astype(int)
    result['predict'] = ((result['e_updown'] == 1) & (result['tmp_updown'] == 1)).astype(int)
    result['correct'] = (result['predict'] == result['r_updown']).astype(int)
    result['error'] = (((result['p_price'] - result['r_price']) / result['r_price']).abs() * 100).round(2)
    result = result.drop(columns=['open', 'high', 'low', 'volume', 'amount', 'updown', 'tmp_updown', 'e_updown', 'r_updown'])

    # Decode the numeric labels back to ticker / date strings. The result is
    # assigned back because a chained `result[col].replace(..., inplace=True)`
    # does not update the frame under pandas Copy-on-Write.
    result['isin'] = result['isin'].replace(stock_label.set_index('num')['isin'])
    result['s_date'] = result['s_date'].replace(date_label.set_index('num')['s_date'])
    return result

In [34]:
# Choose the trading day, refresh the date labels, and build the model inputs.
# preprocessingNewData also calls updateLabelEncoder itself, so the explicit
# call here repeats that step.
today_date = "20230711"
updateLabelEncoder(today_date)
[score, r_price, r_updown, predict] = preprocessingNewData(today_date)

In [35]:
# Build the evaluation table and the prediction table.
# NOTE(review): the date argument is only used to refresh date_label; the rows
# processed are always the last two dates in date_label, so "20230710" does not
# by itself select 2023-07-10 — confirm this is intended.
score = scoreYesterday("20230711")
predict = predictToday("20230710")

In [43]:
# Show the score rows dated 2023-07-10 whose `predict` flag is 1.
score[(score['s_date']=='2023-07-10') & (score['predict'] == 1)]

Unnamed: 0,isin,s_date,close,p_price,r_price,p_rate,r_rate,predict,correct,error
75,1230,2023-07-10,17440,17447,18820.0,0.04,7.91,1,1,7.3
202,3580,2023-07-10,4480,4489,4530.0,0.2,1.12,1,1,0.91
238,4450,2023-07-10,38300,38304,36500.0,0.01,-4.7,1,0,4.94
299,5880,2023-07-10,1904,1907,1892.0,0.16,-0.63,1,0,0.79
300,5930,2023-07-10,69500,69565,71500.0,0.09,2.88,1,1,2.71
353,7700,2023-07-10,18700,18702,19120.0,0.01,2.25,1,1,2.19
384,9190,2023-07-10,3610,3611,3705.0,0.03,2.63,1,1,2.54
388,9290,2023-07-10,6310,6318,6350.0,0.13,0.63,1,1,0.5
394,9440,2023-07-10,3580,3588,3595.0,0.22,0.42,1,1,0.19
432,11150,2023-07-10,3250,3259,3305.0,0.28,1.69,1,1,1.39


In [44]:
# Display the score table.
score

Unnamed: 0,isin,s_date,close,p_price,r_price,p_rate,r_rate,predict,correct,error
0,000020,2023-07-07,10100,10098,9840.0,-0.02,-2.57,0,1,2.62
1,000040,2023-07-07,554,560,558.0,1.08,0.72,0,0,0.36
2,000050,2023-07-07,9840,9860,9470.0,0.2,-3.76,0,1,4.12
3,000070,2023-07-07,71000,70960,70100.0,-0.06,-1.27,0,1,1.23
4,000075,2023-07-07,53100,53049,53500.0,-0.1,0.75,0,0,0.84
...,...,...,...,...,...,...,...,...,...,...
948,457190,2023-07-10,297000,296738,315000.0,-0.09,6.06,0,0,5.8
949,460850,2023-07-10,9980,9984,10060.0,0.04,0.8,0,0,0.76
950,460860,2023-07-10,9740,9735,9770.0,-0.05,0.31,0,0,0.36
951,900140,2023-07-10,2490,2512,2620.0,0.88,5.22,0,0,4.12


In [42]:
# Prepend the rows saved in predictTableSample.csv (every column read as str)
# to the current score table.
score = pd.concat([pd.read_csv("predictTableSample.csv", dtype=(str)), score])

In [41]:
# Write the combined table to predictTableSample.csv, with missing values
# written as "null" and without the index column.
# NOTE(review): `preidctSample` (sic) is assigned in cell In [40], which appears
# after this cell in the file.
preidctSample.to_csv("predictTableSample.csv", sep=",", na_rep="null", index=False)

In [40]:
# Stack the score and prediction tables row-wise and display the result.
# Columns present only in `score` are left missing for the prediction rows.
preidctSample = pd.concat([score, predict])
preidctSample

Unnamed: 0,isin,s_date,close,p_price,r_price,p_rate,r_rate,predict,correct,error
0,000020,2023-07-07,10100,10098,9840.0,-0.02,-2.57,0,1,2.62
1,000040,2023-07-07,554,560,558.0,1.08,0.72,0,0,0.36
2,000050,2023-07-07,9840,9860,9470.0,0.2,-3.76,0,1,4.12
3,000070,2023-07-07,71000,70960,70100.0,-0.06,-1.27,0,1,1.23
4,000075,2023-07-07,53100,53049,53500.0,-0.1,0.75,0,0,0.84
...,...,...,...,...,...,...,...,...,...,...
948,457190,2023-07-11,315000,314856,,-0.05,,0,,
949,460850,2023-07-11,10060,10075,,0.15,,0,,
950,460860,2023-07-11,9770,9781,,0.11,,0,,
951,900140,2023-07-11,2620,2653,,1.26,,0,,


In [15]:
# Display the prediction table.
predict

Unnamed: 0,isin,s_date,close,p_price,p_rate,predict
0,000020,2023-07-11,9930,9940,0.10,0
1,000040,2023-07-11,557,579,3.95,0
2,000050,2023-07-11,9510,9530,0.21,0
3,000070,2023-07-11,70400,70379,-0.03,0
4,000075,2023-07-11,53500,53483,-0.03,0
...,...,...,...,...,...,...
948,457190,2023-07-11,315000,314856,-0.05,0
949,460850,2023-07-11,10060,10075,0.15,0
950,460860,2023-07-11,9770,9781,0.11,0
951,900140,2023-07-11,2620,2653,1.26,0


In [27]:
# Show the rows flagged predict == 1, ordered by ascending p_rate.
predict[predict['predict'] == 1].sort_values('p_rate')

Unnamed: 0,isin,s_date,close,p_price,p_rate,predict
41,660,2023-07-11,113400,113408,0.01,1
458,12320,2023-07-11,88300,88312,0.01,1
60,995,2023-07-11,26050,26055,0.02,1
241,4545,2023-07-11,14520,14526,0.04,1
251,4835,2023-07-11,5110,5112,0.04,1
204,3620,2023-07-11,8620,8624,0.05,1
669,69460,2023-07-11,1340,1341,0.07,1
101,1570,2023-07-11,105900,106007,0.1,1
288,5720,2023-07-11,4325,4330,0.12,1
300,5930,2023-07-11,71500,71594,0.13,1
