In [90]:
from DB import DataFrameHandler
from DB import AIModelHandler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd
import pickle, time
import os

# Disable pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.options.mode.chained_assignment = None

# NOTE(review): unpickling can execute arbitrary code - only load .sav files
# that come from a trusted source.
with open('ohlcv.sav', 'rb') as file:
    ohlcv = pickle.load(file)

with open('fundamental.sav', 'rb') as file:
    fundamental = pickle.load(file)

def exists_file(isin):
    """Tell whether ``PredictModel/models/<isin>/Ver00001.sav`` exists.

    Args:
        isin: Name of the per-stock sub-directory under ``PredictModel/models``.

    Returns:
        bool: Result of ``os.path.exists`` for that path.

    NOTE(review): createNewModels writes ``lastest.sav`` and only produces
    ``<previous_version>.sav`` when it archives an older file, so this marker
    may not exist after a first save - confirm that is intended.
    """
    marker_path = os.path.join('PredictModel', 'models', isin, 'Ver00001.sav')
    return os.path.exists(marker_path)

def create_subdirectory(directory_name):
    """Make sure ``PredictModel/models/<directory_name>`` is present.

    Nothing happens when a filesystem entry already exists at that path;
    otherwise the directory is created together with any missing parents.

    Args:
        directory_name: Name of the sub-directory to create.
    """
    target_dir = os.path.join('PredictModel', 'models', directory_name)
    if os.path.exists(target_dir):
        return
    os.makedirs(target_dir)

def createNewModels(isin, new, previous_version):
    """Persist ``new`` as the current model for ``isin``, archiving the old one.

    If ``PredictModel/models/<isin>/lastest.sav`` already exists it is renamed
    to ``<previous_version>.sav`` in the same directory, then ``new`` is
    pickled to ``lastest.sav``.

    Args:
        isin: Name of the per-stock sub-directory under ``PredictModel/models``.
        new: Picklable object to store (the freshly trained model bundle).
        previous_version: File stem (without ``.sav``) given to the archived
            copy of the previous ``lastest.sav``.

    Raises:
        OSError: If the directory cannot be created or a file operation fails.
    """
    subdirectory_path = os.path.join('PredictModel', 'models', isin)
    # os.listdir raises FileNotFoundError for a missing directory, so create
    # the per-stock directory on first use instead of crashing.
    os.makedirs(subdirectory_path, exist_ok=True)

    latest_path = os.path.join(subdirectory_path, "lastest.sav")
    if "lastest.sav" in os.listdir(subdirectory_path):
        archived_path = os.path.join(subdirectory_path, f'{previous_version}.sav')
        # NOTE(review): os.rename replaces an existing archive silently on
        # POSIX and raises FileExistsError on Windows - confirm which is wanted.
        os.rename(latest_path, archived_path)

    # Context manager guarantees the handle is flushed and closed.
    with open(latest_path, 'wb') as model_file:
        pickle.dump(new, model_file)

def preprocessing(ohlcv, fundamental, split_date="2023-09-01"):
    """Build the train / predict frames for a single stock.

    Steps: drop ``AMOUNT`` from ``ohlcv``; outer-merge the ``PBR`` / ``PER``
    columns of ``fundamental`` on the shared columns; drop every column that
    contains a missing value; drop ``ISIN``; rename ``S_DATE`` to ``WORK_DT``
    and sort by it; add ``target1`` (next row's ``CLOSE``) and ``target2``
    (1 if the next row's ``UPDOWN`` is positive, else 0); split on
    ``split_date``; finally replace ``WORK_DT`` with ``row position + 1``.

    Args:
        ohlcv: DataFrame with at least ``ISIN``, ``S_DATE``, ``CLOSE``,
            ``UPDOWN`` and ``AMOUNT`` columns.
        fundamental: DataFrame expected to provide ``ISIN``, ``S_DATE``,
            ``PBR`` and ``PER``. If those columns are missing (or the object
            is not subscriptable) the price data is used on its own.
        split_date: Rows with ``WORK_DT < split_date`` go to ``train``, the
            rest to ``predict``. Defaults to the previously hard-coded date.

    Returns:
        dict with keys ``'train'`` (frame including both targets),
        ``'predict'`` (frame without targets), ``'priceAnswer'``
        (``target1`` of the predict rows) and ``'updownAnswer'``
        (``target2`` of the predict rows). The last row's targets are NaN
        because there is no following row.
    """
    price_frame = ohlcv.drop(columns=['AMOUNT'])
    try:
        # TODO: once the source data uses WORK_DT instead of S_DATE, select
        # WORK_DT here and drop the rename below.
        ratio_frame = fundamental[["ISIN", "S_DATE", 'PBR', 'PER']]
    except (KeyError, TypeError):
        # No usable fundamentals. Merging against an empty frame raises
        # MergeError (no common columns), so fall back to price data only.
        merged = price_frame
    else:
        merged = pd.merge(price_frame, ratio_frame, how='outer')

    total = (
        merged.dropna(axis=1)
        .drop(columns=["ISIN"])
        .rename(columns={'S_DATE': 'WORK_DT'})
        .sort_values("WORK_DT")
        .reset_index(drop=True)
    )

    # Targets look one row ahead, so the final row has no label.
    total["target1"] = total['CLOSE'].shift(-1)
    total["target2"] = ((total['UPDOWN'] > 0) * 1).shift(-1)

    # Explicit copies: the frames are modified below and must not be views.
    train = total[total["WORK_DT"] < split_date].copy()
    predict = total[total["WORK_DT"] >= split_date].copy()

    # Replace the date with a 1-based running row number.
    train['WORK_DT'] = train.index + 1
    predict['WORK_DT'] = predict.index + 1

    priceAnswer = predict.pop('target1')
    updownAnswer = predict.pop('target2')

    return {'train': train, 'predict': predict, 'priceAnswer': priceAnswer, 'updownAnswer': updownAnswer}

def calcPriceScore(x, y, model):
    """Share of rows where predicted and actual direction versus CLOSE agree.

    For every row, "actual" is whether ``y`` exceeds the row's ``CLOSE`` and
    "predicted" is whether ``model.predict(x)`` exceeds it; the function
    returns the fraction of rows where the two flags are equal.

    Args:
        x: Feature frame containing a ``CLOSE`` column.
        y: True next-step prices, index-aligned with ``x``.
        model: Object exposing ``predict(x)``.

    Returns:
        float: Agreement ratio rounded to two decimals.
    """
    close = x['CLOSE']
    actual_up = (y > close).reset_index(drop=True)
    # Predictions come back positionally, so compare against a re-indexed CLOSE.
    predicted_up = model.predict(x) > close.reset_index(drop=True)
    agreement = actual_up == predicted_up
    hit_count = int(agreement.sum())
    return round(hit_count / len(agreement), 2)

def calcUpdownScore(x, y, model):
    """Return ``model.score(x, y)`` rounded to two decimal places.

    Args:
        x: Features passed through to ``model.score``.
        y: Labels passed through to ``model.score``.
        model: Object exposing ``score(x, y)``.
    """
    raw_score = model.score(x, y)
    return round(raw_score, 2)

def makeAiModel(X, i):
    """Fit the price and up/down models on ``X`` and score them on a hold-out.

    ``X`` is split 80/20 with ``train_test_split``; the ``target1`` and
    ``target2`` columns are removed from each part and used as labels for the
    price model and the up/down model respectively.

    NOTE(review): LogisticRegression is a classifier, so ``target1`` values are
    treated as class labels here - confirm this is intended.

    Args:
        X: Frame holding the feature columns plus ``target1`` and ``target2``.
        i: Seed forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        dict with keys ``"priceModel"``, ``"updownModel"``,
        ``"priceModelScore"`` and ``"updownModelScore"``.
    """
    fit_frame, eval_frame = train_test_split(X, test_size=0.2, random_state=i)

    # Detach both label columns from each part; what remains are the features.
    fit_price, fit_updown = fit_frame.pop('target1'), fit_frame.pop('target2')
    eval_price, eval_updown = eval_frame.pop('target1'), eval_frame.pop('target2')

    priceModel = LogisticRegression(solver='saga')
    updownModel = LogisticRegression()
    priceModel.fit(fit_frame, fit_price)
    updownModel.fit(fit_frame, fit_updown)

    return {
        "priceModel": priceModel,
        "updownModel": updownModel,
        "priceModelScore": calcPriceScore(eval_frame, eval_price, priceModel),
        "updownModelScore": calcUpdownScore(eval_frame, eval_updown, updownModel),
    }

In [107]:
# Inspect the entry at position 2602 of `ohlcv`.
ohlcv[2602]

Unnamed: 0,ISIN,S_DATE,OPEN,HIGH,LOW,CLOSE,VOLUME,AMOUNT,UPDOWN
0,458610,2023-08-30,3465,4200,2515,2520,54168694,,0.0
1,458610,2023-08-31,2370,2750,2130,2235,8162138,,-11.31
2,458610,2023-09-01,2145,2235,2055,2055,2206942,,-8.05
3,458610,2023-09-04,2075,2110,2055,2070,706892,,0.73
4,458610,2023-09-05,2090,2090,2065,2075,317152,,0.24
5,458610,2023-09-06,2080,2095,2075,2095,184623,,0.96
6,458610,2023-09-07,2090,2095,2075,2075,247514,,-0.95
7,458610,2023-09-08,2090,2090,2070,2075,139022,,0.0
8,458610,2023-09-11,2090,2100,2080,2085,125067,,0.48
9,458610,2023-09-12,2085,2100,2080,2090,117932,,0.24


In [104]:
# Show the current contents of newStock.
# NOTE(review): the model-building cell in this notebook only resets newStock
# to [] and never appends to it, so the values displayed here presumably come
# from an earlier execution - verify after Restart & Run All.
newStock

[2411, 2599, 2602]

In [109]:
# Print the training frame stored for ISIN 355390.
# NOTE(review): stockMapper is filled by the model-building loop cell, which
# has to be executed before this one.
print(stockMapper['355390']["train"])

    WORK_DT   OPEN   HIGH    LOW  CLOSE   VOLUME  UPDOWN  PBR  PER  target1   
0         1  46000  47150  45000  47150  1723454    0.00  0.0  0.0  61200.0  \
1         2  53200  61200  50000  61200  1817874   29.80  0.0  0.0  65300.0   
2         3  66000  70000  58300  65300  3917786    6.70  0.0  0.0  75300.0   
3         4  66100  81500  63100  75300  3917165   15.31  0.0  0.0  75300.0   
4         5  78000  84900  72900  75300  1907937    0.00  0.0  0.0  72500.0   
5         6  78300  81200  70100  72500  1138725   -3.72  0.0  0.0  65900.0   
6         7  68900  72800  63300  65900   838763   -9.10  0.0  0.0  71800.0   
7         8  65200  74900  64200  71800  1093250    8.95  0.0  0.0  61700.0   
8         9  72300  73500  60200  61700   592496  -14.07  0.0  0.0  58900.0   
9        10  61700  64300  58600  58900   304972   -4.54  0.0  0.0  57800.0   
10       11  60400  60600  56800  57800   258159   -1.87  0.0  0.0  55200.0   
11       12  58400  58700  54900  55200   262620   -

In [108]:
# ISINs that this loop never builds a model for.
SKIPPED_ISINS = {'004565', '353060', '010145', '000995', '010050', '000547', '001745'}
# ISINs whose post-split ("predict") rows are folded back into the training
# frame - presumably stocks with too little history before the split date;
# verify against the data.
MERGE_PREDICT_ISINS = {'355390', '457390', '458610'}

stockMapper = {}
# Not filled by this loop; kept because another cell displays it.
newStock = []
for i, stock in enumerate(ohlcv):
    isin = stock["ISIN"][0]
    if isin in SKIPPED_ISINS:
        continue
    print(f'{i}번째 : {isin}')
    if exists_file(isin):
        # A versioned model file is already on disk for this stock.
        continue

    stockMapper[isin] = preprocessing(stock, fundamental[i])
    if isin in MERGE_PREDICT_ISINS:
        # Re-attach the answers to the predict rows and append them to train,
        # leaving out the last row (its targets have no following row).
        tmp = pd.concat([stockMapper[isin]["predict"], stockMapper[isin]["priceAnswer"], stockMapper[isin]["updownAnswer"]], axis=1)
        stockMapper[isin]["train"] = pd.concat([stockMapper[isin]["train"], tmp.drop(tmp.index[-1])])

    newModel = makeAiModel(stockMapper[isin]["train"], 29)
    # createNewModels lists the per-stock directory, so it must exist first.
    create_subdirectory(isin)
    createNewModels(isin, newModel, "Ver00001")

0번째 : 000020
1번째 : 000040
2번째 : 000050
3번째 : 000070
4번째 : 000075
5번째 : 000080
6번째 : 000087
7번째 : 000100
8번째 : 000105
9번째 : 000120
10번째 : 000140
11번째 : 000145
12번째 : 000150
13번째 : 000155
14번째 : 000157
15번째 : 000180
16번째 : 000210
17번째 : 000215
18번째 : 000220
19번째 : 000225
20번째 : 000227
21번째 : 000230
22번째 : 000240
23번째 : 000270
24번째 : 000300
25번째 : 000320
26번째 : 000325
27번째 : 000370
28번째 : 000390
29번째 : 000400
30번째 : 000430
31번째 : 000480
32번째 : 000490
33번째 : 000500
34번째 : 000520
35번째 : 000540
36번째 : 000545
38번째 : 000590
39번째 : 000640
40번째 : 000650
41번째 : 000660
42번째 : 000670
43번째 : 000680
44번째 : 000700
45번째 : 000720
46번째 : 000725
47번째 : 000760
48번째 : 000810
49번째 : 000815
50번째 : 000850
51번째 : 000860
52번째 : 000880
53번째 : 000885
54번째 : 00088K
55번째 : 000890
56번째 : 000910
57번째 : 000950
58번째 : 000970
59번째 : 000990
61번째 : 001020
62번째 : 001040
63번째 : 001045
64번째 : 00104K
65번째 : 001060
66번째 : 001065
67번째 : 001067
68번째 : 001070
69번째 : 001080
70번째 : 001120
71번째 : 001130
72번째 : 001140
73번째 : 001200
74

