In [1]:
from skl2onnx import to_onnx
from sklearn.ensemble import RandomForestClassifier
import sqlalchemy as db
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from skl2onnx.common.data_types import FloatTensorType, Int16TensorType, DoubleTensorType
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report  

from mclLib.featurenames import *
from mclLib.modelpostfix import *
from mclLib.cleaner import *
from mclLib.server_info import *
from mclLib.tester import *

In [2]:

engine = create_engine(mysql_server_uri)
conn = engine.connect()


In [3]:
''' get db data '''
br_full_data = pd.read_sql_table('buyreports_legacy', conn)

In [85]:
''' db data filter '''
get_filter = ( br_full_data['isAllBuyed'] == 1) & ( br_full_data['isAllSelled'] == 1) & ( br_full_data['nRqTime'] <= 93000) 
br = br_full_data[get_filter]

''' get features name'''
feature_names =  f_name_102
feature_size = len(feature_names)

model_name = 'RF_20230520_v20_n150_d10'

''' set X data '''
X = br[feature_names].to_numpy(dtype=np.float64)

''' set y data '''
y_condition = ((br['fMaxPowerAfterBuyWhile60'] <= 0.05)) # | ((br['lTotalBuyEndPrice'] - br['lTotalBuyPrice']) <= 1000000000))
y = np.where(y_condition, 1, 0)

In [86]:
from collections import Counter
Counter(y)

Counter({1: 70539, 0: 23379})

In [87]:
# smote = SMOTE(sampling_strategy=0.85)
# X, y = smote.fit_resample(X, y)

In [88]:
rf = RandomForestClassifier(n_estimators=150, max_depth=10)
rf.fit(X, y)

In [89]:
onx = to_onnx(model=rf, options={'zipmap': False},
              initial_types=[('input', DoubleTensorType([None, 102]))],
              final_types=[('output', DoubleTensorType([None])),
                           ('prob', DoubleTensorType([None, 2]))],
              target_opset=17)

with open(onnx_path + model_name + onnx_, "wb") as f:
    f.write(onx.SerializeToString())

In [90]:
cleanAll()

In [91]:
y_pred = rf.predict(X)

In [92]:
print(classification_report(y, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.20      0.34     23379
           1       0.79      1.00      0.88     70539

    accuracy                           0.80     93918
   macro avg       0.89      0.60      0.61     93918
weighted avg       0.84      0.80      0.75     93918



In [93]:
testClassification(y, [y_pred])

분류평가를 시작합니다...
len of models :  1
suc_line : 1 (1.0)
fail_line : 1 (1.0)
crit value :  0.5

총량 :  93918
0 :  23379 , 비율 :  24.892991758768286 (%)
1 :  70539 , 비율 :  75.10700824123171 (%)

총 횟수 :  4847
실제 0 :  4790
실제 1 :  57
정답비율 :  98.8240148545492 (%)

총 횟수 :  89071
실제 1 :  70482
실제 0 :  18589
정답비율 :  79.13013214177454 (%)



([1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
