In [70]:
from pycaret.classification import load_model
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score


In [56]:
# Districts (values of the SIG_KOR_NM column) held out as the evaluation set.
test_gu = ['동작구', '구로구']
df = pd.read_csv("seoul_preprocessed.csv")
# Take an explicit copy: later cells add and remove columns on `test_df`, and
# doing that on a boolean-mask slice of `df` triggers SettingWithCopyWarning
# and leaves it ambiguous whether `df` itself is being modified.
test_df = df[df['SIG_KOR_NM'].isin(test_gu)].copy()

In [57]:
# Derive the risk-level boundary from every district except the held-out ones,
# so the test rows do not influence the threshold they are labelled with.
# Only rows with a positive `tow_count` take part in the boundary computation.
df_nonzero = df[~df['SIG_KOR_NM'].isin(test_gu) & (df['tow_count'] > 0)]

# Sort by `tow_count` so a positional lookup yields the boundary value.
df_nonzero = df_nonzero.sort_values(by='tow_count')

# Number of rows left after dropping the zero counts.
total_nonzero = len(df_nonzero)
if total_nonzero == 0:
    raise ValueError(
        "no rows with tow_count > 0 outside the test districts; "
        "cannot derive the risk-level boundary"
    )

mid_end = total_nonzero // 2  # half of the non-zero rows

# Upper bound of the middle bucket: the value at the end of the lower half.
# The index is clamped at 0 so a single-row frame is handled explicitly rather
# than through negative indexing, and the value is read from the column itself
# so it keeps the column's dtype.
mid_end_value = df_nonzero['tow_count'].iloc[max(mid_end - 1, 0)]

print(f"가운데 구간 끝값: {mid_end_value}\n")

# Label the held-out rows: 0 -> no tows, 1 -> up to the boundary, 2 -> above it.
# np.select picks the first matching condition, which mirrors the original
# nested conditional (anything matching neither condition falls through to 2).
# `assign` returns a new frame, so nothing is written into a slice of `df`.
tow_counts = test_df['tow_count']
test_df = test_df.assign(
    risk_level=np.select(
        [tow_counts == 0, tow_counts <= mid_end_value],
        [0, 1],
        default=2,
    ).astype('int64')
)

# Check the result.
print(test_df[['tow_count', 'risk_level']].tail())

가운데 구간 끝값: 2

       tow_count  risk_level
16209          0           0
16210          0           0
16211          0           0
16212          0           0
16213          0           0


In [60]:
target = 'risk_level'

# Positional feature selection: skip the first three columns and the last two.
# NOTE(review): presumably the two trailing columns are `tow_count` and
# `risk_level` — confirm against the CSV schema.
features = test_df.columns[3:-2]

# `tow_count` is what the target was derived from, so it must never be a
# feature. Checking this explicitly replaces the former `del test_df['tow_count']`,
# which made the cell fail with KeyError on a second run and shifted the
# positional slice above.
assert target not in features and 'tow_count' not in features, \
    "label-derived columns leaked into the feature set"

X_test, y_test = test_df[features], test_df[target]

In [64]:
# Load the four saved pipelines in order; each call goes through pycaret's
# load_model with the saved model's name.
model_names = (
    'domain_specific_model_1',
    'domain_specific_model_2',
    'domain_specific_model_3',
    'domain_specific_model_4',
)
model1, model2, model3, model4 = map(load_model, model_names)

Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded


In [66]:
# Per-class probability estimates from each loaded model on the same test
# features, evaluated in model order.
pred1, pred2, pred3, pred4 = (
    fitted.predict_proba(X_test)
    for fitted in (model1, model2, model3, model4)
)



In [68]:
# Soft voting: average the four models' class probabilities, then pick the
# column with the highest mean probability for every row.
# NOTE(review): the argmax column index is used directly as the class label,
# which assumes the probability columns are ordered as labels 0, 1, 2 — confirm.
prob_sum = pred1 + pred2 + pred3 + pred4
ensemble_pred = prob_sum / 4
final_preds = ensemble_pred.argmax(axis=1)

In [72]:
# Wrap the ensemble's predicted labels in a single-column frame.
result_columns = {'prediction': final_preds}
df_result = pd.DataFrame(result_columns)

In [86]:
# Weighted F1 of the soft-voting predictions against the derived risk levels.
f1 = f1_score(y_true=y_test, y_pred=final_preds, average="weighted")
print(f"f1_score: {f1:.4f}")

f1_score: 0.6761


In [112]:
# Sanity check: (number of test rows, number of feature columns).
X_test.shape

(3229, 27)

In [122]:
# Inspect the class-probability row that model1 produced for the first test sample.
pred1[0]

array([9.99146855e-01, 7.07435925e-04, 1.45708763e-04])