In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.inspection import permutation_importance
import pickle

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
    file. Only use this with artifacts produced by this project's own
    training run; never with files from an untrusted source.
    """
    # The handle stays local to the helper, so no closed file object is
    # left behind in the notebook namespace.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Each artifact gets its own path constant instead of sharing one
# reassigned variable.
TEST_DATA_PATH = 'test.csv'
CLF_MODEL_PATH = "clf_model.pkl"
SC_MODEL_PATH = "sc_model.pkl"

# Hold-out data set to be scored.
data_test = pd.read_csv(TEST_DATA_PATH)

# Classifier model.
# NOTE(review): presumably a scikit-learn estimator pickled by the training
# notebook; confirm the library version matches the one used for training.
clf_model_best = _load_pickle(CLF_MODEL_PATH)

# Normalizer (scaler) model.
SC_model = _load_pickle(SC_MODEL_PATH)

# Columns that are dropped before the data is passed to the scaler/model.
columns_to_remove = ['id', 'Region_Code', 'Policy_Sales_Channel', 'Driving_License', 'Annual_Premium', 'Vintage']

# The nominal 'Vehicle_Age' feature is replaced by a numeric one:
# 0 - '< 1 Year'
# 1 - '1-2 Year'
# 2 - '> 2 Years'
Vehicle_Age_category = ['< 1 Year', '1-2 Year', '> 2 Years']
Vehicle_Age_number = list(range(len(Vehicle_Age_category)))
vehicle_age_codes = dict(zip(Vehicle_Age_category, Vehicle_Age_number))
# One pass over the column instead of one pass per category. Values that are
# not in the mapping (e.g. NaN) are returned unchanged, exactly as the
# per-category comparison did.
data_test['Vehicle_Age'] = data_test['Vehicle_Age'].map(
    lambda value: vehicle_age_codes.get(value, value)
)

# The nominal 'Gender' feature becomes the dichotomous 'Is_Male':
# 1 - Male, 0 - anything else (Female, missing, unexpected spelling).
data_test['Gender'] = (data_test['Gender'] == 'Male').astype('int64')

# The nominal 'Vehicle_Damage' feature becomes dichotomous:
# 1 - Yes, 0 - anything else.
data_test['Vehicle_Damage'] = (data_test['Vehicle_Damage'] == 'Yes').astype('int64')

# Rename the encoded gender column and drop the unused features in one
# chained, non-in-place step.
data_test = (
    data_test
    .rename(columns={'Gender': 'Is_Male'})
    .drop(columns=columns_to_remove)
)


# Separate the feature columns from the 'Response' target.
tmp_X_test = data_test.drop(columns=['Response'])
# Apply the already-fitted scaler (transform only, no re-fitting) and restore
# the column labels that are lost when working on the raw ndarray.
# NOTE(review): the column order must match the order used when the scaler
# and classifier were fitted; that cannot be verified from this notebook.
X_test = pd.DataFrame(SC_model.transform(tmp_X_test.values), columns=tmp_X_test.columns)
y_test = data_test['Response']

# Predicted labels, kept as a single-column frame named like the target.
y_pred = pd.DataFrame(clf_model_best.predict(X_test.values), columns=['Response'])
print(classification_report(y_test, y_pred, digits=4))

# Pin the label order so the matrix is always 2x2. Without `labels`, a data
# set in which only one class occurs yields a 1x1 matrix and the four-way
# unpacking below raises ValueError.
# Assumes 'Response' is encoded as 0/1, as shown in the printed report.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
print(f'TP: {tp}\nTN: {tn}\nFP: {fp}\nFN: {fn}')

              precision    recall  f1-score   support

           0     0.9853    0.6695    0.7973   5705019
           1     0.2826    0.9289    0.4334    799778

    accuracy                         0.7014   6504797
   macro avg     0.6340    0.7992    0.6153   6504797
weighted avg     0.8989    0.7014    0.7525   6504797

TP: 742887
TN: 3819474
FP: 1885545
FN: 56891
