In [2]:
# Web에 포함시킬 모델을 구성
import pandas as pd

In [3]:
df1 = pd.read_csv('01_Data.csv')

In [5]:
df1['Target'] = df1['State'].replace({'계약확정':0, '기간만료':0
                                    , '해약확정':1, '해약진행중':1})

In [8]:
X = df1[['Term', 'Product_Type', 'Amount_Month', 'Age', 'Gender', 'Credit_Rank']]
Y = df1['Target']

- RandomForestClassifier
- MinMaxScaling / OneHotEncoding
- SimpleImputer
- SMOTE
- cv=3 / max_depth 5-10 / min_samples_split 5~10
- model_web.sav

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from imblearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from imblearn.over_sampling import SMOTE 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pickle


In [11]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, random_state=1234)

In [12]:
numeric_pipe = make_pipeline(SimpleImputer(), MinMaxScaler())
category_pipe = make_pipeline(SimpleImputer(strategy='most_frequent'), OneHotEncoder())
numeric_list = X.describe().columns.tolist()
category_list = X.describe(include='object').columns.tolist()
preprocessing_pipe = make_column_transformer((numeric_pipe, numeric_list)
                                            , (category_pipe, category_list))
model_pipe = make_pipeline(preprocessing_pipe, SMOTE(), RandomForestClassifier())

In [13]:
hyper_parameter = {'randomforestclassifier__max_depth':range(5,11)
                    , 'randomforestclassifier__min_samples_split':range(5,11)}
grid_model = GridSearchCV(model_pipe, param_grid=hyper_parameter, cv=3, n_jobs=-1)
grid_model.fit(X_train, Y_train)

In [14]:
best_model = grid_model.best_estimator_
Y_train_pred = best_model.predict(X_train)
Y_test_pred = best_model.predict(X_test)

In [15]:
print(classification_report(Y_train,Y_train_pred))

              precision    recall  f1-score   support

           0       0.99      0.95      0.97     37984
           1       0.10      0.39      0.15       491

    accuracy                           0.94     38475
   macro avg       0.54      0.67      0.56     38475
weighted avg       0.98      0.94      0.96     38475



In [16]:
print(classification_report(Y_test,Y_test_pred))

              precision    recall  f1-score   support

           0       0.99      0.95      0.97     12681
           1       0.08      0.37      0.13       145

    accuracy                           0.95     12826
   macro avg       0.54      0.66      0.55     12826
weighted avg       0.98      0.95      0.96     12826



In [17]:
pickle.dump(best_model, open('model_web.sav','wb'))

In [20]:
best_model.predict_proba(X_train)[0][1]

0.2431387197189807