In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
import pickle


In [7]:
# Read the training set; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data_train.csv')

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15676909,667,Spain,Female,34,5,0.0,2,1,0,163830.64,0
1,15749265,427,Germany,Male,42,1,75681.52,1,1,1,57098.0,0
2,15582492,535,France,Female,29,2,112367.34,1,1,0,185630.76,0
3,15780386,654,Spain,Male,40,5,105683.63,1,1,0,173617.09,0
4,15611759,850,Spain,Female,57,8,126776.3,2,1,1,132298.49,0


In [8]:
# Column roles used by the preprocessing steps:
#   one_hot   -> columns passed to one-hot encoding
#   without_X -> columns dropped when building the feature matrix X
columns_dict = dict(
    one_hot=['country', 'gender'],
    without_X=['churn', 'customer_id'],
)

columns_dict['without_X']

['churn', 'customer_id']

In [9]:
class Processing():
    """Feature-preparation helper wrapped around a single DataFrame.

    Exposes the three steps used before model fitting: one-hot encoding,
    splitting into features/label, and standardization.

    Attributes:
        df: The wrapped frame. ``one_hot`` rebinds it to the encoded frame.
        scaler: The ``StandardScaler`` fitted by the most recent ``scale``
            call, or ``None`` if ``scale`` has not been called yet.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Store the frame to be processed.

        Args:
            df: Input frame; it is referenced, not copied.
        """
        self.df = df
        # Filled in by scale() so the exact training-time scaling can be
        # re-applied later (e.g. ``processing.scaler.transform(new_X)``).
        self.scaler = None

    def one_hot(self, columns_dict: dict) -> pd.DataFrame:
        """One-hot encode the columns listed under ``columns_dict['one_hot']``.

        All dummy levels are kept (``drop_first=False``). The encoded frame
        replaces ``self.df``; the frame originally passed in is not modified.

        Args:
            columns_dict: Mapping with a ``'one_hot'`` key holding the column
                names to encode.

        Returns:
            The encoded frame (also stored on ``self.df``).
        """
        self.df = pd.get_dummies(self.df, columns=columns_dict['one_hot'], drop_first=False)

        return self.df

    def feature_label(self, columns_dict: dict, label: str = 'churn'):
        """Split ``self.df`` into a feature frame and a label series.

        Args:
            columns_dict: Mapping with a ``'without_X'`` key holding the
                columns to exclude from the features.
            label: Name of the target column. Defaults to ``'churn'``.

        Returns:
            ``(X, y)`` where ``X`` is ``self.df`` without the excluded columns
            and ``y`` is ``self.df[label]``.
        """
        # Copy so the caller's list is never mutated.
        excluded = list(columns_dict['without_X'])
        # The target must never appear among the features, even if the caller
        # forgot to list it in 'without_X'.
        if label not in excluded:
            excluded.append(label)

        X = self.df.drop(columns=excluded)
        y = self.df[label]

        return X, y

    def scale(self, X):
        """Standardize ``X`` with a freshly fitted ``StandardScaler``.

        The fitted scaler is kept on ``self.scaler`` so that data seen later
        can be transformed with the same fitted parameters instead of being
        re-fitted.

        Args:
            X: Feature matrix to fit on and transform.

        Returns:
            The result of ``StandardScaler().fit_transform(X)``.
        """
        sc = StandardScaler()
        X_sc = sc.fit_transform(X)
        self.scaler = sc

        return X_sc
        

# Build the training matrix from the raw frame.
# The encoded frame gets its own name so that `df` keeps the raw data: one_hot
# needs the original 'country'/'gender' columns, so rebinding `df` here would
# make a second run of this cell fail with a KeyError.
load_df = Processing(df=df)
df_encoded = load_df.one_hot(columns_dict=columns_dict)
X, y = load_df.feature_label(columns_dict=columns_dict)
X_sc = load_df.scale(X)

# Cheap invariants: features and labels stay aligned, and none of the columns
# meant to be excluded ended up in X.
assert len(X_sc) == len(y), "feature/label row count mismatch"
assert not set(columns_dict['without_X']) & set(X.columns), "excluded column present in X"

# Hyperparameters are spelled out explicitly so the fit does not depend on
# library defaults; random_state pins the seed.
# NOTE: the model is trained on standardized features, so inputs at prediction
# time must be scaled the same way before calling model.predict.
model = GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='log_loss', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_samples_leaf=1,
                           min_samples_split=2, min_weight_fraction_leaf=0.0,
                           n_estimators=100, n_iter_no_change=None,
                           random_state=123, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

model.fit(X_sc, y)

In [10]:

# Serialize the fitted model to disk in binary mode.
with open('model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)
