# Setting up the prediction process

First, let's load the dataset:

In [12]:
import numpy as np
import pandas as pd


# Seed NumPy's global RNG before anything stochastic runs.
np.random.seed(40)

# The first CSV column is used as the row index.
data = pd.read_csv("churn.csv", index_col=0)

# Split the frame into the feature columns and the 'Churn' target column.
X = data.drop(columns='Churn')
y = data['Churn']

#print(data.describe(include='all'))

## Applying transformation

In [13]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from pandas.api.types import is_numeric_dtype

def convert_and_remove_categorical_variables(X, to_convert, to_remove):
    """Dummy-encode selected low-cardinality columns and drop unwanted ones.

    The input frame itself is not modified; a new frame is returned.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table to transform.
    to_convert : collection of column labels
        Columns to replace by dummy/indicator columns. A listed column is
        only converted when it has fewer than 10 distinct values; otherwise
        it is left untouched (and is NOT removed, even if it also appears in
        ``to_remove``).
    to_remove : collection of column labels
        Columns to drop, provided they are not listed in ``to_convert``.

    Returns
    -------
    pandas.DataFrame
        The transformed frame. Indicator columns are appended at the end,
        named ``<column>_<value>``, with the first level dropped.
    """
    result = X
    # Iterate over the *original* column labels; `result` is rebound as
    # columns get replaced or dropped along the way.
    for column in X.columns:
        if column not in to_convert:
            if column in to_remove:
                result = result.drop([column], axis=1)
            continue

        # Column is a conversion candidate: skip it when it has too many levels.
        if len(result[column].unique()) >= 10:
            continue

        indicators = pd.get_dummies(result[column], prefix=column, drop_first=True)
        widened = pd.concat([result, indicators], axis=1)
        result = widened.drop([column], axis=1)
    return result

In [14]:
# Columns to discard entirely vs. columns to turn into indicator columns.
to_remove = ['Phone_Number']
to_convert = ['Area_Code', 'International_Plan', 'Voice_mail_Plan']

# Start again from the raw `data` frame so this cell can be re-run safely.
X = convert_and_remove_categorical_variables(
    data.drop(columns='Churn'),
    to_convert=to_convert,
    to_remove=to_remove,
)

In [15]:
from sklearn.preprocessing import LabelEncoder

# Encode the target as integer class labels.
# The encoder is fitted on the raw 'Churn' column of `data` rather than on the
# current value of `y`: with `y = encoder.fit_transform(y)` a second run of this
# cell would refit on already-encoded integers and `encoder.classes_` would no
# longer hold the original labels.
encoder = LabelEncoder()
y = encoder.fit_transform(data['Churn'])

In [16]:
from sklearn.model_selection import ShuffleSplit, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression as LR
from sklearn.tree import DecisionTreeClassifier as DT
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import MinMaxScaler

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

## The evaluation setup

Let's now prepare our evaluation setup. To do this, you will implement a k-fold cross-validation, as well as a stratified and a shuffled k-fold CV (the verification cell below uses k = 5).

I have prepared three models for you to test. Make sure you also normalize your data first. For each model, calculate the mean of the three metrics (i.e., the mean of their mean results over the 5-fold CV), and select the best-performing model. Once the best model (classifier) has been identified, fit it to the training data; the function should return this model fitted on the training data.

Next, write a function that applies the best model you have found during cross-validation to your final test set:

In [17]:
def evaluation_process(X_train, y_train, n_folds, shuffled):
    """Select the best of three classifiers by cross-validation and fit it.

    Three candidates (logistic regression, decision tree, random forest) are
    each wrapped in a ``StandardScaler`` pipeline and scored under three
    cross-validation schemes: plain K-fold, stratified K-fold, and a
    ``ShuffleSplit`` with a 30% test share. For every candidate the mean test
    accuracy, precision and ROC AUC of each scheme are averaged over the
    schemes and then over the three metrics, giving one score per candidate.
    The candidate with the highest score is fitted on the standardised
    training data and returned.

    Parameters
    ----------
    X_train : array-like or DataFrame
        Training features.
    y_train : array-like
        Training labels (scored with the 'precision' and 'roc_auc' scorers,
        so presumably binary -- confirm against the caller).
    n_folds : int
        Number of folds / splits used by every cross-validation scheme.
    shuffled : any
        Not referenced by the implementation; kept only so existing calls
        keep working.

    Returns
    -------
    estimator
        The winning classifier itself (not a pipeline), fitted on
        ``StandardScaler().fit_transform(X_train)``. Callers must apply an
        equivalent scaling to any data passed to ``predict``.
    """
    # Local imports are kept so the function does not depend on the notebook's
    # import cell having been executed first.
    from sklearn.ensemble import RandomForestClassifier as RF
    from sklearn.linear_model import LogisticRegression as LR
    from sklearn.model_selection import (
        KFold,
        ShuffleSplit,
        StratifiedKFold,
        cross_validate,
    )
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.tree import DecisionTreeClassifier as DT

    # Estimators created without their own random_state (DT below) fall back
    # to NumPy's global RNG, so seed it for reproducible scores.
    np.random.seed(42)
    models = [LR(solver='liblinear'), DT(), RF(n_estimators=10, random_state=99)]

    # KFold/StratifiedKFold are used without shuffling, so they take no
    # random_state: recent scikit-learn versions raise a ValueError when
    # random_state is set while shuffle=False, and older versions ignored it,
    # so the resulting splits are unchanged.
    cv_schemes = [
        KFold(n_splits=n_folds),
        StratifiedKFold(n_splits=n_folds),
        ShuffleSplit(n_folds, test_size=0.3, random_state=42),
    ]
    scoring = ['accuracy', 'precision', 'roc_auc']

    overall_metric = []
    for model in models:
        pipeline = make_pipeline(StandardScaler(), model)
        scheme_scores = []
        for cv in cv_schemes:
            outcomes = cross_validate(
                pipeline,
                X_train,
                y_train,
                scoring=scoring,
                cv=cv,
                return_train_score=False,
            )
            # One row per scheme: mean test score of each metric over the folds.
            scheme_scores.append(
                [np.average(outcomes['test_' + name]) for name in scoring]
            )
        # Average over schemes first (per metric), then over the metrics.
        per_metric = np.mean(scheme_scores, axis=0)
        overall_metric.append(np.mean(per_metric))

    # Highest combined score first.
    ranking = np.argsort(overall_metric)[::-1]
    best_classifier = models[ranking[0]]

    # cross_validate works on clones, so the winner is still unfitted here.
    X_train_scaled = StandardScaler().fit_transform(X_train)
    best_model = best_classifier.fit(X_train_scaled, y_train)
    return best_model



In [18]:

def get_evaluation_test_set(model, X_test, y_test, X_train_fit=None):
    """Score an already-fitted classifier on the held-out test set.

    The test features are standardised with a ``StandardScaler`` fitted on the
    training features before being passed to ``model.predict``.

    Parameters
    ----------
    model : estimator
        Fitted classifier exposing ``predict``; it is expected to have been
        trained on standardised features.
    X_test : array-like or DataFrame
        Test features (unscaled).
    y_test : array-like
        True test labels.
    X_train_fit : array-like or DataFrame, optional
        Training features used to fit the scaler. When omitted, the
        notebook-level ``X_train`` variable is used, which is what the
        original implementation always did.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, auroc)`` on the test set. The AUROC is
        computed from the hard class predictions, not from probabilities.

    Raises
    ------
    NameError
        If ``X_train_fit`` is omitted and no global ``X_train`` exists.
    """
    from sklearn.metrics import accuracy_score, precision_score, roc_auc_score
    from sklearn.preprocessing import StandardScaler

    if X_train_fit is None:
        # Backward-compatible fallback onto the notebook's global training set.
        X_train_fit = X_train

    # Only the fitted statistics are needed; the scaled training data is not.
    scaler = StandardScaler().fit(X_train_fit)
    X_test_scaled = scaler.transform(X_test)

    # Re-seed the global NumPy RNG exactly as the original cell did, so any
    # code running afterwards sees the same RNG state.
    np.random.seed(42)
    y_predict = model.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_predict)
    precision = precision_score(y_test, y_predict)
    auroc = roc_auc_score(y_test, y_predict)

    return accuracy, precision, auroc



Now, verify your results:

In [19]:
from sklearn.model_selection import train_test_split as tts
import warnings
# Silence library warnings for the verification run.
warnings.filterwarnings("ignore")

# 70/30 hold-out split; no random_state is passed, so the split relies on the
# global NumPy seed set on the line before it.
np.random.seed(42)
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.3)

best_model = evaluation_process(X_train, y_train, 5, True)

# Reference (accuracy, precision, AUROC) values, compared with a 5% relative tolerance.
expected_scores = (0.9506666666666667, 0.9290322580645162, 0.8435724133292982)
observed_scores = get_evaluation_test_set(best_model, X_test, y_test)
assert np.allclose(observed_scores, expected_scores, rtol=0.05)


###
### AUTOGRADER TEST - DO NOT REMOVE
###


So now you have implemented several models, used the training set to find the best one, and made your final evaluation on the test set, after which you should not build any more models. The reason is that once you have used data to build or evaluate a model, you have gained knowledge about that data. Hence, any further decision to alter the model leaks information from the test set into the model.