In [None]:
import numpy as np # linear algebra
import pandas as pd

In [None]:
import io
import plotly.offline as py#visualization
py.init_notebook_mode(connected=True)#visualization
import plotly.graph_objs as go#visualization
import plotly.tools as tls#visualization
import plotly.figure_factory as ff#visualization
import matplotlib.pyplot as plt#visualization

In [None]:
# Load the churn dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(r"../input/churndetection/churn-bigml-80.csv")

# Encode the target and the two plan flags as integers 0/1.
# Explicit astype/map is used instead of Series.replace because replace relies on
# implicit dtype downcasting (bool/object -> int), which recent pandas deprecates.
df['Churn'] = df['Churn'].astype(int)
# NOTE: map() turns any value other than 'Yes'/'No' into NaN, which makes
# unexpected categories visible instead of silently leaving strings behind.
df['International plan'] = df['International plan'].map({'Yes':1,'No':0})
df['Voice mail plan'] = df['Voice mail plan'].map({'Yes':1,'No':0})

df.head()


# MODEL PREDICTION 

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.metrics import roc_auc_score,roc_curve
from sklearn.metrics import f1_score
import statsmodels.api as sm
from sklearn.metrics import precision_score,recall_score
from yellowbrick.classifier import DiscriminationThreshold
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
train, test = train_test_split(df, test_size=0.25, random_state=111)

# Predictors are every column except the state label and the target itself.
target_col = ['Churn']
X = df.drop(columns=['State', 'Churn'])
cols = X.columns

# Separate independent (X) and dependent (Y) variables for both partitions.
train_X, train_Y = train[cols], train[target_col]
test_X, test_Y = test[cols], test[target_col]

    
def telecom_churn_prediction(algorithm,training_x,testing_x,
                             training_y,testing_y,cols,cf,threshold_plot) :
    """Fit a classifier, print hold-out metrics and plot its diagnostics.

    Parameters
    ----------
    algorithm : estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        It is fitted in place on the training data.
    training_x, testing_x : feature tables for the train / test partitions.
    training_y, testing_y : binary targets for the train / test partitions.
        A single-column DataFrame is accepted and flattened to 1-D.
    cols : accepted for call compatibility; not used in the body.
    cf : accepted for call compatibility; not used in the body.
    threshold_plot : when truthy, also draw a Yellowbrick
        ``DiscriminationThreshold`` plot fitted on the training data.

    Returns
    -------
    None. Results are printed and rendered as plots.

    Side effects
    ------------
    Writes the fitted estimator to ``model.sav`` in the current working
    directory on every call (an existing file is overwritten).
    """
    # Local import: joblib is not imported at the top of the notebook.
    import joblib

    # Estimators and metrics expect 1-D targets; flattening avoids the
    # column-vector warnings raised when a one-column DataFrame is passed.
    y_train = np.ravel(training_y)
    y_test = np.ravel(testing_y)

    #model
    algorithm.fit(training_x,y_train)
    predictions = algorithm.predict(testing_x)
    # Second column of predict_proba: score of the second class (the positive one for 0/1 labels).
    churn_scores = algorithm.predict_proba(testing_x)[:,1]

    #persist the fitted model
    joblib.dump(algorithm, 'model.sav')
    print('train complete')

    print (algorithm)
    print ("\n Classification report : \n",classification_report(y_test,predictions))
    print ("Accuracy   Score : ",accuracy_score(y_test,predictions))
    #confusion matrix
    conf_matrix = confusion_matrix(y_test,predictions)
    #roc_auc_score
    # AUC must be computed from the continuous scores, not the hard 0/1
    # predictions, so that it matches the ROC curve plotted below.
    model_roc_auc = roc_auc_score(y_test,churn_scores)
    print ("Area under curve : ",model_roc_auc,"\n")
    fpr,tpr,_ = roc_curve(y_test,churn_scores)
    print(conf_matrix )
    #plot confusion matrix
    trace1 = go.Heatmap(z = conf_matrix ,
                        x = ["Not churn","Churn"],
                        y = ["Not churn","Churn"],
                        showscale  = False,colorscale = "picnic",
                        name = "matrix")

    #plot roc curve
    trace2 = go.Scatter(x = fpr,y = tpr,
                        name = "Roc : " + str(model_roc_auc),
                        line = dict(color = ('rgb(22, 96, 167)'),width = 2))
    # Diagonal reference line from (0,0) to (1,1).
    trace3 = go.Scatter(x = [0,1],y=[0,1],
                        line = dict(color = ('rgb(205, 12, 24)'),width = 2,
                        dash = 'dot'))

    #subplots
    # The spec reserves a full-width second row, but no trace is added to it here.
    fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                            subplot_titles=('Confusion Matrix',
                                            'Receiver operating characteristic'))

    fig.append_trace(trace1,1,1)
    fig.append_trace(trace2,1,2)
    fig.append_trace(trace3,1,2)

    fig['layout'].update(showlegend=False, title="Model performance" ,
                         autosize = False,height = 900,width = 800,
                         plot_bgcolor = 'rgba(240,240,240, 0.95)',
                         paper_bgcolor = 'rgba(240,240,240, 0.95)',
                         margin = dict(b = 195))
    fig["layout"]["xaxis2"].update(dict(title = "false positive rate"))
    fig["layout"]["yaxis2"].update(dict(title = "true positive rate"))
    fig["layout"]["xaxis3"].update(dict(showgrid = True,tickfont = dict(size = 10),
                                        tickangle = 90))
    py.iplot(fig)

    if threshold_plot :
        visualizer = DiscriminationThreshold(algorithm)
        visualizer.fit(training_x,y_train)
        # Yellowbrick 1.0 renamed poof() to show(); fall back for older installs.
        render = getattr(visualizer, "show", None)
        if render is None :
            render = visualizer.poof
        render()
        



# LOGISTIC REGRESSION

In [None]:
# Only the solver is set explicitly. `multi_class` is deprecated in recent
# scikit-learn releases, and the other arguments previously spelled out were
# the library defaults (n_jobs=1 is equivalent to the default for liblinear),
# so the fitted model is unchanged for this binary target.
logit = LogisticRegression(solver='liblinear')

telecom_churn_prediction(logit,train_X,test_X,train_Y,test_Y,
                         cols,"coefficients",threshold_plot = True)

# DECISION TREE CLASSIFIER

In [None]:
from sklearn import tree
# Seed the tree so that Restart & Run All reproduces the same model and metrics;
# without a random_state the tree can differ between runs.
clf = tree.DecisionTreeClassifier(random_state=0)
telecom_churn_prediction(clf,train_X,test_X,train_Y,test_Y,
                         cols,"features",threshold_plot = True)


# RANDOM FOREST CLASSIFIER

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Forest of depth-2 trees with a fixed seed.
clf = RandomForestClassifier(random_state=0, max_depth=2)
telecom_churn_prediction(
    clf, train_X, test_X, train_Y, test_Y,
    cols, "features", threshold_plot=True,
)


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with a fixed seed, evaluated with the shared helper.
clf = DecisionTreeClassifier(random_state=0)
telecom_churn_prediction(
    clf, train_X, test_X, train_Y, test_Y,
    cols, "features", threshold_plot=True,
)


# BAGGING CLASSIFIER

In [None]:
from sklearn.ensemble import BaggingClassifier
import joblib
# Seed the ensemble so the bootstrap samples, and therefore the metrics and
# the persisted model, are the same on every run.
clf=BaggingClassifier(n_estimators=3, random_state=0)
telecom_churn_prediction(clf,train_X,test_X,train_Y,test_Y,
                         cols,"bag",threshold_plot = True)
# Keep a separately named copy of this model in addition to the file the
# helper writes on every call.
filename = 'finalized_model.sav'
joblib.dump(clf, filename)

# KNN CLASSIFIER

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 3 closest training rows.
clf = KNeighborsClassifier(n_neighbors=3)
telecom_churn_prediction(
    clf, train_X, test_X, train_Y, test_Y,
    cols, "bag", threshold_plot=True,
)


# MLP CLASSIFIER

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with a fixed seed and at most 300 training iterations.
clf = MLPClassifier(max_iter=300, random_state=1)
telecom_churn_prediction(
    clf, train_X, test_X, train_Y, test_Y,
    cols, "bag", threshold_plot=True,
)


# GRADIENT BOOSTING CLASSIFIER

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# 100 boosted depth-1 trees (stumps) with learning rate 1.0 and a fixed seed.
clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
)
telecom_churn_prediction(
    clf, train_X, test_X, train_Y, test_Y,
    cols, "bag", threshold_plot=True,
)
