# Model creation
This script is focused towards model creating with the help of EDA we performed to take better decisions

### Import libraries

In [115]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn import metrics
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imblearn.combine import SMOTEENN

### Read the data

In [116]:
df=pd.read_csv('telecom_data.csv')

In [117]:
df.head(2)

Unnamed: 0.1,Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tenure_range
0,0,Female,0,Yes,No,No,No phone service,DSL,No,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1-12
1,1,Male,0,No,No,Yes,No,DSL,Yes,No,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,25-36


Create the input and output data

In [118]:
x=df.drop(columns=['Unnamed: 0','Churn'])
y=df['Churn']

In [119]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2)

### Label encoder for output column

In [120]:
la=LabelEncoder()
ytrain=la.fit_transform(ytrain)
ytest=la.transform(ytest)

### Onehotencoding for categorical features in input data

In [121]:
tr=ColumnTransformer(
    transformers=[
        ('trf1',OneHotEncoder(sparse_output=False,drop='first'),['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection','TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract','PaperlessBilling', 'PaymentMethod','tenure_range'])
    ],
       remainder='passthrough'
)
xtrain=tr.fit_transform(xtrain)
xtest=tr.transform(xtest)

### Model creation

In [122]:
model_dt=DecisionTreeClassifier(criterion='gini',random_state=100,max_depth=6,min_samples_leaf=8)

In [123]:
model_dt.fit(xtrain,ytrain)

### Output prediction

In [124]:
y_pred=model_dt.predict(xtest)
y_pred

array([0, 0, 0, ..., 0, 0, 0])

### Performance metrics

In [126]:
model_dt.score(xtest,ytest)

0.7874911158493249

In [128]:
print(classification_report(ytest, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.92      0.86      1042
           1       0.64      0.42      0.51       365

    accuracy                           0.79      1407
   macro avg       0.73      0.67      0.69      1407
weighted avg       0.77      0.79      0.77      1407

