# Logistic Regression for Churn Modelling

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the churn table from disk. Every column except the last is treated as
# a feature; the last column is the target to predict.
dataset = pd.read_csv('Churn_Modelling1.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Encoding Categorical Variables and Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the feature columns at positions 0 and 1; all remaining
# columns are passed through unchanged after the encoded ones.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0, 1])], remainder='passthrough')

# Encode from the raw feature columns of `dataset` (the same selection used
# when X was first built) instead of feeding X back into itself. With
# `X = ct.fit_transform(X)`, re-running this cell would one-hot encode the
# already-encoded dummy columns and silently change the feature layout.
# On a fresh kernel the resulting X is the same either way.
X = np.array(ct.fit_transform(dataset.iloc[:, :-1].values))

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)


In [4]:
# Inspect the training features after encoding and splitting (not yet scaled).
print(X_train)

[[0.0 1.0 0.0 ... 0 0 5831.0]
 [1.0 0.0 0.0 ... 1 0 95611.47]
 [0.0 0.0 1.0 ... 1 1 42855.97]
 ...
 [1.0 0.0 0.0 ... 1 0 181429.87]
 [0.0 0.0 1.0 ... 1 1 148750.16]
 [0.0 1.0 0.0 ... 1 0 118855.26]]


In [5]:
# Inspect the training labels produced by the split.
print(y_train)

[1 0 0 ... 0 0 1]


In [6]:
# Inspect the held-out test features after encoding and splitting (not yet scaled).
print(X_test)

[[0.0 1.0 0.0 ... 1 1 192852.67]
 [1.0 0.0 0.0 ... 1 0 128702.1]
 [0.0 0.0 1.0 ... 1 1 75732.25]
 ...
 [1.0 0.0 0.0 ... 1 0 84487.62]
 [0.0 1.0 0.0 ... 1 0 46522.68]
 [1.0 0.0 0.0 ... 0 0 72927.68]]


In [7]:
# Inspect the held-out test labels produced by the split.
print(y_test)

[0 1 0 ... 0 0 0]


## Feature Scaling

In [8]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both the training and the test rows.
# NOTE: this cell overwrites X_train / X_test with their scaled versions.
# Re-run the split cell before re-running this one, otherwise `sc` is refit
# on already-scaled data and no longer matches raw inputs.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [9]:
# Inspect the training features after the scaler has been applied.
print(X_train)

[[-1.01558815  1.76021608 -0.57468161 ... -1.55362351 -1.03446007
  -1.64080994]
 [ 0.98465111 -0.56811207 -0.57468161 ...  0.64365658 -1.03446007
  -0.07927152]
 [-1.01558815 -0.56811207  1.74009395 ...  0.64365658  0.96668786
  -0.99684012]
 ...
 [ 0.98465111 -0.56811207 -0.57468161 ...  0.64365658 -1.03446007
   1.4133552 ]
 [-1.01558815 -0.56811207  1.74009395 ...  0.64365658  0.96668786
   0.84496184]
 [-1.01558815  1.76021608 -0.57468161 ...  0.64365658 -1.03446007
   0.32500428]]


In [10]:
# Inspect the test features after the scaler has been applied.
print(X_test)

[[-1.01558815  1.76021608 -0.57468161 ...  0.64365658  0.96668786
   1.61203027]
 [ 0.98465111 -0.56811207 -0.57468161 ...  0.64365658 -1.03446007
   0.49626891]
 [-1.01558815 -0.56811207  1.74009395 ...  0.64365658  0.96668786
  -0.42502785]
 ...
 [ 0.98465111 -0.56811207 -0.57468161 ...  0.64365658 -1.03446007
  -0.272747  ]
 [-1.01558815  1.76021608 -0.57468161 ...  0.64365658 -1.03446007
  -0.93306558]
 [ 0.98465111 -0.56811207 -0.57468161 ... -1.55362351 -1.03446007
  -0.47380732]]


## Training the Logistic Regression model on the Training set

In [11]:
from sklearn.linear_model import LogisticRegression

# Build the logistic regression model with a fixed seed and fit it on the
# scaled training data in one step (`fit` returns the estimator itself).
classifier = LogisticRegression(random_state=0).fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so the notebook
# still displays it, as the bare `fit(...)` call did.
classifier

## Predicting a new result

In [12]:
# Hand-built feature row for a single new observation, written in the
# post-encoding column layout (one-hot columns first, then the passthrough
# columns). It is scaled with the already-fitted scaler before being classified.
# NOTE(review): every printed row of X_train / X_test has exactly one 1 among
# its first three columns, but this row has none there - the first one-hot
# group looks unset. Confirm which category was intended.
new_customer = [[0, 0, 0, 1, 0, 600, 20, 5, 69000, 4, 1, 0, 102051.36]]
new_customer_scaled = sc.transform(new_customer)
print(classifier.predict(new_customer_scaled))

[0]


## Predicting the Test set results

In [13]:
# Predict a label for every row of the scaled test set.
y_pred = classifier.predict(X_test)

# Show predictions (left column) next to the true labels (right column).
print(np.column_stack((y_pred, y_test)))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Making the Confusion Matrix

In [14]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Fraction of test rows classified correctly; left as the last expression so
# the notebook displays it.
# NOTE(review): accuracy alone can look good when one class dominates - read
# it together with the per-class counts in the matrix above.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1901   90]
 [ 393  116]]


0.8068