# Logistic Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('Loan_approval_2.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [4]:
print(X_train)

[[55000 12000]
 [59000 16000]
 [40000  8000]
 [72000 23000]
 [46000  9500]
 [49200 10000]
 [60000 20000]
 [42000  8500]
 [71000 20000]
 [48000  9000]
 [30000  5000]
 [42000  7500]
 [69000 21000]
 [75000 25000]
 [55000 16000]
 [54000 13000]
 [70000 11000]
 [50000 10000]
 [80000 65000]
 [44000  8500]
 [52000 11000]]


In [5]:
print(y_train)

[1 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 1 1 1 1 0]


In [6]:
print(X_test)

[[32000  8000]
 [35000  2700]
 [63000 22000]
 [35000  6000]
 [68000 22000]
 [39000 23000]
 [54000  9000]
 [80000 10000]]


In [7]:
print(y_test)

[0 1 0 0 0 0 0 1]


## Feature Scaling

In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [9]:
print(X_train)

[[-0.03008993 -0.29732833]
 [ 0.27814838  0.02702985]
 [-1.18598358 -0.6216865 ]
 [ 1.27992287  0.59465665]
 [-0.72362612 -0.50005218]
 [-0.47703547 -0.45950741]
 [ 0.35520795  0.35138802]
 [-1.03186442 -0.58114173]
 [ 1.20286329  0.35138802]
 [-0.56950696 -0.54059696]
 [-1.95657934 -0.86495513]
 [-1.03186442 -0.66223127]
 [ 1.04874414  0.43247756]
 [ 1.5111016   0.75683574]
 [-0.03008993  0.02702985]
 [-0.10714951 -0.21623878]
 [ 1.12580372 -0.37841787]
 [-0.41538781 -0.45950741]
 [ 1.89639948  4.00041747]
 [-0.87774527 -0.58114173]
 [-0.26126866 -0.37841787]]


In [10]:
print(X_test)

[[-1.80246019 -0.6216865 ]
 [-1.57128146 -1.05146108]
 [ 0.58638668  0.51356711]
 [-1.57128146 -0.78386559]
 [ 0.97168456  0.51356711]
 [-1.26304315  0.59465665]
 [-0.10714951 -0.54059696]
 [ 1.89639948 -0.45950741]]


## Training the Logistic Regression model on the Training set

In [11]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

## Predicting a new result

In [12]:
print(classifier.predict(sc.transform([[30,87000]])))

[1]


## Predicting the Test set results

In [13]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 1]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 1]]


## Making the Confusion Matrix

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[4 2]
 [1 1]]


0.625

## Visualising the Training set results

In [15]:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_train), y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Logistic Regression (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

MemoryError: Unable to allocate 370. GiB for an array with shape (248000, 200080) and data type float64

## Visualising the Test set results

In [None]:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Logistic Regression (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()