# **Practical - 07** : Logistic Regression

> ### **Objective** : Implement Logistic Regression Algorithm on the given dataset

In [10]:
import numpy as np
import pandas as pd
import io
import matplotlib.pyplot as plt

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
data = pd.read_csv("/content/drive/MyDrive/SEM6/CE148_Machine-Learning-Practicals-main/Practical-07/BuyComputer.csv")

data.drop(columns=['User ID',], axis=1, inplace = True)

data.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [13]:
#Declare label as last column in the source file
label = data[["Purchased"]]
print(label)

     Purchased
0            0
1            0
2            0
3            0
4            0
..         ...
395          1
396          1
397          1
398          0
399          1

[400 rows x 1 columns]


In [14]:
#Declaring X as all columns excluding last
X = data[["Age", "EstimatedSalary"]]
print(X)

     Age  EstimatedSalary
0     19            19000
1     35            20000
2     26            43000
3     27            57000
4     19            76000
..   ...              ...
395   46            41000
396   51            23000
397   50            20000
398   36            33000
399   49            36000

[400 rows x 2 columns]


In [15]:
# Splitting data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,label,test_size = 0.2, random_state=117)

In [16]:
# Sacaling data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)


In [17]:

#Variabes to calculate sigmoid function
y_pred = []
len_x = len(X_train[0])
w = []
b = 0.2
print(len_x)

2


In [18]:
def sigmoid(z):
 return 1/(1 + np.exp(-z))

In [19]:
def log_loss(predicted,actual):
   epsilon = 1e-15
   predicted = [max(epsilon,i) for i in predicted] 
   predicted = [min(i,1-epsilon) for i in predicted]
   predicted_new = np.array(predicted)
   x = -np.mean(actual * np.log(predicted_new) + (1-actual) * np.log(1-predicted_new))
   return x

In [20]:
def gradients(X, y, y_hat):
    m = X.shape[0]
    dw = (1/m)*np.dot(X.T, (y_hat - y))
    db = (1/m)*np.sum((y_hat - y)) 
    return dw, db

In [21]:
def normalize(X):
  m, n = X.shape
  for i in range(n):
    X = (X - X.mean(axis = 0)) / X.std(axis = 0)
  return X

In [22]:
def train(X, y, bs, epochs, lr):
    m, n = X.shape
    w = np.zeros((n,1))
    b = 0

    y = y.values.reshape(m,1)
    x = normalize(X)

    losses = []

    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            y_hat = sigmoid(np.dot(xb, w) + b)
            dw, db = gradients(xb, yb, y_hat)
            w -= lr*dw
            b -= lr*db
        l = log_loss(y, sigmoid(np.dot(X, w) + b))
        losses.append(l)
    return w, b, losses

In [23]:
def prediction_function(inputs):
  x = normalize(inputs)
  preds = sigmoid(np.dot(inputs, w) + b)
  pred_class = []    
  pred_class = [1 if i > 0.5 else 0 for i in preds]  
  return np.array(pred_class)

In [24]:
w, b, l = train(X_train, y_train, bs=100, epochs=1000, lr=0.01)

In [25]:
from sklearn.metrics import accuracy_score,precision_score,recall_score

y_predict = prediction_function(X_test)
print("Accuracy : ",accuracy_score(y_test, y_predict))
print("Precision : ",precision_score(y_test,y_predict))
print("Recall : ",recall_score(y_test,y_predict))

Accuracy :  0.8375
Precision :  0.7586206896551724
Recall :  0.7857142857142857


> ## 3.1 Logistic Regression model using sklearn

In [26]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(random_state = 0)

In [27]:
# Splitting data
from sklearn.model_selection import train_test_split
X_train1, X_test1, y_train1, y_test1 = train_test_split(X,label,test_size = 0.2, random_state=117)

In [28]:
# Sacaling data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train1 = sc.fit_transform(X_train1)
X_test1 = sc.fit_transform(X_test1)

In [29]:
clf = LR.fit(X_train1,y_train1)

  y = column_or_1d(y, warn=True)


In [30]:
y_pred1 = clf.predict(X_test1)
print(y_pred1)

[1 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0
 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0
 1 1 0 0 0 0]


In [31]:
from sklearn.metrics import accuracy_score

print(accuracy_score(y_test1,y_pred1))

0.8375


In [32]:
# 3 What is the accuracy, precision and recall of the model?
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test1, y_pred1)
print(cm)

[[47  5]
 [ 8 20]]


In [33]:
import sklearn.metrics as metrics
print(metrics.classification_report(y_test1, y_pred1))

              precision    recall  f1-score   support

           0       0.85      0.90      0.88        52
           1       0.80      0.71      0.75        28

    accuracy                           0.84        80
   macro avg       0.83      0.81      0.82        80
weighted avg       0.84      0.84      0.84        80



In [34]:
# 5 Use the model to identify if a person whose age is 28 years and his/her estimated salary is 76000 will purchase a computer?
output = clf.predict([[28,7600]])
# X_test = sc.transform([])
print(output)
#NOT purchase

[1]
