In [1]:
import pandas as pd
import numpy as np

#  QUESTION 1  
PERFORMING LOGISTIC REGRESSION FROM SCRATCH

In [10]:
def sigmoid(z):
    """Evaluate the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The textbook formula calls ``exp(-z)``, which overflows for large
    negative ``z`` and makes NumPy emit a RuntimeWarning. This version only
    ever exponentiates a non-positive number, so it cannot overflow, while
    returning the same values.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s). Non-floating input is converted to float.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of
    ``z``; every value lies in ``[0, 1]``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    # exp(-|z|) is always in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: the algebraically equal e^z / (1 + e^z).
    result = np.where(z >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]

In [11]:
def cost(X, y, theta):
    """Mean binary cross-entropy of a logistic model.

    Computes ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))`` with
    ``h = sigmoid(X @ theta)``, but evaluates the two logarithms directly
    from the logits. Taking ``log`` of a sigmoid that has rounded to exactly
    0 or 1 yields ``-inf`` and then ``0 * -inf = nan``; working from the
    logits keeps the loss finite for confidently classified samples.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix, one row per sample.
    y : ndarray, shape (m,)
        Binary targets (0 or 1).
    theta : ndarray, shape (n,)
        Model parameters.

    Returns
    -------
    float
        Average cross-entropy over the ``m`` samples.
    """
    m = X.shape[0]
    z = np.dot(X, theta)
    # -log(sigmoid(z))     = log(1 + e^-z) = logaddexp(0, -z)
    # -log(1 - sigmoid(z)) = log(1 + e^z)  = logaddexp(0,  z)
    loss_if_positive = np.logaddexp(0, -z)
    loss_if_negative = np.logaddexp(0, z)
    return (1 / m) * np.sum(y * loss_if_positive + (1 - y) * loss_if_negative)

In [4]:
def gradient(X, y, theta):
    """Gradient of the mean cross-entropy loss with respect to ``theta``.

    Returns ``(1/m) * X.T @ (sigmoid(X @ theta) - y)``, where ``m`` is the
    number of rows in ``X``.
    """
    n_samples = X.shape[0]
    probabilities = sigmoid(np.dot(X, theta))
    # Prediction error per sample; projecting it onto the features gives
    # the summed gradient, which is then averaged.
    residual = probabilities - y
    scale = 1/n_samples
    return scale * np.dot(X.T, residual)

In [5]:
def logistic_regression(X, y, alpha=0.02, iterations=5000, seed=None):
    """Fit logistic-regression parameters by batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : ndarray, shape (m,)
        Binary targets (0 or 1).
    alpha : float, default 0.02
        Learning rate applied to every gradient step.
    iterations : int, default 5000
        Number of gradient steps to take.
    seed : int or None, default None
        When given, the initial parameters are drawn from a dedicated
        generator seeded with this value, so the fit is reproducible
        regardless of global state. When ``None`` the draw comes from
        NumPy's global random state, exactly as before.

    Returns
    -------
    ndarray, shape (n,)
        The parameters after the final step.
    """
    n_features = X.shape[1]
    if seed is None:
        # Legacy path: still controllable from outside via np.random.seed().
        theta = np.random.rand(n_features)
    else:
        theta = np.random.default_rng(seed).random(n_features)

    for _ in range(iterations):
        theta -= alpha * gradient(X, y, theta)

    return theta

In [6]:
def predict(X, theta):
    """Return hard 0/1 class labels for every row of ``X``.

    A row is labelled 1 when its predicted probability
    ``sigmoid(X @ theta)`` is strictly greater than 0.5, otherwise 0.
    The labels come back as a plain Python list.
    """
    logits = np.dot(X, theta)
    probabilities = sigmoid(logits)
    return [int(p > 0.5) for p in probabilities]

def z_score(x):
    """Standardise ``x`` to zero mean and unit (population) standard deviation.

    Parameters
    ----------
    x : array-like
        Values to standardise; converted to a float array.

    Returns
    -------
    ndarray
        ``(x - mean(x)) / std(x)``. If ``x`` is constant its standard
        deviation is zero; the centred values (all zeros) are returned
        instead of dividing 0 by 0 and producing ``nan``.
    """
    x = np.asarray(x, dtype=float)
    centered = x - np.mean(x)
    spread = np.std(x)
    if spread == 0:
        # Constant input: nothing to rescale, and dividing would give nan.
        return centered
    return centered / spread

In [8]:
# Training split: two raw features per sample and a binary class label.
train_X = np.array([
    [60, 22],
    [62, 25],
    [67, 24],
    [70, 20],
    [71, 15],
    [72, 14],
    [75, 14],
    [78, 11]
])

train_y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Held-out split used only for evaluation.
test_X = np.array([
    [61, 23],
    [71, 19],
    [73, 15],
    [79, 13]
])

test_y = np.array([0, 0, 1, 1])

# Standardise BOTH splits with the mean/std of the training data. Scaling
# the test set with its own statistics would put it on a different scale
# from the one the model was fitted on.
feature_mean = train_X.mean(axis=0)
feature_std = train_X.std(axis=0)
feature_std[feature_std == 0] = 1.0  # a constant feature stays at 0 instead of becoming nan
train_X = (train_X - feature_mean) / feature_std
test_X = (test_X - feature_mean) / feature_std

# Prepend a column of ones so theta[0] acts as the intercept.
train_X = np.c_[np.ones(train_X.shape[0]), train_X]
test_X = np.c_[np.ones(test_X.shape[0]), test_X]

# The initial theta is random; fix the seed so a re-run reproduces the fit.
np.random.seed(42)
theta = logistic_regression(train_X, train_y, alpha=0.02, iterations=5000)

train_preds = predict(train_X, theta)
test_preds = predict(test_X, theta)

In [12]:
# Report the fitted parameters, then the predicted labels for each split.
print("\nlearned theta values:", theta)
print("\ntraining set predictions:", train_preds)
print("\ntest set predictions:", test_preds)

# Build one single-column frame per series and place them side by side so
# each row pairs the true label with the predicted label.
actual_test_df = pd.DataFrame({'actual Class': test_y})
predicted_test_df = pd.DataFrame({'predicted Class': test_preds})
result_test = pd.concat([actual_test_df, predicted_test_df], axis="columns")

print("\ncomparison of actual vs predicted (test set):", result_test, sep="\n")


learned theta values: [-0.67646125  2.54337563 -3.89028036]

training set predictions: [0, 0, 0, 0, 1, 1, 1, 1]

test set predictions: [0, 0, 1, 1]

comparison of actual vs predicted (test set):
   actual Class  predicted Class
0             0                0
1             0                0
2             1                1
3             1                1


# QUESTION 2  
PERFORMING LOGISTIC REGRESSION USING THE SCIKIT-LEARN LIBRARY

In [3]:
import sklearn
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [6]:
# Training split: two raw features per sample and a binary class label.
train_X = np.array([
    [60, 22],
    [62, 25],
    [67, 24],
    [70, 20],
    [71, 15],
    [72, 14],
    [75, 14],
    [78, 11]
])
train_y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Held-out split used only for evaluation.
test_X = np.array([
    [61, 23],
    [71, 19],
    [73, 15],
    [79, 13]
])
test_y = np.array([0, 0, 1, 1])

# Learn the scaling statistics from the training data only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(train_X)
train_X = scaler.transform(train_X)
test_X = scaler.transform(test_X)

# Fit scikit-learn's logistic regression with its default settings.
model = LogisticRegression().fit(train_X, train_y)

train_preds = model.predict(train_X)
test_preds = model.predict(test_X)

print("\nlearned coefficients (theta values):", model.coef_)
print("learned intercept:", model.intercept_)
print("\ntraining set predictions:", train_preds)
print("\ntest set predictions:", test_preds)

# Fraction of correctly classified samples in each split.
train_accuracy = accuracy_score(train_y, train_preds)
test_accuracy = accuracy_score(test_y, test_preds)
print("\ntraining set accuracy:", train_accuracy)
print("test set accuracy:", test_accuracy)

# Pair each true test label with its prediction, one row per sample.
actual_test_df = pd.DataFrame({'actual class': test_y})
predicted_test_df = pd.DataFrame({'predicted class': test_preds})
result_test = pd.concat([actual_test_df, predicted_test_df], axis="columns")
print("\ncomparison of actual vs predicted (Test Set):", result_test, sep="\n")



learned coefficients (theta values): [[ 0.73151791 -1.12802981]]
learned intercept: [-0.03389761]

training set predictions: [0 0 0 0 1 1 1 1]

test set predictions: [0 0 1 1]

training set accuracy: 1.0
test set accuracy: 1.0

comparison of actual vs predicted (Test Set):
   actual class  predicted class
0             0                0
1             0                0
2             1                1
3             1                1
