<a href="https://colab.research.google.com/github/kanaparthiphani1/Machine-Learning/blob/main/Logistic_Regression_MultiClass_One_vs_Rest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Imports**

In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

# $L_2$ Regularized Logistic Regression

## Sigmoid Function
$\hspace{20mm}{\sigma(z)} = \frac{1}{1+e^{-z}}$

In [5]:
def sigmoid_func(z):
  """Element-wise logistic sigmoid, 1 / (1 + e^{-z}), computed without overflow.

  Args:
    z: scalar or array-like of real numbers.

  Returns:
    sigma(z) with the same shape as ``z``; a NumPy scalar when ``z`` is a scalar.
  """
  z = np.asarray(z)
  if not np.issubdtype(z.dtype, np.floating):
    # integer/bool input: promote so the arithmetic below is done in floating point
    z = z.astype(float)
  # exp(-|z|) always lies in [0, 1], so it can never overflow, unlike exp(-z)
  # which blows up to inf (with a RuntimeWarning) for large negative z.
  decay = np.exp(-np.abs(z))
  # z >= 0: 1/(1+e^{-z});  z < 0: e^{z}/(1+e^{z}) -- the same function, rewritten.
  result = np.where(z >= 0, 1/(1+decay), decay/(1+decay))
  # [()] turns a 0-d array back into a scalar and leaves n-d arrays untouched
  return result[()]

## Hypothesis Function in Logistic Regression
$\hspace{20mm}H = \sigma(Xw+b)\\[0.1pt]$


In [6]:
def compute_hypothesis(X,w,b):
  """Logistic-regression hypothesis H = sigmoid(Xw + b).

  Args:
    X: feature matrix (presumably one row per sample -- the callers in this
       notebook pass 2-D arrays).
    w: weight vector, multiplied with X via np.dot.
    b: bias term added to every linear score.

  Returns:
    The sigmoid of the linear scores, as produced by sigmoid_func.
  """
  return sigmoid_func(np.dot(X,w)+b)

## $L_2$ Regularized Cost Function
$\hspace{20mm}J_{w,b}(X)=\frac{-1}{m}\left [Y^T\log(H)+(1-Y)^T\log(1-H) \right ]+ \frac{\lambda}{2m}w^Tw\\[0.1pt]$

In [27]:
def L2_cost_func(X,Y,w,b,Lambda):
  """L2-regularized binary cross-entropy cost.

  J = (-1/m) * [Y^T log(H) + (1-Y)^T log(1-H)] + (Lambda / (2m)) * w^T w

  Args:
    X: feature matrix; its first dimension is taken as the sample count m.
    Y: 0/1 targets, aligned with the rows of X.
    w: weight vector.
    b: bias (not part of the regularization term).
    Lambda: regularization strength.

  Returns:
    The cost with all singleton dimensions squeezed away.
  """
  m = X.shape[0]
  H = compute_hypothesis(X,w,b)
  # A saturated sigmoid yields H == 0 or H == 1 exactly; log(0) is -inf and
  # 0 * -inf is nan, which would poison the cost. Clamp H just inside (0, 1).
  eps = np.finfo(float).eps
  H = np.clip(H, eps, 1-eps)
  first_term = np.dot(Y.T,np.log(H))
  second_term = np.dot((1-Y).T,np.log(1-H))
  combined = (-1/m)*(first_term + second_term)
  regularization_term = (Lambda/(2*m))*(np.dot(w.T,w))
  return (combined+regularization_term).squeeze()

## Gradients of $L_2$ Regularized Cost Function
$\hspace{20mm}\frac{dJ}{dw} = \frac{1}{m}\left [ X^T(H-Y) + \lambda w\right ]$<br><br>$\hspace{20mm}\frac{dJ}{db} = \frac{1}{m}\sum (H-Y) \\[0.1pt]  \\[0.1pt]$

In [8]:
def grad_L2_cost(X,Y,w,b,Lambda):
  """Gradients of the L2-regularized logistic cost with respect to w and b.

  dJ/dw = (1/m) * [X^T (H - Y) + Lambda * w]
  dJ/db = (1/m) * sum(H - Y)

  Returns:
    (dw, db) -- note that only the weights carry the regularization term.
  """
  scale = 1/X.shape[0]
  residual = compute_hypothesis(X,w,b) - Y
  weight_grad = scale*(np.dot(X.T,residual) + Lambda*w)
  bias_grad = scale*np.sum(residual)
  return weight_grad,bias_grad

## Gradient Descent in $L_2$ Regularized Logistic Regression
$\hspace{10mm}w = w - \alpha \frac{dJ}{dw}$<br><br>
$\hspace{10mm}b = b - \alpha \frac{dJ}{db}$<br><br>

In [31]:
def gradient_descent(X,Y,w,b,Lambda,learning_rate,cost_diff_thres):
  """Batch gradient descent on the L2-regularized logistic cost.

  Repeats w <- w - learning_rate * dJ/dw and b <- b - learning_rate * dJ/db
  until the cost changes by no more than ``cost_diff_thres`` between two
  consecutive iterations.

  If a step increases the cost, "Diverging" is printed, that step is
  discarded, and the last parameters that did not increase the cost are
  returned.

  Args:
    X, Y: training features and 0/1 targets.
    w, b: initial weights and bias (the passed-in objects are not modified;
          new values are rebound locally).
    Lambda: regularization strength.
    learning_rate: step size.
    cost_diff_thres: convergence tolerance on the absolute cost change.

  Returns:
    (w, b) after the last accepted step.
  """
  # Seed the difference above the threshold so the loop body runs at least once.
  cost_diff = cost_diff_thres + 1
  prev_cost = L2_cost_func(X,Y,w,b,Lambda)

  while abs(cost_diff) > cost_diff_thres:
    dw,db = grad_L2_cost(X,Y,w,b,Lambda)
    # Evaluate the step on candidates first so a bad step can be thrown away.
    candidate_w = w - learning_rate*dw
    candidate_b = b - learning_rate*db
    cost = L2_cost_func(X,Y,candidate_w,candidate_b,Lambda)
    cost_diff = cost - prev_cost
    if cost_diff > 0:
      print("Diverging")
      break
    w,b = candidate_w,candidate_b
    prev_cost = cost
  return w,b

## One-vs-Rest for Multi-Class Classification


In [25]:
def one_vs_rest(X, Y, w, b, cost_diff_threshold, learning_rate, Lambda):
  """Train one binary logistic-regression classifier per class (one-vs-rest).

  For every distinct label in ``Y`` the targets are binarized (1 for that
  label, 0 for all others) and gradient descent is run on the result.

  Every classifier starts from the same initial ``w`` and ``b`` supplied by
  the caller, so the classifiers are trained independently of each other and
  of the order in which the classes are visited.

  Args:
    X: training features.
    Y: class labels aligned with the rows of X.
    w, b: initial weights and bias shared by all per-class classifiers.
    cost_diff_threshold: convergence tolerance forwarded to gradient_descent.
    learning_rate: step size forwarded to gradient_descent.
    Lambda: regularization strength forwarded to gradient_descent.

  Returns:
    dict mapping each class label to ``[w, b]`` for that class's classifier.
  """
  # np.unique already returns the distinct labels in sorted order
  classes = np.unique(Y)
  classes_dict = {}

  for label in classes:
    Y_class = np.where(Y==label,1,0)
    # Use separate names for the result: rebinding w/b here would make the
    # next class start from this class's trained parameters.
    class_w,class_b = gradient_descent(X,Y_class,w,b,Lambda,learning_rate,cost_diff_threshold)
    classes_dict[label] = [class_w,class_b]
  return classes_dict


## Prediction in One-vs-Rest Approach 

In [36]:
def predict_labels_in_one_vs_rest(X, classwise_params_dict):
  """Predict a label for every row of X with a set of one-vs-rest classifiers.

  Each class's classifier scores every sample; the class with the highest
  score wins.

  Args:
    X: feature matrix; its first dimension is the number of samples.
    classwise_params_dict: mapping from class label to a sequence whose
      first two items are that class's weights and bias.

  Returns:
    Array with one predicted class label per sample.
  """
  labels = np.array(sorted(classwise_params_dict.keys()))
  # one row of scores per class, one column per sample
  scores = np.zeros((len(labels),X.shape[0]))

  for row,label in enumerate(labels):
    class_params = classwise_params_dict[label]
    scores[row] = compute_hypothesis(X,class_params[0],class_params[1]).ravel()

  # argmax down the class axis picks the best-scoring class for each sample
  winners = np.argmax(scores,axis = 0).ravel()
  return labels[winners]

# Iris Flower Species Prediction

## Data Collection


In [12]:
# Load the Iris dataset and wrap its feature matrix in a DataFrame whose
# columns are named after the dataset's feature names.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [13]:
print(data.target_names)
# Index the array of species names with the per-row targets to get a
# species name for every row, and store it as a new column.
df['species'] = data.target_names[data.target]
df

['setosa' 'versicolor' 'virginica']


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [14]:
# Count missing values per column.
df.isna().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64

In [15]:
# Show every row that has an exact duplicate; keep=False flags all copies,
# not only the later occurrences.
df[df.duplicated(keep=False)]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
101,5.8,2.7,5.1,1.9,virginica
142,5.8,2.7,5.1,1.9,virginica


In [16]:
# Remove repeated rows (drop_duplicates keeps the first occurrence by default).
# The original row index is kept as-is, so it is no longer contiguous.
df = df.drop_duplicates()

In [None]:
# Encode the species names as integer class labels.
# Map only the 'species' column (a frame-wide replace scans every column and
# relies on pandas silently downcasting the object column, which is
# deprecated). The int64 cast fails loudly if any name is missing from the map.
species_to_label = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
df = df.assign(species=df['species'].map(species_to_label).astype('int64'))
# pop() removes the label column from df, leaving only the feature columns.
target_Y = df.pop('species')

iris_X = df.to_numpy()
# column vector of labels, one row per sample
iris_Y = target_Y.to_numpy().reshape(-1,1)

In [19]:
# Inspect the frame after the 'species' column was popped out of it.
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [21]:
def split_train_val_test(X, Y):
    """Shuffle the samples and split them 60% / 20% / 20% into train, validation and test sets.

    The shuffle uses a fixed seed, so the same inputs always produce the same
    split. Note that this reseeds NumPy's global random generator.

    Args:
        X: array of features, indexed by sample along the first axis.
        Y: array of targets with the same first-axis length as X.

    Returns:
        train_X, train_Y, val_X, val_Y, test_X, test_Y. The train and
        validation counts are truncated towards zero; the test set receives
        all remaining samples.
    """
    np.random.seed(11235810)
    data_count = X.shape[0]
    permuted_indices = np.random.permutation(data_count)

    train_ratio = 0.6
    validation_ratio = 0.2
    # The np.int alias was removed in NumPy 1.24; the builtin int truncates identically.
    train_count = int(train_ratio * data_count)
    val_count = int(validation_ratio * data_count)
    val_end = train_count + val_count

    # Apply the same permutation to X and Y so rows stay aligned.
    shuffled_X = X[permuted_indices]
    shuffled_Y = Y[permuted_indices]

    train_X = shuffled_X[:train_count]
    train_Y = shuffled_Y[:train_count]

    val_X = shuffled_X[train_count:val_end]
    val_Y = shuffled_Y[train_count:val_end]

    test_X = shuffled_X[val_end:]
    test_Y = shuffled_Y[val_end:]

    return train_X, train_Y, val_X, val_Y, test_X, test_Y

In [22]:
# Split the Iris features and labels into train / validation / test subsets.
train_X, train_Y, val_X, val_Y, test_X, test_Y = split_train_val_test(iris_X, iris_Y)

In [23]:
# Initial parameters and hyperparameters for training.
n = train_X.shape[1]  # number of feature columns
w = np.zeros((n,1))  # initial weights: zero column vector, one entry per feature
b = 0  # initial bias
cost_diff_threshold = 1e-7  # stop once the cost changes by no more than this between iterations
learning_rate = 0.001  # gradient-descent step size
Lambda = 0  # L2 regularization strength; 0 makes the penalty term vanish

In [32]:
# Train one binary classifier per class on the training split; the result maps
# each class label to its [w, b] pair.
classwise_params_dict = one_vs_rest(train_X, train_Y, w, b, cost_diff_threshold, learning_rate, Lambda)

In [34]:
def predict_and_evaluate(X, Y, classwise_params_dict):
    """Return the classification accuracy of the one-vs-rest model on (X, Y).

    Args:
        X: feature matrix to predict on.
        Y: true labels, one per row of X.
        classwise_params_dict: per-class parameters as consumed by
            predict_labels_in_one_vs_rest.

    Returns:
        Fraction of samples whose predicted label equals the true label.
    """
    predicted = predict_labels_in_one_vs_rest(X, classwise_params_dict).ravel()
    # flatten both sides so a column vector of labels compares element-wise
    hits = np.count_nonzero(predicted == Y.ravel())
    return hits/len(Y)

In [37]:
# Score the trained classifiers on the validation split.
accuracy = predict_and_evaluate(val_X, val_Y, classwise_params_dict)
print("Accuracy on Validation Data: ", accuracy)

Accuracy on Validation Data:  1.0
