In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Question 1, 1 vs All

In [52]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

class LogisticRegression():
    
    def __init__(self, learning_rate=0.01, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        for _ in range(self.iterations):
            linear_predictions = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_predictions)
            
            dW = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions - y)
        
            self.weights = self.weights - self.learning_rate*dW
            self.bias = self.bias - self.learning_rate*db
            
    def predict(self, X):
        linear_predictions = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_predictions)
        prob = sigmoid(linear_predictions)
        final_pred = [0 if y <= 0.5 else 1 for y in y_pred]
        return final_pred, prob

In [5]:
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)

In [6]:
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [7]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [8]:
setosa = []
versicolor = []
virginica = []

In [66]:
y_s = [1 if i == 0 else 0 for i in y]
y_v = [1 if i == 1 else 0 for i in y]
y_v2 = [1 if i == 2 else 0 for i in y]

In [67]:
print(y_s)
print(y_v)
print(y_v2)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [68]:
modelSetosa = LogisticRegression()
modelSetosa.fit(X, y_s)
modelVersicolor = LogisticRegression()
modelVersicolor.fit(X, y_v)
modelVirginica = LogisticRegression()
modelVirginica.fit(X, y_v2)

In [69]:
setosa, predSetosa = modelSetosa.predict(X)

In [70]:
versicolor, predVersicolor = modelVersicolor.predict(X)

In [71]:
virginica, predVirginica = modelVirginica.predict(X)

In [75]:
predSetosa

array([9.28323906e-01, 8.85795192e-01, 9.10987254e-01, 8.71383117e-01,
       9.32539498e-01, 9.19454418e-01, 9.06303421e-01, 9.08518884e-01,
       8.60932790e-01, 8.86940640e-01, 9.35723297e-01, 8.90444410e-01,
       8.89756422e-01, 9.16489534e-01, 9.70729228e-01, 9.63326345e-01,
       9.53594417e-01, 9.23772655e-01, 9.23417881e-01, 9.32350037e-01,
       8.91737279e-01, 9.21653168e-01, 9.57195826e-01, 8.50327692e-01,
       8.39483825e-01, 8.55854381e-01, 8.82442117e-01, 9.19898786e-01,
       9.23866389e-01, 8.68167586e-01, 8.60531018e-01, 9.06326649e-01,
       9.55152905e-01, 9.64807799e-01, 8.80101045e-01, 9.27765312e-01,
       9.43483545e-01, 9.34998364e-01, 8.87143019e-01, 9.10715096e-01,
       9.31820516e-01, 7.98846266e-01, 9.04270809e-01, 8.78111552e-01,
       8.77506068e-01, 8.76023414e-01, 9.27093312e-01, 8.95861996e-01,
       9.34097718e-01, 9.12996848e-01, 5.44558638e-02, 5.80195524e-02,
       3.44445280e-02, 4.80350672e-02, 3.64424901e-02, 3.88399104e-02,
      

In [76]:
predVersicolor

array([0.15495558, 0.19791346, 0.17584211, 0.19497271, 0.14773974,
       0.13416661, 0.16249561, 0.16763215, 0.20923332, 0.19600067,
       0.14344235, 0.17293845, 0.20040677, 0.18672746, 0.1119966 ,
       0.0971088 , 0.11955471, 0.1532144 , 0.14189846, 0.13563967,
       0.17572599, 0.14101524, 0.13291964, 0.17940743, 0.18756498,
       0.20822815, 0.16851216, 0.15902321, 0.16245667, 0.1906576 ,
       0.19947899, 0.16269432, 0.11889519, 0.10768689, 0.19390412,
       0.17014374, 0.14952161, 0.14971895, 0.19450063, 0.16731508,
       0.14926898, 0.26358506, 0.176833  , 0.15691162, 0.15016561,
       0.19615976, 0.14116852, 0.18101429, 0.14372199, 0.17121364,
       0.34637491, 0.33164643, 0.37247545, 0.42740421, 0.39265671,
       0.39549798, 0.33086538, 0.37092071, 0.38457397, 0.36219505,
       0.44258718, 0.33816468, 0.44885908, 0.39194217, 0.3148489 ,
       0.33877468, 0.3622461 , 0.38697676, 0.47213687, 0.39700883,
       0.34741524, 0.35470747, 0.46078371, 0.41241874, 0.36246

In [77]:
predVirginica

array([0.00975504, 0.01601516, 0.01420709, 0.02028566, 0.00970205,
       0.00969242, 0.01584742, 0.01251245, 0.0239913 , 0.01537007,
       0.00769444, 0.01595009, 0.01583805, 0.01594917, 0.00339342,
       0.00441719, 0.00624821, 0.01054953, 0.00787724, 0.0094679 ,
       0.01191023, 0.01100939, 0.00818695, 0.01975531, 0.02208937,
       0.01862201, 0.01630875, 0.01017759, 0.00980832, 0.01968891,
       0.01979535, 0.0111905 , 0.00608254, 0.00447636, 0.0166142 ,
       0.01041816, 0.00667961, 0.00959407, 0.02004141, 0.01170183,
       0.0101117 , 0.03339723, 0.01735601, 0.01772824, 0.01584834,
       0.01850246, 0.00976821, 0.01693528, 0.00822976, 0.01205949,
       0.25190835, 0.30495201, 0.34366545, 0.43406288, 0.38041224,
       0.44664471, 0.37075795, 0.28019209, 0.31279239, 0.40453043,
       0.37828091, 0.33846918, 0.31684934, 0.43576399, 0.22874562,
       0.24165006, 0.4663419 , 0.29140777, 0.51118884, 0.32114372,
       0.5210771 , 0.26142346, 0.54973172, 0.41501477, 0.27221

In [89]:
final = []
results = [setosa, versicolor, virginica]
probResults = [predSetosa, predVersicolor, predVirginica]
for i in range(len(X)):
    clas = []
    for j in results:
        if(j[i] == 1):
            clas.append(1)
        else:
            clas.append(0)
    m = -1
    max = 0
    for j in range(len(clas)):
        if clas[j] == 1:
            if probResults[j][i] > max:
                max = probResults[j][i]
                m = j
    final.append(m)


In [94]:
for i in range(len(final)):
    if final[i] == -1:
        final[i] = 1
        

In [95]:
from sklearn.metrics import accuracy_score

### Accuracy of model 1vsAll

In [96]:
accuracy_score(final, y)

0.9666666666666667

# Question 2

#### No regularization alpha = 10, iteration = 1000, lamda = 0.2

In [144]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

class LogisticRegression():
    
    def __init__(self, learning_rate=10, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        for _ in range(self.iterations):
            linear_predictions = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_predictions)
            
            dW = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions - y)
        
            self.weights = self.weights - self.learning_rate*dW
            self.bias = self.bias - self.learning_rate*db
            
    def predict(self, X):
        linear_predictions = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_predictions)
        final_pred = [0 if y <= 0.5 else 1 for y in y_pred]
        return final_pred

In [145]:
class RidgeLogisticRegression():
    
    def __init__(self, learning_rate=10, iterations=1000, lamda=0.2):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.lamda = lamda
        self.weights = None
        self.bias = None
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        for _ in range(self.iterations):
            linear_predictions = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_predictions)
            
            dW = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions - y)
        
            self.weights = self.weights*(self.learning_rate*self.lamda) - self.learning_rate*dW
            self.bias = self.bias*(self.learning_rate*self.lamda) - self.learning_rate*db
            
    def predict(self, X):
        linear_predictions = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_predictions)
        final_pred = [0 if y <= 0.5 else 1 for y in y_pred]
        return final_pred

In [146]:
df = pd.read_csv('exam6.txt', names=['test1', 'test2', 'result'])

In [147]:
df

Unnamed: 0,test1,test2,result
0,0.051267,0.699560,1
1,-0.092742,0.684940,1
2,-0.213710,0.692250,1
3,-0.375000,0.502190,1
4,-0.513250,0.465640,1
...,...,...,...
113,-0.720620,0.538740,0
114,-0.593890,0.494880,0
115,-0.484450,0.999270,0
116,-0.006336,0.999270,0


In [148]:
X = df.drop(['result'], axis=1)

In [149]:
y = df['result']

In [150]:
model = LogisticRegression()

In [151]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=7)

In [152]:
model.fit(X_train, y_train)

In [153]:
y_pred = model.predict(X_test)

In [154]:
modelR = RidgeLogisticRegression()

In [155]:
modelR.fit(X_train, y_train)

  return 1/(1 + np.exp(-x))


In [156]:
y_pred2 = modelR.predict(X_test)

  return 1/(1 + np.exp(-x))


In [157]:
from sklearn.metrics import accuracy_score
print(f"Accuracy without regression {accuracy_score(y_pred, y_test)}")
print(f"Accuracy with regression {accuracy_score(y_pred2, y_test)}")

Accuracy without regression 0.5
Accuracy with regression 0.5833333333333334
