In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
import scipy.sparse
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

In [2]:
iris = datasets.load_iris()
list(iris.keys())
X = iris["data"] # petal width
Y = iris["target"]

In [4]:
X = normalize(X, norm='l2')
def oneHotIt(Y):
    m = Y.shape[0]
    OHX = scipy.sparse.csr_matrix((np.ones(m), (Y, np.array(range(m)))))
    OHX = np.array(OHX.todense()).T
    return OHX
y = oneHotIt(Y)

In [5]:
x_train = X[0:140]
y_train = y[0:140]
x_test = X[140:]
y_test = y[140:]


In [6]:
class LogisticRegression:
    def __init__(self, epochs = 10):
        self.epochs = epochs
        self.theta = np.random.rand(4,3)
        self.eta = 0.1 # learning rate
           #m = y.shape[0]

    
    def train(self, X, y):
        m = y.shape[0]
        for iteration in range(self.epochs):
            Sig_inp = X.dot(self.theta)
            H_theta = 1/(1 + np.exp(-Sig_inp))
            err = H_theta - y
            SE = err * err
            MSE = np.mean(SE)
           # print("MSE after:", iteration, "iterations", MSE)
            gradients = 2/m * X.T.dot(err)
            self.theta = self.theta - self.eta * (gradients)
           
        
    def predict(self, X):
        
        theta_best = self.theta
        y_predict = X.dot(theta_best)
        y_predict = 1/(1 + np.exp(-y_predict))
        return y_predict
    
    
    def error(self, y_predict, y):
        error = np.square(np.subtract(y,y_predict)).mean() # root mean square error
        return error
    
    
    def accuracy(self, y_predict, y):
        accuracy = sum(y_predict == y) / (float(len(y)))
        return accuracy

In [7]:
class SoftmaxRegression:
    """
    Class representing a softmax regression model.
    Capable of performing multiclass classfication.
    Parameters
    ----------
    n_iter : float, default=3000
        Maximum number of iterations to be used by batch gradient descent.
    lr : float, default=1e-1
        Learning rate determining the size of steps in batch gradient descent.
    Attributes 
    ----------
    coef_ : array of shape (n_features,)
        Estimated coefficients of each feature and intercept.  
    """
    def __init__(self, epochs=3000, eta=1e-1):
        self.epochs = epochs
        self.eta = eta
        self.theta  = np.random.rand(5,1)
        
    def accuracy(self, y_predict, y):
    
        # calculating the prediction accuracy
        accuracy = sum(y_predict == y) / (float(len(y)))

        return accuracy
    def error(self,y_predict, y):
        error = np.square(np.subtract(y,y_predict)).mean() # root mean square error
        return error

    def train(self, X_train_b1, y):
        m = y.shape[0] + 1
        for iteration in range(self.epochs):
            Sig_inp = X_train_b1.dot(self.theta)
            H_theta = 1 / (1 + np.exp(-Sig_inp))   # Sigmoidal function, Sig_inp = X.m or X.Theta
            err = H_theta - y
            errm = err * err
            mean_err = np.mean(errm)
            # print("MSE after:", iteration, "iterations", mean_err)
            gradients = 2 / m * X_train_b1.T.dot(err)
            self.theta = self.theta - self.eta * (gradients)           # updating the weights or downhill steps (recall the downhill problem)

    def predict(self, X_train_b1):
        theta_best = self.theta
        y_predict = X_train_b1.dot(theta_best)
        return y_predict

    def predict_proba(self, y_predict):
        for i in range(y_predict.shape[0]):
            for j in range(y_predict.shape[1]):

                if y_predict[i][j]<0.5:
                    y_predict[i][j]=0
                else:
                    y_predict[i][j]=1

In [8]:
x_train.shape

(140, 4)

In [10]:
ola = LogisticRegression()
ola.train(x_train,y_train)
y_predict = ola.predict(x_train)
ola.error(y_predict, y_train)

0.28262016846670224

### Predicting With Gradient Descent 
gradient descent calculates the wieghts (m) by finding the more efficient weights that can minimize the errors calculated at each turn therefore producing a better prediction . with each iteration gradient descent tries to find a weight that brings the predicted outcome closer to the actual outcome .

* logistic regression is a widely used algorithm for classification, it uses a sigmoid function to calculate an output between 1 and 0 . its an algorithm best used in cases of classifications

* Linear regression find the predicted outcome by using the line of best fit method, it uses the calculated weight and bias to predict a linear extention of the trained data (i.e the linear relationship between the independent variables)


## Computing Error For multiple iterations

In [17]:
ola = LogisticRegression(epochs=100)
ola.train(x_train,y_train)
y_predict = ola.predict(x_train)
ola.error(y_predict, y_train)

0.183625053187671

* Error After 1000 iterations

In [18]:
ola = LogisticRegression(epochs=1000)
ola.train(x_train,y_train)
y_predict = ola.predict(x_train)
ola.error(y_predict, y_train)

0.1120165124208923

* Error After 10000 iterations

In [19]:
ola = LogisticRegression(epochs=10000)
ola.train(x_train,y_train)
y_predict = ola.predict(x_train)
ola.error(y_predict, y_train)

0.0815384359270191

## Testing For testing dataset 

In [20]:

y_predict = ola.predict(x_test)
ola.error(y_predict, y_test)

0.10988848047719155

In [21]:
y_predict

array([[0.00250748, 0.47145658, 0.75320026],
       [0.00628087, 0.44683757, 0.50905818],
       [0.0019469 , 0.51114336, 0.78243834],
       [0.0021695 , 0.49099058, 0.77144771],
       [0.00268538, 0.43043427, 0.76386173],
       [0.00410978, 0.46578383, 0.62456665],
       [0.00259583, 0.57677227, 0.66773116],
       [0.00425435, 0.48231632, 0.59357206],
       [0.00365314, 0.37092823, 0.72857725],
       [0.00353866, 0.45619113, 0.66599687]])

# Task 2

### Import iris dataset

In [22]:
iris = datasets.load_iris()
list(iris.keys())
X = iris["data"]

### Diffrrence Between Softmax Regression and Logistic Regression
Softmax Regression handles multiple classification cases while logistic regression is designed for binary classfication

In [23]:
#2: Extract the training data
X_train =X[0:99] # we have used the first 100 samples. first 50 belongs to class 1 and next 50 belongs to class 2

#3: Split the data into training and testing
X_train =X[0:89]   # trainig data: here we want to use first 90 samples for training so that we can use the last 10 samples for testing
X_test = X[90:99] # testing data

#4. Normalizing the data
X_train = normalize(X_train, norm='l2')
X_train = X_train.T
X_test = normalize(X_test, norm='l2')
X_test = X_test.T


#5. Extract the output from the dataset
y = iris["target"]
y_train=y[0:89]
y_test = y[90:99] 

#6. Reshaping the output
y_train=np.reshape(y_train, (1, 89)) #
y_train=y_train.T
y_test = np.reshape(y_test, (1, 9))
y_test = y_test.T

# 7. Normalizing the output
y_train = normalize(y_train, norm='l2')
y_test = normalize(y_test, norm='l2')

In [24]:
#8. Adding bias to the model
ones = np.ones((1, 89))
X_train_b1 = np.append(ones, X_train, axis=0)
X_train_b1 = X_train_b1.T
zeros = np.ones((1,9))
X_test_b1 = np.append(zeros, X_test, axis=0).T

## Computing Errors For the Following iterations

#### Error After 10 iterations , this is just a little effort to compute the efficient weights for prediction
* Error is quite high 

In [25]:
ola = SoftmaxRegression(epochs=10)
ola.train(X_train_b1,y_train)
y_pred = ola.predict(X_train_b1)
ola.predict_proba(y_pred)
ola.error(y_pred,y_train)

0.5617977528089888

#### Error After 100 iterations 
* Error reduces but stilll quite high

In [26]:
ola = SoftmaxRegression(epochs=100)
ola.train(X_train_b1,y_train)
y_pred = ola.predict(X_train_b1)
ola.predict_proba(y_pred)
ola.error(y_pred,y_train)

0.43820224719101125

#### Error After 1000 iterations 

* Error is 0  , that is , the model is predicting exactly the same as the test
* Observation : Error is 0 , this means the training is overfitting 

In [45]:
ola = SoftmaxRegression(epochs=1000)
ola.train(X_train_b1,y_train)
y_pred = ola.predict(X_train_b1)
ola.predict_proba(y_pred)
ola.error(y_pred,y_train)

0.0

#### Error After 10000 iterations 
* Error is 0  , that is , the model is predicting exactly the same as the test
* The gradient descent has reached its optimal

In [46]:
ola = SoftmaxRegression(epochs=10000)
ola.train(X_train_b1,y_train)
y_pred = ola.predict(X_t_b1)
ola.predict_proba(y_pred)
ola.error(y_pred,y_test)

0.0