In [3]:
# 1. Load the DataSet
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Convert to DataFrame for better visualization
iris_df = pd.DataFrame(X, columns=iris.feature_names)
iris_df['species'] = y
iris_df.tail()
iris_df.info


<bound method DataFrame.info of      sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                  5.1               3.5                1.4               0.2   
1                  4.9               3.0                1.4               0.2   
2                  4.7               3.2                1.3               0.2   
3                  4.6               3.1                1.5               0.2   
4                  5.0               3.6                1.4               0.2   
..                 ...               ...                ...               ...   
145                6.7               3.0                5.2               2.3   
146                6.3               2.5                5.0               1.9   
147                6.5               3.0                5.2               2.0   
148                6.2               3.4                5.4               2.3   
149                5.9               3.0                5.1               1.8

In [4]:
# 2. Preprocessing the Data

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [5]:
print(X_train)

[[-1.47393679  1.20365799 -1.56253475 -1.31260282]
 [-0.13307079  2.99237573 -1.27600637 -1.04563275]
 [ 1.08589829  0.08570939  0.38585821  0.28921757]
 [-1.23014297  0.75647855 -1.2187007  -1.31260282]
 [-1.7177306   0.30929911 -1.39061772 -1.31260282]
 [ 0.59831066 -1.25582892  0.72969227  0.95664273]
 [ 0.72020757  0.30929911  0.44316389  0.4227026 ]
 [-0.74255534  0.98006827 -1.27600637 -1.31260282]
 [-0.98634915  1.20365799 -1.33331205 -1.31260282]
 [-0.74255534  2.32160658 -1.27600637 -1.44608785]
 [-0.01117388 -0.80864948  0.78699794  0.95664273]
 [ 0.23261993  0.75647855  0.44316389  0.55618763]
 [ 1.08589829  0.08570939  0.55777524  0.4227026 ]
 [-0.49876152  1.87442714 -1.39061772 -1.04563275]
 [-0.49876152  1.4272477  -1.27600637 -1.31260282]
 [-0.37686461 -1.47941864 -0.01528151 -0.24472256]
 [ 0.59831066 -0.58505976  0.78699794  0.4227026 ]
 [ 0.72020757  0.08570939  1.01622064  0.8231577 ]
 [ 0.96400139 -0.13788033  0.38585821  0.28921757]
 [ 1.69538284  1.20365799  1.36

In [6]:
# 3. Define the Hypothesis (Model)  l
# logistic regression model for multi-class classification.

import numpy as np

# Add intercept term to X
X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]

# Initialize parameters
n_features = X_train.shape[1]
n_classes = len(np.unique(y_train))
theta = np.zeros((n_classes, n_features))


In [8]:
print(n_features)
print(n_classes)
print(theta)

5
3
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [9]:
# 4. Define the Cost Function
# use the cross-entropy loss function for multi-class classification.

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / exp_z.sum(axis=1, keepdims=True)

def compute_cost(X, y, theta):
    m = len(y)
    h = softmax(X.dot(theta.T))
    epsilon = 1e-5  # Small constant to prevent division by zero
    y_one_hot = np.eye(n_classes)[y]
    cost = -1/m * np.sum(y_one_hot * np.log(h + epsilon))
    return cost


In [10]:
print(sigmoid)

<function sigmoid at 0x000001D5C45D6980>


In [11]:
# 5. Implement the Optimizer
# Gradient Descent to minimize the cost function.

def gradient_descent(X, y, theta, learning_rate, num_iterations):
    m = len(y)
    cost_history = np.zeros(num_iterations)
    y_one_hot = np.eye(n_classes)[y]

    for i in range(num_iterations):
        h = softmax(X.dot(theta.T))
        gradients = -1/m * (y_one_hot - h).T.dot(X)
        theta = theta - learning_rate * gradients
        cost_history[i] = compute_cost(X, y, theta)

    return theta, cost_history


In [12]:
# 6. Train the Model

# Define hyperparameters
learning_rate = 0.01
num_iterations = 1000

# Train the model
theta_optimal, cost_history = gradient_descent(X_train, y_train, theta, learning_rate, num_iterations)

print("Optimal parameters:", theta_optimal)
print("Cost history over iterations:", cost_history)


Optimal parameters: [[-0.23377331 -0.56429005  0.65882701 -0.85948687 -0.80544139]
 [ 0.47556263  0.14255275 -0.59934182  0.12501013 -0.11515537]
 [-0.24178932  0.4217373  -0.05948519  0.73447675  0.92059676]]
Cost history over iterations: [1.08913578 1.07986424 1.07076465 1.06183397 1.05306921 1.04446737
 1.03602547 1.02774056 1.01960971 1.01163001 1.00379858 0.99611257
 0.98856915 0.98116554 0.97389897 0.96676672 0.95976609 0.95289444
 0.94614913 0.9395276  0.9330273  0.92664573 0.92038041 0.91422894
 0.90818892 0.90225801 0.89643391 0.89071435 0.88509712 0.87958004
 0.87416096 0.8688378  0.86360848 0.858471   0.85342338 0.84846368
 0.84359    0.83880049 0.83409333 0.82946673 0.82491895 0.82044829
 0.81605308 0.81173168 0.80748251 0.80330399 0.7991946  0.79515286
 0.7911773  0.7872665  0.78341906 0.77963362 0.77590886 0.77224348
 0.7686362  0.76508578 0.76159103 0.75815074 0.75476378 0.75142901
 0.74814533 0.74491167 0.74172697 0.73859022 0.7355004  0.73245655
 0.72945771 0.72650295 

In [16]:
# 7. Evaluate the Model

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Predict function
def predict(X, theta):
    probabilities = softmax(X.dot(theta.T))
    return np.argmax(probabilities, axis=1)

# Make predictions on the test set
y_pred = predict(X_test, theta_optimal)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)




Accuracy: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

