In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
# Step 1: Read the Iris CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs on a machine where exactly this file exists.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\Nidhi\Desktop\sem 7\ml\ML datasets\ML datasets\Iris\Iris.csv",
)

In [7]:
# Step 2: Binarise the target for binary classification:
# 'Iris-setosa' -> 0, every other species -> 1.
#
# The column is overwritten in place, so this cell has to tolerate being run
# more than once. A plain `0 if x == 'Iris-setosa' else 1` would map the
# already-encoded 0s to 1 on a second run and silently collapse the target to
# a single class; values that are already 0/1 are therefore passed through.
def _encode_species(label):
    """Return 0 for 'Iris-setosa', 1 for any other label; keep existing 0/1 codes."""
    if label in (0, 1):  # already encoded by an earlier run of this cell
        return int(label)
    return 0 if label == 'Iris-setosa' else 1


df['Species'] = df['Species'].apply(_encode_species)

In [8]:
# Target vector: the 'Species' column as a NumPy array.
y = df.loc[:, 'Species'].values

# Feature matrix: the four measurement columns, in this order, as a NumPy array.
X = df.loc[
    :,
    ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
].values

In [9]:
# Step 3: Standardise the features with StandardScaler (gradient descent for
# logistic regression converges faster on normalised inputs).
# NOTE(review): the scaler is fit on every row of X, i.e. before any
# train/test split — rows later held out still contribute to the fitted
# statistics. Confirm this is intended.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [10]:
# Step 4: Hold out 20% of the rows as a test set (fixed seed so the split is
# reproducible); the remaining 80% are used for training.
#
# The split is made on the raw features and the scaler is then fit on the
# training rows only. Fitting the scaler on all rows before splitting would
# let the test rows influence the standardisation applied to the training
# data (data leakage), which makes the test score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train_raw)  # statistics from training rows only
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)       # reuse the training statistics


In [11]:
# Sigmoid helper (numerically stable)
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s). Non-floating inputs are converted to float.

    Returns
    -------
    NumPy scalar or numpy.ndarray
        A scalar for scalar input, otherwise an array with the shape of `z`.

    Notes
    -----
    Only ``exp(-|z|)`` is evaluated. Its argument is never positive, so
    ``np.exp`` cannot overflow, unlike the direct form ``1 / (1 + exp(-z))``
    for large negative ``z``. The two branches below are algebraically
    identical to the direct form:
    ``z >= 0``: ``1 / (1 + exp(-z))``; ``z < 0``: ``exp(z) / (1 + exp(z))``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result

In [12]:
# Step 5: Starting point and hyper-parameters for gradient descent
iterations = 1000       # number of gradient-descent updates
learning_rate = 0.01    # step size applied to each gradient

# Rows = training samples, columns = features
(m_train, n_features) = np.shape(X_train)

bias = 0                            # intercept term
theta = np.zeros(shape=n_features)  # one weight per feature, all zero

In [13]:
# Step 6: Train logistic regression with batch gradient descent.
# NOTE: theta and bias are updated starting from their current values, so
# re-running this cell continues training instead of restarting from the
# initial parameters; re-run the initialisation cell first for a fresh fit.
inv_m = 1 / m_train  # loop-invariant averaging factor
for _ in range(iterations):
    # Linear combination of inputs and weights
    linear_model = np.dot(X_train, theta) + bias

    # Predicted probabilities via the notebook's sigmoid helper
    y_predicted = sigmoid(linear_model)

    # Prediction error, shared by both gradients
    residual = y_predicted - y_train

    # Gradients of the cost w.r.t. the weights and the bias
    d_theta = inv_m * np.dot(X_train.T, residual)
    d_bias = inv_m * np.sum(residual)

    # Gradient-descent update
    theta -= learning_rate * d_theta
    bias -= learning_rate * d_bias

In [14]:
# Step 7: Predict on the training set.
linear_model_train = np.dot(X_train, theta) + bias
y_pred_train_probs = sigmoid(linear_model_train)  # predicted probability of class 1
# Vectorised 0.5 threshold -> integer NumPy array of 0/1 labels, so the
# element-wise comparison with y_train is array-vs-array.
y_pred_train = (y_pred_train_probs > 0.5).astype(int)

In [15]:
# Predict on the held-out test split (X_test); these rows are not used by the
# gradient-descent updates, which only read X_train / y_train.
linear_model_test = np.dot(X_test, theta) + bias
y_pred_test_probs = sigmoid(linear_model_test)  # predicted probability of class 1
# Vectorised 0.5 threshold -> integer NumPy array of 0/1 labels, so the
# element-wise comparison with y_test is array-vs-array.
y_pred_test = (y_pred_test_probs > 0.5).astype(int)

In [16]:
# Step 8: Accuracy = fraction of predictions that equal the true label.
def _accuracy(predicted, actual):
    """Return the fraction of positions where `predicted` equals `actual`.

    Both inputs are converted to NumPy arrays first, so the comparison is
    element-wise whether the predictions are a list or an array.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape; a length mismatch
        should fail loudly rather than produce a misleading score.
    """
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape:
        raise ValueError(
            f"shape mismatch: predictions {predicted.shape} vs labels {actual.shape}"
        )
    return np.mean(predicted == actual)


train_accuracy = _accuracy(y_pred_train, y_train)
test_accuracy = _accuracy(y_pred_test, y_test)

In [17]:
# Report both accuracies, training first and then test
for label, value in (
    ("Training Accuracy:", train_accuracy),
    ("Test Accuracy:", test_accuracy),
):
    print(label, value)

Training Accuracy: 1.0
Test Accuracy: 1.0


In [18]:
# Cross-check the hand-computed test accuracy with scikit-learn.
# accuracy_score is declared as (y_true, y_pred); the arguments are passed by
# keyword so the ground truth and the predictions cannot be swapped.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_true=y_test, y_pred=y_pred_test))

1.0
