In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
print("1. Loading the Iris dataset...")
# load_iris() returns a bundle; pull out the arrays and label metadata used below.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Summarise what was loaded: array shapes first, then the feature/class labels.
print(f"Features (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}")
print(f"Feature names: {feature_names}")
print(f"Target names: {target_names}")

# Finish with the dataset's own long-form description text.
print("\nDataset description:")
print(iris.DESCR)


1. Loading the Iris dataset...
Features (X) shape: (150, 4)
Target (y) shape: (150,)
Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']

Dataset description:
.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

:Missing A

In [3]:
print("2. Splitting data into training and testing sets (80% train, 20% test)...")
# Hold out 20% of the rows for testing. Passing stratify=y keeps the class
# proportions of y the same in both partitions, and the fixed random_state
# makes the shuffle reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the resulting partition sizes.
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

2. Splitting data into training and testing sets (80% train, 20% test)...
X_train shape: (120, 4)
X_test shape: (30, 4)
y_train shape: (120,)
y_test shape: (30,)


In [4]:
print("3. Training a Logistic Regression model...")
# Configure the classifier with the lbfgs solver, a raised iteration cap
# (max_iter=200) to give the optimiser room to converge, and a fixed seed.
# fit() returns the fitted estimator, so construction and training are chained.
model = LogisticRegression(
    solver='lbfgs',
    max_iter=200,
    random_state=42,
).fit(X_train, y_train)
print("Model training complete.")

3. Training a Logistic Regression model...
Model training complete.


In [5]:
print("4. Making predictions on the test set...")
# Score the held-out rows with the fitted model:
#   y_pred  - the predicted class label for each test row
#   y_proba - the per-class probability estimates for each test row
y_pred, y_proba = model.predict(X_test), model.predict_proba(X_test)

4. Making predictions on the test set...


In [6]:
print("5. Classification Metrics:")

# Accuracy: fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy Score: {accuracy:.4f}")

# Precision, recall and F1 are computed per class and then combined.
# average='weighted' weights each class by its support (number of true rows),
# which is what the "(Weighted)" labels below promise. These calls previously
# passed average='macro' (an unweighted mean over classes) while still being
# reported as weighted; the two only agree when every class has equal support.

# Precision (weighted average)
precision = precision_score(y_test, y_pred, average='weighted')
print(f"Precision (Weighted): {precision:.4f}")

# Recall (weighted average)
recall = recall_score(y_test, y_pred, average='weighted')
print(f"Recall (Weighted): {recall:.4f}")

# F1-Score (weighted average)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"F1-Score (Weighted): {f1:.4f}")

5. Classification Metrics:
Accuracy Score: 0.9667
Precision (Weighted): 0.9697
Recall (Weighted): 0.9667
F1-Score (Weighted): 0.9666


In [7]:
#commit again

In [8]:
#can you see this comment?

In [1]:
# This comment was added in the eleventh commit.

In [None]:
# first comment

In [2]:
# second comment

In [3]:
# third comment

In [4]:
# fourth comment

In [5]:
# fifth comment

In [6]:
# sixth comment