In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

In [2]:
# ------------------------
# Load dataset
# ------------------------
iris = load_iris()
# Features keep sklearn's column names; labels live in a one-column frame
# so later cells can address them as y[...]['target'].
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.DataFrame({'target': iris.target})

# Hold out 20% of the rows for testing; the fixed seed keeps the shuffled
# split identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=43,
    shuffle=True,
)


In [3]:
# ------------------------
# Train three classifiers (One-vs-Rest)
# ------------------------
def train(X_train, y_train):
    """
    Fit three binary logistic-regression models, one per class (One-vs-Rest).

    Parameters:
    X_train: training features, passed unchanged to LogisticRegression.fit
    y_train: labels container indexed as y_train['target']; each model's
             binary target is 1 where that column equals the model's class
             and 0 otherwise

    Returns:
    (clf0, clf1, clf2): fitted classifiers for class 0, 1 and 2 respectively
    """
    def fit_binary(positive_class):
        # Relabel: the chosen class becomes 1, every other class becomes 0.
        is_positive = (y_train['target'] == positive_class).astype(int)
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, is_positive)
        return model

    # Order matters: callers unpack the result as clf0, clf1, clf2.
    return fit_binary(0), fit_binary(1), fit_binary(2)

# Fit the three one-vs-rest models on the training split only.
clf0, clf1, clf2 = train(X_train=X_train, y_train=y_train)


In [7]:
# ------------------------
# Predict function (OvR)
# ------------------------
def predict_one_vs_all(X, clf0, clf1, clf2):
    """
    Label each sample with the class whose one-vs-rest model scores it highest.

    Parameters:
    X: features to predict; passed unchanged to each classifier's predict_proba
    clf0, clf1, clf2: trained one-vs-rest classifiers, in class order; column 1
                      of each predict_proba output is read as the probability
                      that the sample belongs to that classifier's class

    Returns:
    1-D integer array with one entry per row of X, each 0, 1 or 2: the
    position of the classifier with the largest positive-class probability
    (np.argmax keeps the first maximum, so ties go to the lower class index)
    """
    # One vector of positive-class probabilities per classifier, in class order.
    positive_scores = [
        model.predict_proba(X)[:, 1] for model in (clf0, clf1, clf2)
    ]

    # Shape (n_samples, 3): column k holds the score for class k.
    score_matrix = np.column_stack(positive_scores)

    # The winning column index is the predicted class label.
    return score_matrix.argmax(axis=1)

# Manual OvR predictions for the held-out test rows
y_pred_manual = predict_one_vs_all(X_test, clf0=clf0, clf1=clf1, clf2=clf2)

In [11]:
# ------------------------
# Compare with sklearn's built-in One-vs-Rest wrapper
# ------------------------
# A bare LogisticRegression fitted on a 3-class target trains a single
# multinomial (softmax) model in current scikit-learn releases, so it is not
# an OvR baseline. OneVsRestClassifier fits one binary LogisticRegression per
# class, which is the scheme the manual implementation reproduces.
clf_sklearn = OneVsRestClassifier(LogisticRegression(max_iter=1000))
clf_sklearn.fit(X_train, y_train['target'])
y_pred_sklearn = clf_sklearn.predict(X_test)

# Score against the 1-D label column instead of the one-column DataFrame.
print("Manual OvR accuracy:", accuracy_score(y_test['target'], y_pred_manual))
print("Sklearn OvR accuracy:", accuracy_score(y_test['target'], y_pred_sklearn))

print("\nFirst 10 manual OvR predictions:", y_pred_manual[:10])
print("First 10 sklearn OvR predictions:", y_pred_sklearn[:10])
print("True values:", y_test['target'].values[:10])



Manual OvR accuracy: 1.0
Sklearn OvR accuracy: 1.0

First 10 manual OvR predictions: [0 0 2 1 2 0 2 1 1 1]
First 10 sklearn OvR predictions: [0 0 2 1 2 0 2 1 1 1]
True values: [0 0 2 1 2 0 2 1 1 1]
