In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the Iris dataset
df = pd.read_csv('Iris.csv')

# Display basic information about the dataset
print("Dataset shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nDataset info:")
print(df.info())
print("\nUnique species:")
print(df['Species'].value_counts())
print("\nBasic statistics:")
print(df.describe())

Dataset shape: (150, 6)

First few rows:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

# Load and prepare the data
df = pd.read_csv('Iris.csv')

# Prepare features and labels
X = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].values
y_original = df['Species'].values

# Create label encoding
species_to_num = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y = np.array([species_to_num[species] for species in y_original])

print("Data prepared successfully!")
print(f"Feature matrix shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Classes: {np.unique(y)}")

Data prepared successfully!
Feature matrix shape: (150, 4)
Labels shape: (150,)
Classes: [0 1 2]


In [4]:
# Helper functions for data preprocessing
def train_test_split_custom(X, y, test_size=0.3, random_state=42):
    """Custom train-test split implementation"""
    np.random.seed(random_state)
    n_samples = len(X)
    n_test = int(n_samples * test_size)
    
    # Random indices for test set
    test_indices = np.random.choice(n_samples, n_test, replace=False)
    train_indices = np.array([i for i in range(n_samples) if i not in test_indices])
    
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    
    return X_train, X_test, y_train, y_test

def standardize_features(X_train, X_test):
    """Standardize features using training set statistics"""
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    
    X_train_scaled = (X_train - mean) / std
    X_test_scaled = (X_test - mean) / std
    
    return X_train_scaled, X_test_scaled, mean, std

# Split the data
X_train, X_test, y_train, y_test = train_test_split_custom(X, y, test_size=0.3, random_state=42)

# Standardize features
X_train_scaled, X_test_scaled, feature_mean, feature_std = standardize_features(X_train, X_test)

print("Data preprocessing completed!")
print(f"Training set shape: {X_train_scaled.shape}")
print(f"Test set shape: {X_test_scaled.shape}")
print(f"Training labels distribution: {np.bincount(y_train)}")
print(f"Test labels distribution: {np.bincount(y_test)}")

Data preprocessing completed!
Training set shape: (105, 4)
Test set shape: (45, 4)
Training labels distribution: [31 37 37]
Test labels distribution: [19 13 13]


In [5]:
# 1. LOGISTIC REGRESSION FROM SCRATCH (Multi-class using One-vs-Rest)

class LogisticRegressionScratch:
    def __init__(self, learning_rate=0.01, max_iterations=1000, tolerance=1e-6):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.tolerance = tolerance
        self.weights = None
        self.bias = None
        
    def sigmoid(self, z):
        """Sigmoid activation function with numerical stability"""
        z = np.clip(z, -500, 500)  # Prevent overflow
        return 1 / (1 + np.exp(-z))
    
    def fit_binary(self, X, y):
        """Fit binary logistic regression"""
        n_samples, n_features = X.shape
        
        # Initialize weights and bias
        self.weights = np.random.normal(0, 0.01, n_features)
        self.bias = 0
        
        # Gradient descent
        for i in range(self.max_iterations):
            # Forward pass
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = self.sigmoid(linear_pred)
            
            # Compute cost
            cost = self.compute_cost(y, predictions)
            
            # Compute gradients
            dw = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions - y)
            
            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
            
            # Check convergence
            if i > 0 and abs(prev_cost - cost) < self.tolerance:
                break
            prev_cost = cost
            
        return self
    
    def compute_cost(self, y_true, y_pred):
        """Compute logistic regression cost"""
        y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)  # Prevent log(0)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    
    def predict_proba_binary(self, X):
        """Predict probabilities for binary classification"""
        linear_pred = np.dot(X, self.weights) + self.bias
        return self.sigmoid(linear_pred)

class MultiClassLogisticRegression:
    def __init__(self, learning_rate=0.01, max_iterations=1000, tolerance=1e-6):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.tolerance = tolerance
        self.binary_classifiers = {}
        self.classes = None
        
    def fit(self, X, y):
        """Fit multi-class logistic regression using One-vs-Rest"""
        self.classes = np.unique(y)
        
        for class_label in self.classes:
            # Create binary labels
            binary_y = (y == class_label).astype(int)
            
            # Train binary classifier
            binary_clf = LogisticRegressionScratch(
                learning_rate=self.learning_rate,
                max_iterations=self.max_iterations,
                tolerance=self.tolerance
            )
            binary_clf.fit_binary(X, binary_y)
            
            self.binary_classifiers[class_label] = binary_clf
            
        return self
    
    def predict_proba(self, X):
        """Predict class probabilities"""
        n_samples = X.shape[0]
        n_classes = len(self.classes)
        probabilities = np.zeros((n_samples, n_classes))
        
        for i, class_label in enumerate(self.classes):
            probabilities[:, i] = self.binary_classifiers[class_label].predict_proba_binary(X)
        
        # Normalize probabilities (softmax-like normalization)
        probabilities = probabilities / np.sum(probabilities, axis=1, keepdims=True)
        return probabilities
    
    def predict(self, X):
        """Predict class labels"""
        probabilities = self.predict_proba(X)
        return self.classes[np.argmax(probabilities, axis=1)]

# Train logistic regression model
print("Training Multi-class Logistic Regression from scratch...")
log_reg = MultiClassLogisticRegression(learning_rate=0.1, max_iterations=1000)
log_reg.fit(X_train_scaled, y_train)

# Make predictions
y_pred_log_reg = log_reg.predict(X_test_scaled)
y_pred_proba_log_reg = log_reg.predict_proba(X_test_scaled)

print("Logistic Regression training completed!")
print(f"Predictions shape: {y_pred_log_reg.shape}")
print(f"Probability predictions shape: {y_pred_proba_log_reg.shape}")

Training Multi-class Logistic Regression from scratch...
Logistic Regression training completed!
Predictions shape: (45,)
Probability predictions shape: (45, 3)


In [6]:
# 3. IMPLEMENT F1-SCORE METRICS FROM SCRATCH

class F1ScoreMetrics:
    def __init__(self):
        pass
    
    def confusion_matrix_multiclass(self, y_true, y_pred, num_classes):
        """Create confusion matrix for multi-class classification"""
        cm = np.zeros((num_classes, num_classes), dtype=int)
        
        for true_label, pred_label in zip(y_true, y_pred):
            cm[true_label][pred_label] += 1
        
        return cm
    
    def precision_recall_f1_per_class(self, y_true, y_pred, num_classes):
        """Calculate precision, recall, and F1-score for each class"""
        cm = self.confusion_matrix_multiclass(y_true, y_pred, num_classes)
        
        precision_per_class = np.zeros(num_classes)
        recall_per_class = np.zeros(num_classes)
        f1_per_class = np.zeros(num_classes)
        
        for i in range(num_classes):
            # True Positives for class i
            tp = cm[i, i]
            
            # False Positives for class i (predicted as i but actually not i)
            fp = np.sum(cm[:, i]) - tp
            
            # False Negatives for class i (actually i but predicted as something else)
            fn = np.sum(cm[i, :]) - tp
            
            # Calculate precision, recall, and F1-score
            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
            
            precision_per_class[i] = precision
            recall_per_class[i] = recall
            f1_per_class[i] = f1
        
        return precision_per_class, recall_per_class, f1_per_class, cm
    
    def macro_f1_score(self, y_true, y_pred, num_classes):
        """Calculate Macro F1-score"""
        _, _, f1_per_class, _ = self.precision_recall_f1_per_class(y_true, y_pred, num_classes)
        return np.mean(f1_per_class)
    
    def micro_f1_score(self, y_true, y_pred, num_classes):
        """Calculate Micro F1-score"""
        cm = self.confusion_matrix_multiclass(y_true, y_pred, num_classes)
        
        # Calculate total TP, FP, FN across all classes
        total_tp = np.sum(np.diag(cm))
        total_fp = np.sum(cm) - total_tp  # All off-diagonal elements
        total_fn = total_fp  # In multi-class, total FP = total FN  
        
        # Calculate micro precision, recall, and F1
        micro_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
        micro_recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
        micro_f1 = 2 * (micro_precision * micro_recall) / (micro_precision + micro_recall) if (micro_precision + micro_recall) > 0 else 0.0
        
        return micro_f1
    
    def weighted_f1_score(self, y_true, y_pred, num_classes):
        """Calculate Weighted F1-score"""
        _, _, f1_per_class, _ = self.precision_recall_f1_per_class(y_true, y_pred, num_classes)
        
        # Calculate class weights based on support (number of samples per class)
        class_counts = np.bincount(y_true, minlength=num_classes)
        total_samples = len(y_true)
        class_weights = class_counts / total_samples
        
        # Calculate weighted F1-score
        weighted_f1 = np.sum(f1_per_class * class_weights)
        
        return weighted_f1

# Initialize F1-score calculator
f1_calculator = F1ScoreMetrics()

# Calculate F1-scores for Logistic Regression
print("=== F1-SCORE METRICS FOR LOGISTIC REGRESSION ===")
num_classes = len(np.unique(y_test))

precision_lr, recall_lr, f1_lr, cm_lr = f1_calculator.precision_recall_f1_per_class(y_test, y_pred_log_reg, num_classes)
macro_f1_lr = f1_calculator.macro_f1_score(y_test, y_pred_log_reg, num_classes)
micro_f1_lr = f1_calculator.micro_f1_score(y_test, y_pred_log_reg, num_classes)
weighted_f1_lr = f1_calculator.weighted_f1_score(y_test, y_pred_log_reg, num_classes)

print("Per-class metrics:")
for i in range(num_classes):
    print(f"Class {i}: Precision={precision_lr[i]:.4f}, Recall={recall_lr[i]:.4f}, F1={f1_lr[i]:.4f}")

print(f"\nMacro F1-score: {macro_f1_lr:.4f}")
print(f"Micro F1-score: {micro_f1_lr:.4f}")
print(f"Weighted F1-score: {weighted_f1_lr:.4f}")

print(f"\nConfusion Matrix:")
print(cm_lr)

=== F1-SCORE METRICS FOR LOGISTIC REGRESSION ===
Per-class metrics:
Class 0: Precision=1.0000, Recall=1.0000, F1=1.0000
Class 1: Precision=1.0000, Recall=0.6923, F1=0.8182
Class 2: Precision=0.7647, Recall=1.0000, F1=0.8667

Macro F1-score: 0.8949
Micro F1-score: 0.9111
Weighted F1-score: 0.9090

Confusion Matrix:
[[19  0  0]
 [ 0  9  4]
 [ 0  0 13]]


In [None]:
# 4. IMPLEMENT ROC CURVE AND AUC FROM SCRATCH

class ROCCurveAUC:
    def __init__(self):
        pass
    
    def roc_curve_binary(self, y_true_binary, y_scores):   #it is tpr vs fpr
        """Calculate ROC curve for binary classification"""
        # Sort scores in descending order
        sorted_indices = np.argsort(y_scores)[::-1]
        y_true_sorted = y_true_binary[sorted_indices]
        y_scores_sorted = y_scores[sorted_indices]
        
        # Get unique thresholds
        thresholds = np.unique(y_scores_sorted)
        thresholds = np.concatenate([[thresholds[0] + 1], thresholds, [thresholds[-1] - 1]])
        
        # Initialize arrays for TPR and FPR
        tpr_values = []
        fpr_values = []
        
        # Calculate number of positive and negative samples
        n_pos = np.sum(y_true_binary)
        n_neg = len(y_true_binary) - n_pos
        
        for threshold in thresholds:
            # Make predictions based on threshold
            y_pred = (y_scores >= threshold).astype(int)
            
            # Calculate True Positives, False Positives, True Negatives, False Negatives
            tp = np.sum((y_pred == 1) & (y_true_binary == 1))
            fp = np.sum((y_pred == 1) & (y_true_binary == 0))
            
            # Calculate TPR and FPR
            tpr = tp / n_pos if n_pos > 0 else 0
            fpr = fp / n_neg if n_neg > 0 else 0
            
            tpr_values.append(tpr)
            fpr_values.append(fpr)
        
        return np.array(fpr_values), np.array(tpr_values), thresholds
    
    def auc_trapezoid(self, fpr, tpr):
        """Calculate AUC using trapezoidal rule"""
        # Sort by FPR to ensure correct integration
        sorted_indices = np.argsort(fpr)
        fpr_sorted = fpr[sorted_indices]
        tpr_sorted = tpr[sorted_indices]
        
        auc = 0.0
        for i in range(1, len(fpr_sorted)):
            # Trapezoidal rule: (x2 - x1) * (y1 + y2) / 2
            auc += (fpr_sorted[i] - fpr_sorted[i-1]) * (tpr_sorted[i] + tpr_sorted[i-1]) / 2
        
        return auc
    
    def multiclass_roc_ovr(self, y_true, y_pred_proba, class_names=None):
        """Calculate ROC curves for multi-class classification using One-vs-Rest"""
        n_classes = y_pred_proba.shape[1]
        
        if class_names is None:
            class_names = [f"Class {i}" for i in range(n_classes)]
        
        roc_results = {}
        
        for i in range(n_classes):
            # Create binary labels for current class vs rest
            y_binary = (y_true == i).astype(int)
            y_scores = y_pred_proba[:, i]
            
            # Calculate ROC curve
            fpr, tpr, thresholds = self.roc_curve_binary(y_binary, y_scores)
            auc = self.auc_trapezoid(fpr, tpr)
            
            roc_results[class_names[i]] = {
                'fpr': fpr,
                'tpr': tpr,
                'thresholds': thresholds,
                'auc': auc
            }
        
        return roc_results

# Initialize ROC calculator
roc_calculator = ROCCurveAUC()

# Calculate ROC curves for Logistic Regression
print("=== ROC CURVE AND AUC FOR LOGISTIC REGRESSION ===")
class_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

roc_results_lr = roc_calculator.multiclass_roc_ovr(y_test, y_pred_proba_log_reg, class_names)

for class_name, results in roc_results_lr.items():
    print(f"{class_name}: AUC = {results['auc']:.4f}")

# Calculate average AUC
avg_auc_lr = np.mean([results['auc'] for results in roc_results_lr.values()])
print(f"Average AUC (Logistic Regression): {avg_auc_lr:.4f}")


# Store ROC data for plotting
roc_data_summary = {
    'Logistic Regression': {
        'class_results': roc_results_lr,
        'avg_auc': avg_auc_lr
    },
    # 'Naive Bayes': {
    #     'class_results': roc_results_nb,
    #     'avg_auc': avg_auc_nb
    # }
}

=== ROC CURVE AND AUC FOR LOGISTIC REGRESSION ===
Iris-setosa: AUC = 0.9828
Iris-versicolor: AUC = 0.9339
Iris-virginica: AUC = 0.9688
Average AUC (Logistic Regression): 0.9618


In [12]:
# 5. LINEAR REGRESSION USING NORMAL EQUATIONS FROM SCRATCH4

# θ=(XTX)−1XTy

class LinearRegressionNormalEquation:
    def __init__(self):
        self.weights = None
        self.bias = None
        
    def fit(self, X, y):
        """Fit linear regression using normal equation: θ = (X^T X)^(-1) X^T y"""
        # Add bias term (intercept) to features
        X_with_bias = np.column_stack([np.ones(X.shape[0]), X])
        
        # Normal equation: θ = (X^T X)^(-1) X^T y
        XtX = np.dot(X_with_bias.T, X_with_bias)
        
        # Add small regularization term to prevent singular matrix
        XtX += 1e-8 * np.eye(XtX.shape[0])
        
        XtX_inv = np.linalg.inv(XtX)
        Xty = np.dot(X_with_bias.T, y)
        
        theta = np.dot(XtX_inv, Xty)
        
        self.bias = theta[0]
        self.weights = theta[1:]
        
        return self
    
    def predict(self, X):
        """Make predictions using learned parameters"""
        return np.dot(X, self.weights) + self.bias

class RegressionMetrics:
    def __init__(self):
        pass
    
    def mean_absolute_error(self, y_true, y_pred):
        """Calculate Mean Absolute Error"""
        return np.mean(np.abs(y_true - y_pred))
    
    def mean_squared_error(self, y_true, y_pred):
        """Calculate Mean Squared Error"""
        return np.mean((y_true - y_pred) ** 2)
    
    def r2_score(self, y_true, y_pred):
        """Calculate R-squared (coefficient of determination)"""
        ss_res = np.sum((y_true - y_pred) ** 2)  # Sum of squares of residuals
        ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)  # Total sum of squares
        
        r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0
        return r2

# For regression, we'll predict each feature from the others
print("=== LINEAR REGRESSION WITH NORMAL EQUATIONS ===")

# We'll predict each feature using the other three features
feature_names = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
regression_results = {}

for target_idx, target_name in enumerate(feature_names):
    print(f"\n--- Predicting {target_name} ---")
    
    # Prepare features and target
    feature_indices = [i for i in range(4) if i != target_idx]
    X_reg_train = X_train[:, feature_indices]  # Use original unscaled data for regression
    X_reg_test = X_test[:, feature_indices]
    y_reg_train = X_train[:, target_idx]
    y_reg_test = X_test[:, target_idx]
    
    # Fit linear regression model
    lr_model = LinearRegressionNormalEquation()
    lr_model.fit(X_reg_train, y_reg_train)
    
    # Make predictions
    y_pred_reg = lr_model.predict(X_reg_test)
    
    # Calculate metrics
    metrics_calculator = RegressionMetrics()
    mae = metrics_calculator.mean_absolute_error(y_reg_test, y_pred_reg)
    mse = metrics_calculator.mean_squared_error(y_reg_test, y_pred_reg)
    r2 = metrics_calculator.r2_score(y_reg_test, y_pred_reg)
    
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"R-squared Score: {r2:.4f}")
    
    # Store results
    regression_results[target_name] = {
        'model': lr_model,
        'predictions': y_pred_reg,
        'actual': y_reg_test,
        'mae': mae,
        'mse': mse,
        'r2': r2,
        'weights': lr_model.weights,
        'bias': lr_model.bias
    }
    
    print(f"Model weights: {lr_model.weights}")
    print(f"Model bias: {lr_model.bias:.4f}")

# Summary of regression results
print("\n=== REGRESSION RESULTS SUMMARY ===")
print("Target Variable      | MAE    | MSE    | R²     ")
print("-" * 45)
for target_name, results in regression_results.items():
    print(f"{target_name:<18} | {results['mae']:.4f} | {results['mse']:.4f} | {results['r2']:.4f}")

# Calculate average metrics
avg_mae = np.mean([results['mae'] for results in regression_results.values()])
avg_mse = np.mean([results['mse'] for results in regression_results.values()])
avg_r2 = np.mean([results['r2'] for results in regression_results.values()])

print("-" * 45)
print(f"{'Average':<18} | {avg_mae:.4f} | {avg_mse:.4f} | {avg_r2:.4f}")

=== LINEAR REGRESSION WITH NORMAL EQUATIONS ===

--- Predicting SepalLengthCm ---
Mean Absolute Error (MAE): 0.2467
Mean Squared Error (MSE): 0.0983
R-squared Score: 0.8523
Model weights: [ 0.67082518  0.76142087 -0.69692346]
Model bias: 1.7423

--- Predicting SepalWidthCm ---
Mean Absolute Error (MAE): 0.2506
Mean Squared Error (MSE): 0.0992
R-squared Score: 0.5176
Model weights: [ 0.60001072 -0.60196474  0.65514608]
Model bias: 1.0183

--- Predicting PetalLengthCm ---
Mean Absolute Error (MAE): 0.2418
Mean Squared Error (MSE): 0.1092
R-squared Score: 0.9676
Model weights: [ 0.74675192 -0.66004413  1.45302355]
Model bias: -0.3074

--- Predicting PetalWidthCm ---
Mean Absolute Error (MAE): 0.1399
Mean Squared Error (MSE): 0.0379
R-squared Score: 0.9404
Model weights: [-0.25524004  0.26825771  0.54260627]
Model bias: -0.1773

=== REGRESSION RESULTS SUMMARY ===
Target Variable      | MAE    | MSE    | R²     
---------------------------------------------
SepalLengthCm      | 0.2467 | 0.0

In [18]:
import plotly.graph_objects as go
import plotly.io as pio

# Data for logistic regression ROC curves
data = {
    "logistic_regression": {
        "iris_setosa": {"fpr": [0.0, 0.0384615, 0.0769231, 0.1153846, 0.1538462, 0.1923077, 0.2307692, 0.2692308, 0.3076923, 0.3461538, 0.3846154, 0.4230769, 0.4615385, 0.5, 0.5384615, 0.5769231, 0.6153846, 0.6538462, 0.6923077, 0.7307692, 0.7692308, 0.8076923, 0.8461538, 0.8846154, 0.9230769, 0.9615385, 1.0], "tpr": [0.0, 0.0526316, 0.1052632, 0.1578947, 0.2105263, 0.2631579, 0.3157895, 0.3684211, 0.4210526, 0.4736842, 0.5263158, 0.5789474, 0.6315789, 0.6842105, 0.7368421, 0.7894737, 0.8421053, 0.8947368, 0.9473684, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "auc": 0.9828},
        "iris_versicolor": {"fpr": [0.0, 0.03125, 0.0625, 0.09375, 0.125, 0.15625, 0.1875, 0.21875, 0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875, 0.5, 0.53125, 0.5625, 0.59375, 0.625, 0.65625, 0.6875, 0.71875, 0.75, 0.78125, 0.8125, 0.84375, 0.875, 0.90625, 0.9375, 0.96875, 1.0], "tpr": [0.0, 0.0769231, 0.1538462, 0.2307692, 0.3076923, 0.3846154, 0.4615385, 0.5384615, 0.6153846, 0.6923077, 0.7692308, 0.8461538, 0.9230769, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "auc": 0.9339},
        "iris_virginica": {"fpr": [0.0, 0.03125, 0.0625, 0.09375, 0.125, 0.15625, 0.1875, 0.21875, 0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875, 0.5, 0.53125, 0.5625, 0.59375, 0.625, 0.65625, 0.6875, 0.71875, 0.75, 0.78125, 0.8125, 0.84375, 0.875, 0.90625, 0.9375, 0.96875, 1.0], "tpr": [0.0, 0.0769231, 0.1538462, 0.2307692, 0.3076923, 0.3846154, 0.4615385, 0.5384615, 0.6153846, 0.6923077, 0.7692308, 0.8461538, 0.9230769, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "auc": 0.9688}
    },
    "naive_bayes": {
        "iris_setosa": {"auc": 0.9828},
        "iris_versicolor": {"auc": 1.0000}, 
        "iris_virginica": {"auc": 1.0000}
    }
}

# Brand colors
colors = ['#1FB8CD', '#DB4545', '#2E8B57']
class_names = ['Setosa', 'Versicolor', 'Virginica']
class_keys = ['iris_setosa', 'iris_versicolor', 'iris_virginica']

# Create figure
fig = go.Figure()

# Add logistic regression ROC curves for each class
for i, (class_key, class_name, color) in enumerate(zip(class_keys, class_names, colors)):
    class_data = data['logistic_regression'][class_key]
    
    fig.add_trace(go.Scatter(
        x=class_data['fpr'],
        y=class_data['tpr'],
        mode='lines',
        name=f'LogReg {class_name}: {class_data["auc"]:.3f}',
        line=dict(color=color, width=3),
        hovertemplate='<b>LogReg %{fullData.name}</b><br>FPR: %{x:.3f}<br>TPR: %{y:.3f}<extra></extra>'
    ))

# Add diagonal reference line
fig.add_trace(go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    name='Random: 0.50',
    line=dict(color='gray', width=2, dash='dash'),
    hovertemplate='<b>Random</b><br>FPR: %{x:.3f}<br>TPR: %{y:.3f}<extra></extra>'
))

# Add text annotations for Naive Bayes AUC values
annotations_text = []
nb_data = data['naive_bayes']
for i, (class_key, class_name, color) in enumerate(zip(class_keys, class_names, colors)):
    nb_auc = nb_data[class_key]['auc']
    annotations_text.append(f'NB {class_name}: {nb_auc:.3f}')

# Add annotation showing Naive Bayes results
fig.add_annotation(
    x=0.6, y=0.25,
    text='<br>'.join(['Naive Bayes AUC:'] + annotations_text),
    showarrow=False,
    font=dict(size=12),
    bgcolor='white',
    bordercolor='gray',
    borderwidth=1
)

# Update layout
fig.update_layout(
    title='ROC Multi-Class Iris Classification',
    xaxis_title='False Pos Rate',
    yaxis_title='True Pos Rate',
    showlegend=True,
    xaxis=dict(range=[0, 1], showgrid=True, gridcolor='lightgray'),
    yaxis=dict(range=[0, 1], showgrid=True, gridcolor='lightgray'),
    legend=dict(orientation='h', yanchor='bottom', y=1.05, xanchor='center', x=0.5)
)

# Update traces
fig.update_traces(cliponaxis=False)

# Save the chart

In [13]:

import plotly.graph_objects as go
import plotly.express as px

# Data from the provided JSON
data = {
    "f1_scores": [
        {"metric": "Macro F1", "Logistic Regression": 0.8949, "Naive Bayes": 0.9743},
        {"metric": "Micro F1", "Logistic Regression": 0.9111, "Naive Bayes": 0.9778},
        {"metric": "Weighted F1", "Logistic Regression": 0.9090, "Naive Bayes": 0.9777}
    ]
}

# Extract data for plotting
metrics = [item["metric"] for item in data["f1_scores"]]
logistic_scores = [item["Logistic Regression"] for item in data["f1_scores"]]
naive_bayes_scores = [item["Naive Bayes"] for item in data["f1_scores"]]

# Create the grouped bar chart
fig = go.Figure()

# Add Logistic Regression bars (blue)
fig.add_trace(go.Bar(
    name='Logistic Reg',
    x=metrics,
    y=logistic_scores,
    marker_color='#1FB8CD',
    text=[f'{score:.3f}' for score in logistic_scores],
    textposition='outside',
    textfont=dict(size=12)
))

# Add Naive Bayes bars (red)
fig.add_trace(go.Bar(
    name='Naive Bayes',
    x=metrics,
    y=naive_bayes_scores,
    marker_color='#DB4545',
    text=[f'{score:.3f}' for score in naive_bayes_scores],
    textposition='outside',
    textfont=dict(size=12)
))

# Update layout
fig.update_layout(
    title='F1-Score Comparison Models',
    xaxis_title='F1-Score Type',
    yaxis_title='F1-Score Value',
    barmode='group',
    showlegend=True,
    legend=dict(orientation='h', yanchor='bottom', y=1.05, xanchor='center', x=0.5),
    yaxis=dict(range=[0.8, 1.0], showgrid=True),
    xaxis=dict(showgrid=True)
)

# Update traces for cliponaxis
fig.update_traces(cliponaxis=False)



In [15]:
import plotly.graph_objects as go

# Data from the provided JSON
data = {
    "regression_results": [
        {"target": "SepalLengthCm", "MAE": 0.2467, "MSE": 0.0983, "R²": 0.8523},
        {"target": "SepalWidthCm", "MAE": 0.2506, "MSE": 0.0992, "R²": 0.5176},
        {"target": "PetalLengthCm", "MAE": 0.2418, "MSE": 0.1092, "R²": 0.9676},
        {"target": "PetalWidthCm", "MAE": 0.1399, "MSE": 0.0379, "R²": 0.9404}
    ]
}

# Extract data
targets = [item["target"] for item in data["regression_results"]]
mae_values = [item["MAE"] for item in data["regression_results"]]
mse_values = [item["MSE"] for item in data["regression_results"]]
r2_values = [item["R²"] for item in data["regression_results"]]

# Use brand colors for each target variable
colors = ['#1FB8CD', '#DB4545', '#2E8B57', '#5D878F']

# Create figure
fig = go.Figure()

# Add MAE bars (primary y-axis)
for i, target in enumerate(targets):
    fig.add_trace(go.Bar(
        name=f'{target} MAE',
        x=['MAE'],
        y=[mae_values[i]],
        text=[f'{mae_values[i]:.3f}'],
        textposition='outside',
        marker_color=colors[i],
        yaxis='y',
        offsetgroup=i,
        showlegend=False
    ))

# Add MSE bars (primary y-axis)  
for i, target in enumerate(targets):
    fig.add_trace(go.Bar(
        name=f'{target} MSE',
        x=['MSE'],
        y=[mse_values[i]],
        text=[f'{mse_values[i]:.4f}'],
        textposition='outside',
        marker_color=colors[i],
        yaxis='y',
        offsetgroup=i,
        showlegend=False
    ))

# Add R² bars (secondary y-axis)
for i, target in enumerate(targets):
    fig.add_trace(go.Bar(
        name=target,
        x=['R²'],
        y=[r2_values[i]],
        text=[f'{r2_values[i]:.3f}'],
        textposition='outside',
        marker_color=colors[i],
        yaxis='y2',
        offsetgroup=i
    ))

# Update layout with dual y-axes
fig.update_layout(
    title='Linear Regression Performance (Normal Eq)',
    xaxis_title='Metrics',
    yaxis=dict(
        title='Error Metrics',
        showgrid=True,
        side='left'
    ),
    yaxis2=dict(
        title='R² Score',
        overlaying='y',
        side='right',
        showgrid=False,
        range=[0, 1]
    ),
    barmode='group',
    showlegend=True,
    legend=dict(orientation='h', yanchor='bottom', y=1.05, xanchor='center', x=0.5),
    xaxis=dict(showgrid=True)
)

# Apply styling requirements
fig.update_traces(cliponaxis=False)

