## PREDICTIVE MODELING WITH CLASSIFICATION

### Importing libraries

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score


### Loading and Preprocessing Data

In [3]:
# Load the Iris dataset
from sklearn.datasets import load_iris
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Preprocess the data (Standardization)
X = df.drop('target', axis=1)
y = df['target']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


###Initializing and Training Models

In [4]:
# Initialize classifiers
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Support Vector Machine': SVC()
}

# Define scoring metrics
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='weighted'),
    'recall': make_scorer(recall_score, average='weighted'),
    'f1_score': make_scorer(f1_score, average='weighted')
}

# Train and evaluate each classifier using cross-validation
results = {}
for name, clf in classifiers.items():
    scores = {}
    for metric_name, metric in scoring.items():
        score = cross_val_score(clf, X_scaled, y, cv=5, scoring=metric)
        scores[metric_name] = np.mean(score)
    results[name] = scores


### Printing and Comparing Results

In [5]:
# Print the results
for name, metrics in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1_score']:.4f}")
    print('-'*30)

# Compare the performances
best_model = max(results, key=lambda x: results[x]['f1_score'])
print(f"The best model is: {best_model} with F1 Score: {results[best_model]['f1_score']:.4f}")


Classifier: Logistic Regression
Accuracy: 0.9600
Precision: 0.9633
Recall: 0.9600
F1 Score: 0.9598
------------------------------
Classifier: Decision Tree
Accuracy: 0.9667
Precision: 0.9550
Recall: 0.9533
F1 Score: 0.9599
------------------------------
Classifier: Random Forest
Accuracy: 0.9667
Precision: 0.9592
Recall: 0.9667
F1 Score: 0.9531
------------------------------
Classifier: Support Vector Machine
Accuracy: 0.9667
Precision: 0.9685
Recall: 0.9667
F1 Score: 0.9666
------------------------------
The best model is: Support Vector Machine with F1 Score: 0.9666
