<a href="https://colab.research.google.com/github/tanushree-dwibedi/Celebal-Assignment6/blob/main/Celebal_Assignment6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Prerequisites
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import warnings
warnings.filterwarnings("ignore")

# Load the breast-cancer dataset shipped with scikit-learn and separate
# the feature matrix from the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training portion only, then apply
# that same transformation to both portions so no test-set information
# leaks into the fitted scaler.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train baseline models with default hyperparameters.
# The decision tree and the random forest draw random numbers while fitting,
# so they are seeded (same seed as the train/test split) to make the scores
# printed below repeatable across Restart & Run All. LogisticRegression and
# SVC are left exactly as they were.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC()
}

# Fit every model on the scaled training data and score its predictions on
# the held-out test split. `results` maps model name -> {metric name -> score}.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred)
    }

# Transpose so that each row is a model and each column is a metric.
print("\nInitial Model Performance:")
print(pd.DataFrame(results).T)

# GridSearchCV for SVM: every combination in the grid is evaluated with
# 5-fold cross-validation on the training data and ranked by F1.
# NOTE: per the SVC documentation `gamma` is only used by the 'rbf', 'poly'
# and 'sigmoid' kernels, so the 'linear' candidates that differ only in
# `gamma` are effectively duplicates. The grid is kept as-is so that the
# reported best_params_ keep the same keys.
param_grid_svm = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': ['scale', 'auto']}
grid_search_svm = GridSearchCV(SVC(), param_grid_svm, cv=5, scoring='f1')
grid_search_svm.fit(X_train, y_train)

# RandomizedSearchCV for Random Forest: 10 of the 36 possible combinations
# are sampled and scored with 5-fold cross-validation on F1.
param_dist_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}
# The search's own random_state only fixes WHICH combinations are sampled.
# The forest itself must be seeded as well, otherwise its bootstrap/feature
# sampling differs on every run and the CV scores, the selected best_params_
# and the tuned test metrics are not reproducible.
random_search_rf = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_dist_rf,
    n_iter=10,
    cv=5,
    scoring='f1',
    random_state=42,
)
random_search_rf.fit(X_train, y_train)

# Show which hyperparameters each search selected.
print("\nTuned SVM Best Params:", grid_search_svm.best_params_)
print("Tuned RF Best Params:", random_search_rf.best_params_)

# Printed caption paired with the scoring function, in reporting order.
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)

# The best estimator found by each search, keyed by its display name.
tuned_models = {
    "Tuned SVM": grid_search_svm.best_estimator_,
    "Tuned RF": random_search_rf.best_estimator_,
}

# Score each tuned model on the held-out test split and print one metric
# per line.
for name, model in tuned_models.items():
    y_pred = model.predict(X_test)
    print(f"\n{name} Performance:")
    for caption, scorer in metric_table:
        print(caption, scorer(y_test, y_pred))



Initial Model Performance:
                     Accuracy  Precision    Recall  F1 Score
Logistic Regression  0.973684   0.972222  0.985915  0.979021
Decision Tree        0.947368   0.957746  0.957746  0.957746
Random Forest        0.964912   0.958904  0.985915  0.972222
SVM                  0.982456   0.972603  1.000000  0.986111

Tuned SVM Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Tuned RF Best Params: {'n_estimators': 50, 'min_samples_split': 5, 'max_depth': 20}

Tuned SVM Performance:
Accuracy: 0.9824561403508771
Precision: 0.9726027397260274
Recall: 1.0
F1 Score: 0.9861111111111112

Tuned RF Performance:
Accuracy: 0.956140350877193
Precision: 0.9583333333333334
Recall: 0.971830985915493
F1 Score: 0.965034965034965
