# Assignment 1: Classification on Wine Dataset


In [18]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load Wine dataset
wine = load_wine()
X, y = wine.data, wine.target

# Split data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression
lr = LogisticRegression(max_iter=10000, random_state=42)
lr.fit(X_train, y_train)
lr_preds = lr.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_preds)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")

# Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_preds = dt.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_preds)
print(f"Decision Tree Accuracy: {dt_accuracy:.4f}")

# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_preds)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")

# Conclusion
accuracies = {"Logistic Regression": lr_accuracy, "Decision Tree": dt_accuracy, "Random Forest": rf_accuracy}
best_model = max(accuracies, key=accuracies.get)
print(f"\nThe best model for the Wine dataset is {best_model} with an accuracy of {accuracies[best_model]:.4f}.")


Logistic Regression Accuracy: 1.0000
Decision Tree Accuracy: 0.9444
Random Forest Accuracy: 1.0000

The best model for the Wine dataset is Logistic Regression with an accuracy of 1.0000.


# Assignment 2: Advanced Classification with Hyperparameter Tuning and SVM


In [19]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Load Breast Cancer dataset
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Preprocess and standardize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training, validation, and testing sets (60-20-20)
X_temp, X_test, y_temp, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Random Forest with hyperparameter tuning
param_grid_rf = {'n_estimators': [10, 50, 100, 150], 'max_depth': [None, 10, 20, 30], 'random_state': [42]}
grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, cv=5)
grid_rf.fit(X_train, y_train)
best_rf = grid_rf.best_estimator_
print("Best RF Params:", grid_rf.best_params_)

# Predict and evaluate RF
rf_preds = best_rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_preds)
print(f"Optimized Random Forest Accuracy: {rf_accuracy:.4f}")

# SVM with hyperparameter tuning
param_grid_svm = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf'], 'random_state': [42]}
grid_svm = GridSearchCV(SVC(), param_grid_svm, cv=5)
grid_svm.fit(X_train, y_train)
best_svm = grid_svm.best_estimator_
print("\nBest SVM Params:", grid_svm.best_params_)

# Predict and evaluate SVM
svm_preds = best_svm.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_preds)
print(f"Optimized SVM Accuracy: {svm_accuracy:.4f}")

# Conclusion
accuracies = {"Optimized Random Forest": rf_accuracy, "Optimized SVM": svm_accuracy}
best_model = max(accuracies, key=accuracies.get)
print(f"\nThe best model for the Breast Cancer dataset is {best_model} with an accuracy of {accuracies[best_model]:.4f}.")

Best RF Params: {'max_depth': None, 'n_estimators': 100, 'random_state': 42}
Optimized Random Forest Accuracy: 0.9561

Best SVM Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf', 'random_state': 42}
Optimized SVM Accuracy: 0.9825

The best model for the Breast Cancer dataset is Optimized SVM with an accuracy of 0.9825.
