<a href="https://colab.research.google.com/github/kushum-coder/2501460_kushum/blob/main/Worksheet8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install scikit-learn numpy pandas




In [2]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error


In [3]:
wine = load_wine()

X = wine.data
y = wine.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Part 1 - Classification

In [4]:
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)

y_pred_dt = dt_clf.predict(X_test)
dt_f1 = f1_score(y_test, y_pred_dt, average='weighted')

print("Decision Tree F1 Score:", dt_f1)


Decision Tree F1 Score: 0.9439974457215836


In [5]:
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train, y_train)

y_pred_rf = rf_clf.predict(X_test)
rf_f1 = f1_score(y_test, y_pred_rf, average='weighted')

print("Random Forest F1 Score:", rf_f1)


Random Forest F1 Score: 1.0


In [6]:
print("\nModel Comparison")
print("Decision Tree F1:", dt_f1)
print("Random Forest F1:", rf_f1)



Model Comparison
Decision Tree F1: 0.9439974457215836
Random Forest F1: 1.0


Part 2 - Hyperparameter Tuning


In [7]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    scoring='f1_weighted',
    cv=5
)

grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}


In [8]:
best_rf_clf = grid_search.best_estimator_

y_pred_best = best_rf_clf.predict(X_test)
best_f1 = f1_score(y_test, y_pred_best, average='weighted')

print("Best Tuned Random Forest F1 Score:", best_f1)


Best Tuned Random Forest F1 Score: 1.0


Part 3 - Regression

In [9]:
X_reg = wine.data
y_reg = wine.data[:, 0]  # Alcohol feature

X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)


In [10]:
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train_r, y_train_r)

y_pred_dt_r = dt_reg.predict(X_test_r)

dt_mse = mean_squared_error(y_test_r, y_pred_dt_r)
print("Decision Tree MSE:", dt_mse)


Decision Tree MSE: 0.001705555555555562


In [11]:
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train_r, y_train_r)

y_pred_rf_r = rf_reg.predict(X_test_r)
rf_mse = mean_squared_error(y_test_r, y_pred_rf_r)

print("Random Forest MSE:", rf_mse)


Random Forest MSE: 0.0011140116666664994


Part 4 - RandomizedSearchCV(Regeression tuning)

In [12]:
param_dist = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [None, 5, 10, 20],
    'min_samples_leaf': [1, 2, 4]
}

random_search = RandomizedSearchCV(
    RandomForestRegressor(random_state=42),
    param_dist,
    n_iter=10,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42
)

random_search.fit(X_train_r, y_train_r)

print("Best Regression Parameters:", random_search.best_params_)


Best Regression Parameters: {'n_estimators': 50, 'min_samples_leaf': 1, 'max_depth': 10}


In [13]:
best_rf_reg = random_search.best_estimator_

y_pred_best_r = best_rf_reg.predict(X_test_r)
best_mse = mean_squared_error(y_test_r, y_pred_best_r)

print("Best Tuned Random Forest MSE:", best_mse)


Best Tuned Random Forest MSE: 0.0008203411111110997
