<a href="https://colab.research.google.com/github/np03cs4a240030-lab/worksheet1_ayush/blob/main/workshop_8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error


# Wine data as a feature matrix X and a vector of class labels y
X, y = load_wine(return_X_y=True)

# Hold out 20% of the samples for testing. The split is stratified on y and
# seeded, so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [2]:
# Baseline classifier: a single decision tree with a fixed seed.
# fit() returns the estimator itself, so construction and training can be chained.
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split with macro-averaged F1.
dt_preds = dt_clf.predict(X_test)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_preds, average="macro")

print("Decision Tree Classifier F1 Score:", dt_f1)


Decision Tree Classifier F1 Score: 0.9457411645054665


In [3]:
# Ensemble baseline: a random forest with library-default hyper-parameters
# and a fixed seed, trained on the same split as the single tree.
rf_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Same metric as the decision tree so the two scores are comparable.
rf_preds = rf_clf.predict(X_test)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_preds, average="macro")

print("Random Forest Classifier F1 Score:", rf_f1)


Random Forest Classifier F1 Score: 1.0


In [4]:
# Candidate hyper-parameters: 3 x 3 x 3 = 27 combinations, all of which are tried.
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10],
}

# Exhaustive search scored by macro F1 under 5-fold cross-validation;
# n_jobs=-1 lets the search use every available core.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring="f1_macro",
    cv=5,
    n_jobs=-1,
)

# Left as the final expression so the fitted search object is the cell's output.
grid_search.fit(X_train, y_train)


In [5]:
# Hyper-parameter combination with the best cross-validated score.
print("Best Parameters:", grid_search.best_params_)

# Evaluate the search's best estimator on the held-out split.
best_rf_clf = grid_search.best_estimator_
best_preds = best_rf_clf.predict(X_test)
best_f1 = f1_score(y_true=y_test, y_pred=best_preds, average="macro")

print("Tuned Random Forest F1 Score:", best_f1)


Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Tuned Random Forest F1 Score: 1.0


In [6]:
# Regression task: predict a wine's alcohol measurement from its other features.
# Load the Bunch form (instead of return_X_y=True) so the column layout can be
# checked by name rather than assumed.
wine = load_wine()
X_full, y_full = wine.data, wine.target  # y_full (class labels) is unused below

# Find the target column by name instead of relying on a hard-coded index.
# list.index raises ValueError if the dataset has no "alcohol" feature.
alcohol_col = list(wine.feature_names).index("alcohol")

y_reg = X_full[:, alcohol_col]                  # target (alcohol)
X_reg = np.delete(X_full, alcohol_col, axis=1)  # features: every other column

# Exactly one column (the target) must have been removed from the features.
assert X_reg.shape == (X_full.shape[0], X_full.shape[1] - 1)

# NOTE: this rebinds X_train / X_test / y_train / y_test, replacing the
# classification split created in the first cell. Re-run that cell before
# re-running any of the classification cells.
X_train, X_test, y_train, y_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)


In [7]:
# Baseline regressor: a single decision tree with a fixed seed.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Mean squared error on the held-out split (lower is better).
dt_preds = dt_reg.predict(X_test)
dt_mse = mean_squared_error(y_true=y_test, y_pred=dt_preds)

print("Decision Tree Regressor MSE:", dt_mse)


Decision Tree Regressor MSE: 0.31197222222222226


In [8]:
# Ensemble baseline: a random forest regressor with library-default
# hyper-parameters and a fixed seed.
rf_reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Same metric as the single tree so the two errors are comparable.
rf_preds = rf_reg.predict(X_test)
rf_mse = mean_squared_error(y_true=y_test, y_pred=rf_preds)

print("Random Forest Regressor MSE:", rf_mse)


Random Forest Regressor MSE: 0.15426672999999946


In [9]:
# Search space: 4 x 4 x 3 = 48 possible combinations.
param_dist = {
    "n_estimators": [50, 100, 200, 300],
    "max_depth": [None, 5, 10, 20],
    "max_features": ["sqrt", "log2", None],
}

# Randomized search: only n_iter=10 of the combinations are sampled, each
# scored by negated MSE under 5-fold cross-validation. The sampler and the
# forest are both seeded with 42.
random_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    scoring="neg_mean_squared_error",
    cv=5,
    random_state=42,
    n_jobs=-1,
)

# Left as the final expression so the fitted search object is the cell's output.
random_search.fit(X_train, y_train)


In [10]:
# Hyper-parameters of the best-scoring sampled configuration.
print("Best Parameters:", random_search.best_params_)

# Evaluate the search's best estimator on the held-out split.
# The search ranks configurations by cross-validated MSE on the training data
# and samples only some of the space, so this test-set MSE is not guaranteed
# to be lower than the untuned forest's.
best_rf_reg = random_search.best_estimator_
best_preds = best_rf_reg.predict(X_test)
best_mse = mean_squared_error(y_true=y_test, y_pred=best_preds)

print("Tuned Random Forest Regressor MSE:", best_mse)


Best Parameters: {'n_estimators': 50, 'max_features': 'log2', 'max_depth': 20}
Tuned Random Forest Regressor MSE: 0.1615173477777774
