<a href="https://colab.research.google.com/github/prashannachauhankshetri99/Worksheet1/blob/main/Workshop8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error
import numpy as np



In [3]:
data = load_wine()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# 1. Classification Models
# A Decision Tree Classifier
clf_dt = DecisionTreeClassifier(random_state=42)
clf_dt.fit(X_train, y_train)
y_pred_dt = clf_dt.predict(X_test)

# A Random Forest Classifier
clf_rf = RandomForestClassifier(random_state=42)
clf_rf.fit(X_train, y_train)
y_pred_rf = clf_rf.predict(X_test)

# Comparing the
f1_dt = f1_score(y_test, y_pred_dt, average="weighted")
f1_rf = f1_score(y_test, y_pred_rf, average="weighted")
print(f"Decision Tree Classifier F1 Score: {f1_dt}")
print(f"Random Forest Classifier F1 Score: {f1_rf}")

Decision Tree Classifier F1 Score: 0.9628353590455226
Random Forest Classifier F1 Score: 1.0


In [6]:
# 2. Hyperparameter Tuning for Random Forest Classifier
# Three hyperparameters: n_estimators, max_depth, and min_samples_split
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20, 30],
    "min_samples_split": [2, 5, 10]
}

grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),
                           param_grid=param_grid,
                           scoring="f1_weighted",
                           cv=5)
grid_search.fit(X_train, y_train)
print(f"Best Parameters for Random Forest Classifier: {grid_search.best_params_}")
print(f"Best F1 Score from Grid Search: {grid_search.best_score_}")

Best Parameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best F1 Score from Grid Search: 0.9680809081527346


In [7]:
# 3. Regression Models
# Convert target to regression problem
y_reg = data.data[:, 0]
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.3, random_state=42)

# Training a Decision Tree Regressor
reg_dt = DecisionTreeRegressor(random_state=42)
reg_dt.fit(X_train_reg, y_train_reg)
y_pred_dt_reg = reg_dt.predict(X_test_reg)
mse_dt = mean_squared_error(y_test_reg, y_pred_dt_reg)

# Traning a Random Forest Regressor
reg_rf = RandomForestRegressor(random_state=42)
reg_rf.fit(X_train_reg, y_train_reg)
y_pred_rf_reg = reg_rf.predict(X_test_reg)
mse_rf = mean_squared_error(y_test_reg, y_pred_rf_reg)

print(f"Decision Tree Regressor MSE: {mse_dt}")
print(f"Random Forest Regressor MSE: {mse_rf}")

# Hyperparameter Tuning for Random Forest Regressor
# Three parameters: n_estimators, max_features, and max_depth
param_dist = {
    "n_estimators": [50, 100, 200, 300],
    "max_features": ["sqrt", "sqrt", "log2"],
    "max_depth": [None, 10, 20, 30, 40]
}

random_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                   param_distributions=param_dist,
                                   scoring="neg_mean_squared_error",
                                   n_iter=10,
                                   cv=5,
                                   random_state=42)
random_search.fit(X_train_reg, y_train_reg)
print(f"Best Parameters for Random Forest Regressor: {random_search.best_params_}")
print(f"Best Negative MSE from Randomized Search: {random_search.best_score_}")


Decision Tree Regressor MSE: 0.0017592592592592462
Random Forest Regressor MSE: 0.0013293277777777996
Best Parameters for Random Forest Regressor: {'n_estimators': 200, 'max_features': 'sqrt', 'max_depth': 40}
Best Negative MSE from Randomized Search: -0.10034172179916567
