In [52]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor




1. Implement Classification Models:
• Train a Decision Tree Classifier and a Random Forest Classifier using scikit-learn.
• Compare the models based on their F1 scores.

In [53]:
# Fetch the wine dataset bunch, then unpack features (X) and labels (y) from it
wine = load_wine()
X, y = wine.data, wine.target


In [54]:
# Hold out 20% of the samples for evaluation. The split is stratified on y
# and seeded so that it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [55]:
# Baseline classifier: a single seeded decision tree.
# fit() returns the estimator itself, so construction and training are chained.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out split and score with macro-averaged F1
# (the target is multi-class, so an averaging mode must be chosen).
dt_predictions = dt_model.predict(X_test)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_predictions, average='macro')


In [56]:
# Ensemble classifier: 100 trees, same seed as the single-tree baseline.
# fit() returns the estimator itself, so construction and training are chained.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)

# Same evaluation as the decision tree: macro-averaged F1 on the held-out split
rf_predictions = rf_model.predict(X_test)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_predictions, average='macro')


In [57]:
# Report both classifiers' macro F1 scores side by side for comparison.
# `!s` forces str() conversion, which is what print() applies to its arguments.
print(f"Decision Tree F1 Score: {dt_f1!s}")
print(f"Random Forest F1 Score: {rf_f1!s}")


Decision Tree F1 Score: 0.9457411645054665
Random Forest F1 Score: 1.0


2. Hyperparameter Tuning:
• Identify three hyperparameters of the Random Forest Classifier.
• Perform hyperparameter tuning using GridSearchCV to optimize these parameters.
• Take hints from the scikit-learn documentation to guide the implementation.

In [58]:
# Search space for the Random Forest classifier: three hyperparameters with
# three candidate values each (3 x 3 x 3 = 27 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5, 10],
)


In [59]:
# Untuned, seeded forest that the grid search configures per candidate
rf = RandomForestClassifier(random_state=42)

# Exhaustive search over param_grid, scored by macro F1 with 5-fold
# cross-validation; n_jobs=-1 is passed to run the search in parallel.
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='f1_macro', n_jobs=-1)

# Only the training split is used here; the test split stays untouched
grid_search.fit(X_train, y_train)


In [60]:
# Show the parameter combination the grid search selected.
# `!s` forces str() conversion, which is what print() applies to its arguments.
print(f"Best Hyperparameters: {grid_search.best_params_!s}")


Best Hyperparameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}


In [61]:
# Take the estimator the grid search exposes for its best parameter combination
best_rf = grid_search.best_estimator_

# Evaluate it on the held-out split with the same macro F1 metric as the
# untuned models, so the numbers are directly comparable.
y_pred = best_rf.predict(X_test)
tuned_f1 = f1_score(y_true=y_test, y_pred=y_pred, average='macro')

print(f"Tuned Random Forest F1 Score: {tuned_f1!s}")


Tuned Random Forest F1 Score: 1.0


3. Implement Regression Model:
• Train a Decision Tree Regressor and a Random Forest Regressor using scikit-learn.
• Identify three parameters for Random Forest Regression and perform hyperparameter tuning using
RandomizedSearchCV to optimize these parameters.

In [62]:
# Baseline regressor: a single seeded decision tree.
# NOTE(review): y_train/y_test come from wine.target, i.e. the class labels
# used in the classification section, so the regressor fits label values as
# if they were a continuous quantity. Confirm this is the intended target.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Mean squared error on the held-out split.
# NOTE(review): dt_mse is not displayed in any of the visible cells.
dt_pred = dt_reg.predict(X_test)
dt_mse = mean_squared_error(y_true=y_test, y_pred=dt_pred)

In [63]:
# Ensemble regressor with library-default settings apart from the seed.
# NOTE(review): the target is wine.target (class labels), same as for the
# decision tree regressor. Confirm that regression on labels is intended.
rf_reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Mean squared error on the held-out split.
# NOTE(review): rf_mse is not displayed in any of the visible cells.
rf_pred = rf_reg.predict(X_test)
rf_mse = mean_squared_error(y_true=y_test, y_pred=rf_pred)


In [64]:
# Candidate values for three Random Forest regressor hyperparameters
# (4 x 4 x 3 = 48 possible combinations to sample from).
param_dist = dict(
    n_estimators=[50, 100, 200, 300],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
)

# Randomized search: n_iter=10 candidates are drawn from param_dist and each
# is scored by negated MSE under 5-fold cross-validation. Both the forest and
# the search itself are seeded with 42.
random_search = RandomizedSearchCV(
    RandomForestRegressor(random_state=42),
    param_dist,
    n_iter=10,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    random_state=42,
)

# Only the training split is used here; the test split stays untouched
random_search.fit(X_train, y_train)


In [65]:
# Take the estimator the randomized search exposes for its best candidate
best_rf_reg = random_search.best_estimator_

# Score the tuned regressor on the held-out split: MSE and R^2
tuned_pred = best_rf_reg.predict(X_test)
tuned_mse = mean_squared_error(y_true=y_test, y_pred=tuned_pred)
tuned_r2 = r2_score(y_true=y_test, y_pred=tuned_pred)

# `!s` forces str() conversion, which is what print() applies to its arguments
print(f"Best Parameters: {random_search.best_params_!s}")
print(f"Tuned RF MSE: {tuned_mse!s}")
print(f"Tuned RF R2 Score: {tuned_r2!s}")


Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'max_depth': 10}
Tuned RF MSE: 0.007822222222222225
Tuned RF R2 Score: 0.9871350253807106
