In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from sklearn.metrics import f1_score, mean_squared_error


Implement Classification Models:


In [2]:
# Load the Wine dataset and pull out the feature matrix and target vector.
wine = load_wine()
X, y = wine.data, wine.target

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified (no `stratify=` argument) — consider
# `stratify=y` if class balance between train and test matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


Training samples: 142
Testing samples: 36


In [3]:
# Baseline model: a single decision tree with default hyperparameters (seeded).
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Evaluate on the held-out split using the weighted-average F1 score.
y_pred_dt = dt_classifier.predict(X_test)
f1_dt = f1_score(y_true=y_test, y_pred=y_pred_dt, average="weighted")

print("Decision Tree Classifier F1 Score:", f1_dt)


Decision Tree Classifier F1 Score: 0.9439974457215836


In [4]:
# Ensemble model: a random forest with default hyperparameters (seeded).
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Evaluate on the same held-out split and metric as the decision tree.
y_pred_rf = rf_classifier.predict(X_test)
f1_rf = f1_score(y_true=y_test, y_pred=y_pred_rf, average="weighted")

print("Random Forest Classifier F1 Score:", f1_rf)


Random Forest Classifier F1 Score: 1.0


In [5]:
# Side-by-side report of the two test-set F1 scores computed above.
print("F1 Score Comparison")
for label, score in (
    ("Decision Tree Classifier:", f1_dt),
    ("Random Forest Classifier:", f1_rf),
):
    print(label, score)


F1 Score Comparison
Decision Tree Classifier: 0.9439974457215836
Random Forest Classifier: 1.0


Hyperparameter Tuning:

In [6]:
# Candidate hyperparameter values for the random forest classifier grid search
# (3 x 3 x 3 = 27 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5, 10],
)


In [7]:
# Exhaustively evaluate every combination in param_grid, scoring each candidate
# by weighted F1 averaged over 5 cross-validation folds of the training data.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=5,
    n_jobs=-1  # run the candidate fits in parallel on all available cores
)

grid_search.fit(X_train, y_train)

print("Best Parameters (Classifier):", grid_search.best_params_)
# best_score_ is the mean cross-validated score over the training folds, not a
# test-set score, so it is not directly comparable to f1_dt / f1_rf.
print("Best F1 Score:", grid_search.best_score_)

# Score the refit best model on the held-out test set so the tuned forest can be
# compared with the untuned models on the same data and metric.
best_rf_classifier = grid_search.best_estimator_
y_pred_rf_tuned = best_rf_classifier.predict(X_test)
f1_rf_tuned = f1_score(y_test, y_pred_rf_tuned, average='weighted')

print("Tuned Random Forest Test F1 Score:", f1_rf_tuned)


Best Parameters (Classifier): {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best F1 Score: 0.9782952128219708


Implement Regression Model:

In [8]:
# Fit a default (seeded) random forest regressor on the same train/test split.
# NOTE(review): y_train is the same target array the classifiers were trained on,
# so the regressor treats those label values as plain numbers — confirm this is intended.
rf_regressor = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Mean squared error between the held-out targets and the regressor's predictions.
y_pred_rf_reg = rf_regressor.predict(X_test)
mse_rf = mean_squared_error(y_true=y_test, y_pred=y_pred_rf_reg)

print("Random Forest Regressor MSE:", mse_rf)


Random Forest Regressor MSE: 0.06483333333333333


In [9]:
# Hyperparameter values the randomized search samples from for the regressor
# (4 x 4 x 3 = 48 possible combinations).
param_dist = dict(
    n_estimators=[50, 100, 200, 300],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
)


In [10]:
# Sample 10 hyperparameter combinations from param_dist and score each one with
# 5-fold cross-validation on the training data. The search itself is seeded so
# the sampled combinations are repeatable.
random_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='neg_mean_squared_error',  # negated MSE, so that higher is better
    random_state=42,
    n_jobs=-1  # run the candidate fits in parallel on all available cores
)

random_search.fit(X_train, y_train)

print("Best Parameters (Regressor):", random_search.best_params_)
# best_score_ is the mean cross-validated negated MSE; flip the sign to report
# a conventional MSE. This is a training-fold score, not a test-set score.
print("Best MSE:", -random_search.best_score_)

# Evaluate the refit best model on the held-out test set so it can be compared
# with the untuned regressor's mse_rf on the same data.
best_rf_regressor = random_search.best_estimator_
y_pred_rf_reg_tuned = best_rf_regressor.predict(X_test)
mse_rf_tuned = mean_squared_error(y_test, y_pred_rf_reg_tuned)

print("Tuned Random Forest Regressor Test MSE:", mse_rf_tuned)


Best Parameters (Regressor): {'n_estimators': 300, 'min_samples_split': 2, 'max_depth': 10}
Best MSE: 0.04472559113300492
