In [1]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error
import numpy as np

In [17]:
# Load the wine dataset bundled with scikit-learn, then unpack its
# feature matrix (`data.data`) and target vector (`data.target`).
data = load_wine()
X, y = data.data, data.target


In [3]:
# Train/test split for the classifiers: test_size=0.2 reserves a fifth of the
# samples for evaluation, and the fixed seed makes the split repeatable.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Baseline classifier: a single seeded decision tree fitted on the training split.
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train_clf, y_train_clf)
y_pred_dt = dt_clf.predict(X_test_clf)
# Score the held-out predictions with the weighted-average F1.
f1_dt = f1_score(y_true=y_test_clf, y_pred=y_pred_dt, average='weighted')



In [5]:
# Random forest classifier with default hyperparameters and a fixed seed,
# fitted on the training split.
rf_clf = RandomForestClassifier(random_state=42).fit(X_train_clf, y_train_clf)
y_pred_rf = rf_clf.predict(X_test_clf)
# Score the held-out predictions with the weighted-average F1.
f1_rf = f1_score(y_true=y_test_clf, y_pred=y_pred_rf, average='weighted')

In [6]:
# Print the test-set F1 of both classifiers, one per line.
for _label, _score in (
    ("F1 Score - Decision Tree Classifier:", f1_dt),
    ("F1 Score - Random Forest Classifier:", f1_rf),
):
    print(_label, _score)


F1 Score - Decision Tree Classifier: 0.9439974457215836
F1 Score - Random Forest Classifier: 1.0


In [7]:
# Candidate hyperparameter values for the random forest classifier search.
# A max_depth of None leaves tree depth unrestricted.
rf_params = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

In [8]:
# Grid search over rf_params with 5-fold cross-validation on the training
# split, scoring each candidate by weighted F1.
grid_search_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    cv=5,
    scoring='f1_weighted',
)
grid_search_rf.fit(X_train_clf, y_train_clf)
print("Best Hyperparameters for Random Forest Classifier:", grid_search_rf.best_params_)

Best Hyperparameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}


Regression models (fitted on the wine class label as a numeric target)

In [10]:

# Train/test split for the regressors: a fifth of the samples is held out,
# and the fixed seed makes the split repeatable.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Baseline regressor: a single seeded decision tree fitted on the training split.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train_reg, y_train_reg)
y_pred_dt_reg = dt_reg.predict(X_test_reg)
# Mean squared error of the held-out predictions.
mse_dt = mean_squared_error(y_true=y_test_reg, y_pred=y_pred_dt_reg)

In [13]:

# Random forest regressor with default hyperparameters and a fixed seed,
# fitted on the training split.
rf_reg = RandomForestRegressor(random_state=42).fit(X_train_reg, y_train_reg)
y_pred_rf_reg = rf_reg.predict(X_test_reg)
mse_rf = mean_squared_error(y_true=y_test_reg, y_pred=y_pred_rf_reg)

# Print the test-set mean squared error of both regressors, one per line.
for _label, _mse in (
    ("MSE - Decision Tree Regressor:", mse_dt),
    ("MSE - Random Forest Regressor:", mse_rf),
):
    print(_label, _mse)

MSE - Decision Tree Regressor: 0.16666666666666666
MSE - Random Forest Regressor: 0.06483333333333333


In [15]:
# Candidate hyperparameter values for the random forest regressor search.
# A max_depth of None leaves tree depth unrestricted.
rf_reg_params = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_leaf=[1, 2, 4],
)

In [16]:
# Randomized search: draw 20 candidate settings from rf_reg_params and score
# each with 5-fold cross-validation using negated MSE (higher is better).
# Both the estimator and the sampler are seeded.
random_search_rf_reg = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=rf_reg_params,
    n_iter=20,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
)
random_search_rf_reg.fit(X_train_reg, y_train_reg)
print("Best Hyperparameters for Random Forest Regressor:", random_search_rf_reg.best_params_)


Best Hyperparameters for Random Forest Regressor: {'n_estimators': 200, 'min_samples_leaf': 1, 'max_depth': 30}
