In [3]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV

Decision Tree F1 Score: 0.9628
Random Forest F1 Score: 1.0000
Best Hyperparameters for Random Forest: {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 50}
Best Random Forest F1 Score after Tuning: 0.9816


In [4]:
# Fetch the Wine dataset shipped with scikit-learn and unpack it into the
# feature matrix (X) and the class-label vector (y).
data = load_wine()
X, y = data.data, data.target

In [5]:
# Hold out 30% of the samples as a test set; the fixed random_state keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [6]:
# 1. Decision tree baseline: fit on the training split, then predict labels
#    for the held-out test split. fit() returns the estimator itself, so it
#    can be chained directly onto the constructor.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_classifier.predict(X_test)

In [7]:
# 2. Random forest with library-default hyperparameters (only the seed is
#    pinned): fit on the training split, then predict the test split.
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)

In [8]:
# Score both untuned models on the test split. average='weighted' averages
# the per-class F1 values weighted by each class's support.
f1_dt, f1_rf = (
    f1_score(y_test, predictions, average='weighted')
    for predictions in (y_pred_dt, y_pred_rf)
)

print(f"Decision Tree F1 Score: {f1_dt:.4f}")
print(f"Random Forest F1 Score: {f1_rf:.4f}")

Decision Tree F1 Score: 0.9628
Random Forest F1 Score: 1.0000


In [9]:
# 3. Hyperparameter tuning for the random forest with GridSearchCV.
# A deliberately small grid: 3 values for each of 4 parameters, i.e.
# 3 * 3 * 3 * 3 = 81 candidate combinations.
param_grid = {
    # Number of trees in the forest
    'n_estimators': [20, 50, 100],
    # Maximum depth of each tree
    'max_depth': [5, 10, 20],
    # Minimum number of samples required to split an internal node
    'min_samples_split': [2, 4, 6],
    # Minimum number of samples required at a leaf node
    'min_samples_leaf': [1, 2, 4],
}


In [10]:
# Exhaustive 5-fold cross-validated search over param_grid on the training
# split only, ranking candidates by weighted F1. n_jobs=-1 runs the fits in
# parallel on all available processors. fit() returns the search object, so
# it is chained onto the constructor.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='f1_weighted',
).fit(X_train, y_train)

# Report the winning parameter combination
print("Best Hyperparameters for Random Forest:", grid_search.best_params_)

In [11]:
# No further training happens here: with GridSearchCV's default refit
# behaviour the search has already refit the best parameter combination on
# the full training split and exposes it as best_estimator_.
best_rf_classifier = grid_search.best_estimator_
# GridSearchCV.predict delegates to best_estimator_.predict, so this yields
# the same predictions as calling best_rf_classifier.predict(X_test).
y_pred_best_rf = grid_search.predict(X_test)

In [13]:
# Weighted F1 of the tuned forest on the same held-out test split used for
# the untuned models above, so the scores are directly comparable.
f1_best_rf = f1_score(
    y_test,
    y_pred_best_rf,
    average='weighted',
)
print(f"Best Random Forest F1 Score after Tuning: {f1_best_rf:.4f}")

Best Random Forest F1 Score after Tuning: 0.9816
