Submitted By: Salina Gurung

Exercise 1: Implement Classification Models:

In [1]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

In [2]:
# Load the wine dataset and hold out 20% of the samples for testing.
# NOTE(review): the split is not stratified; class proportions in the small
# test fold may differ from the full dataset.
data = load_wine()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Both classifiers use default hyperparameters with a fixed seed.
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)

# Fit each model on the training fold and report its macro-averaged F1
# on the held-out fold.
for label, clf in (("Decision Tree F1:", dt), ("Random Forest F1:", rf)):
    clf.fit(X_train, y_train)
    test_predictions = clf.predict(X_test)
    print(label, f1_score(y_test, test_predictions, average='macro'))

Decision Tree F1: 0.9424740010946907
Random Forest F1: 1.0


Exercise 2: Hyperparameter Tuning:

In [3]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search space for the random forest: 2 * 3 * 2 = 12 candidates,
# each scored with 3-fold cross-validation on the training fold.
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5],
}

base_forest = RandomForestClassifier(random_state=42)
grid = GridSearchCV(
    estimator=base_forest,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=3,
)
grid.fit(X_train, y_train)

# best_score_ is the mean cross-validated macro F1 of the winning candidate,
# not a score on X_test.
print("Best Parameters:", grid.best_params_)
print("Best F1 Score:", grid.best_score_)

Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best F1 Score: 0.9862945382658644


Exercise 3: Implement Regression Model:

In [4]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error

# Baseline regressors with default hyperparameters and a fixed seed.
# NOTE(review): X_train / y_train come from the wine split in the earlier
# cell, so the target here is the integer class label -- confirm that
# treating it as a continuous target is what this exercise intends.
dt_reg = DecisionTreeRegressor(random_state=42)
rf_reg = RandomForestRegressor(random_state=42)

# Fit each regressor and report its mean squared error on the held-out fold.
for label, reg in (("Decision Tree MSE:", dt_reg), ("Random Forest MSE:", rf_reg)):
    reg.fit(X_train, y_train)
    test_predictions = reg.predict(X_test)
    print(label, mean_squared_error(y_test, test_predictions))

# Search space for the random forest regressor; only n_iter of the
# 3 * 3 * 3 = 27 combinations are sampled below.
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}

rand_search = RandomizedSearchCV(
    estimator=rf_reg,
    param_distributions=param_dist,
    n_iter=5,
    scoring='neg_mean_squared_error',
    cv=3,
    random_state=42,
)
rand_search.fit(X_train, y_train)

print("Best Params:", rand_search.best_params_)

Decision Tree MSE: 0.16666666666666666
Random Forest MSE: 0.06483333333333333
Best Params: {'n_estimators': 200, 'min_samples_split': 10, 'max_depth': None}
