In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, mean_squared_error, accuracy_score,r2_score
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [8]:
#load the wine dataset
wine_data = load_wine()

X = wine_data.data
y = wine_data.target

# Extract features and target
df = pd.DataFrame(X, columns=wine_data.feature_names)
df['target'] = y

df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [9]:
df['target'].value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
1,71
0,59
2,48


In [10]:
X = df.drop('target',axis=1)
y = df['target']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
dt_classifier = DecisionTreeClassifier()

In [13]:
dt_classifier.fit(X_train,y_train)

In [14]:
y_pred = dt_classifier.predict(X_test)

In [15]:
f1 = f1_score(y_test,y_pred,average='weighted')
f1

0.9439974457215836

In [16]:
rf_classifier = RandomForestClassifier()

In [17]:
rf_classifier.fit(X_train,y_train)

In [18]:
y_pred = rf_classifier.predict(X_test)

In [19]:
f1 = f1_score(y_test,y_pred,average='weighted')
f1

1.0

In [20]:
param_grid_rf = {
    'n_estimators': [50, 100, 200],  # Number of trees in the forest
    'max_depth': [None, 10, 20, 30],  # Maximum depth of each tree; None allows unlimited depth
    'min_samples_split': [2, 5, 10]  # Minimum samples required to split a node
}


In [21]:
# Initialize GridSearchCV with a Random Forest classifier and a parameter grid
grid_search_rf = GridSearchCV(estimator=RandomForestClassifier(),
                              param_grid=param_grid_rf,
                              scoring='f1_weighted',
                              cv=5)
grid_search_rf.fit(X_train, y_train)


In [22]:
print("Best Parameters for Random Forest Classifier:", grid_search_rf.best_params_)
print("Best F1 Score from GridSearchCV:", grid_search_rf.best_score_)

Best Parameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 50}
Best F1 Score from GridSearchCV: 0.9782952128219708


In [23]:
X = df.drop(['target', 'alcohol'],axis=1)
y= df['alcohol']

In [24]:
dt_r = DecisionTreeRegressor()

In [25]:
dt_r.fit(X_train,y_train)

In [26]:
y_pred = dt_r.predict(X_test)

In [27]:
mse = mean_squared_error(y_test,y_pred)
mse

0.16666666666666666

In [28]:
r2 = r2_score(y_test,y_pred)
r2

0.7142857142857142

In [29]:
rf_r = RandomForestRegressor()

In [30]:
rf_r.fit(X_train,y_train)

In [31]:
y_pred= rf_r.predict(X_test)

In [32]:
mse = mean_squared_error(y_test,y_pred)
mse

0.06872777777777776

In [33]:
r2_score = r2_score(y_test,y_pred)
r2_score

0.8821809523809524

In [34]:
# Define a parameter distribution for Random Forest hyperparameter tuning
param_dist_rf_r = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

In [35]:
#Initialize RandomizedSearchCV with a Random Forest Regressor and a parameter distribution
random_search_rf_r = RandomizedSearchCV(estimator=RandomForestRegressor(),
                                          param_distributions=param_dist_rf_r,
                                          scoring='neg_mean_squared_error',
                                          cv=5,
                                          n_iter=10,
                                          random_state=42)
random_search_rf_r.fit(X_train, y_train)

In [37]:
print("Best Parameters for Random Forest Regressor:", random_search_rf_r.best_params_)


Best Parameters for Random Forest Regressor: {'n_estimators': 50, 'min_samples_split': 5, 'max_depth': 30}
