# **1. Implement Classification Models:**


## Necessary Imports

In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, f1_score, mean_squared_error, r2_score

## loading wine dataset from scikit learn

In [5]:
# Load the wine dataset
data = load_wine()

# Convert to pandas DataFrame
df = pd.DataFrame(data.data, columns=data.feature_names)

# Add the target column
df['target'] = data.target

# Display the first five rows
print(df.head())

   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline  target  
0          

## splitting into feature and target

In [3]:
# Define features (X) and target (y)
X = df.drop('target', axis=1)
y = df['target']

## Training the data

In [6]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Train a Decision Tree Classifier

In [7]:
# Train a Decision Tree Classifier
dtc_classifier = DecisionTreeClassifier(random_state=42)
dtc_classifier.fit(X_train, y_train)

### Evaluate decision tree classifier

In [8]:
# Predict and evaluate Decision Tree
dtc_predictions = dtc_classifier.predict(X_test)
print("Decision Tree Classifier\n")
print("Classification Report:\n", classification_report(y_test, dtc_predictions))

Decision Tree Classifier

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.95      0.95        19
           1       0.95      1.00      0.98        21
           2       1.00      0.93      0.96        14

    accuracy                           0.96        54
   macro avg       0.97      0.96      0.96        54
weighted avg       0.96      0.96      0.96        54



## Train a Random Forest Classifier

In [9]:
# Train a Random Forest Classifier
rfc_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rfc_classifier.fit(X_train, y_train)

### Evaluate Random Forest Classifier

In [10]:
# Predict and evaluate Random Forest
rfc_predictions = rfc_classifier.predict(X_test)
print("\nRandom Forest Classifier\n")
print("Classification Report:\n", classification_report(y_test, rfc_predictions))


Random Forest Classifier

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        14

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54



## Compare the models based on their F1 scores.

In [11]:
# Evaluate models using F1 score
dtc_f1 = f1_score(y_test, dtc_predictions, average="weighted")
rfc_f1 = f1_score(y_test, rfc_predictions, average="weighted")

# Compare F1 Scores
print("\nF1 Score Comparison:")
print(f"Decision Tree F1 Score: {dtc_f1:.4f}")
print(f"Random Forest F1 Score: {rfc_f1:.4f}")


F1 Score Comparison:
Decision Tree F1 Score: 0.9628
Random Forest F1 Score: 1.0000


---
# 2. Hyperparameter Tuning:

## Identify three hyperparameters of the Random Forest Classifier.

In [12]:
# Define parameter grid for Random Forest Classifier
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}


## Perform hyperparameter tuning using GridSearchCV to optimize these parameters.

In [13]:
# Perform GridSearchCV
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),param_grid=param_grid,cv=5,scoring='f1_weighted', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best hyperparameters and evaluation
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("\nBest Hyperparameters for Random Forest Classifier:", best_params)
print("\nBest F1 Score for Random Forest Classifier:", best_score)

Fitting 5 folds for each of 36 candidates, totalling 180 fits

Best Hyperparameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}

Best F1 Score for Random Forest Classifier: 0.9680809081527346


In [14]:
# Evaluate the best model on the test set
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        14

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54



---
# 3. Implement Regression Model:

## • Train a Decision Tree Regressor and a Random Forest Regressor using scikit-learn.

In [15]:
 # Using the alcohol feature as the target for regression
y_reg = df['alcohol']
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.3, random_state=42)

In [16]:
# Train Decision Tree Regressor
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(X_train_reg, y_train_reg)
dt_reg_predictions = dt_regressor.predict(X_test_reg)

In [17]:
# Train a Random Forest Regressor
rf_regressor = RandomForestRegressor(random_state=42)
rf_regressor.fit(X_train_reg, y_train_reg)
rf_reg_predictions = rf_regressor.predict(X_test_reg)

In [18]:
# Evaluate Regression Models
dt_mse = mean_squared_error(y_test_reg, dt_reg_predictions)
rf_mse = mean_squared_error(y_test_reg, rf_reg_predictions)
dt_r2 = r2_score(y_test_reg, dt_reg_predictions)
rf_r2 = r2_score(y_test_reg, rf_reg_predictions)

print("Decision Tree Regressor MSE:", dt_mse)
print("Decision Tree Regressor R2 Score:", dt_r2)
print("\nRandom Forest Regressor MSE:", rf_mse)
print("Random Forest Regressor R2 Score:", rf_r2)


Decision Tree Regressor MSE: 0.0017592592592592462
Decision Tree Regressor R2 Score: 0.9967228563711633

Random Forest Regressor MSE: 0.0013293277777777996
Random Forest Regressor R2 Score: 0.9975237316304281


## • Identify three parameters for Random Forest Regressio and Perform hyperparameter tuning using RandomSearchCV to optimize these parameters.

In [19]:
# Hyperparameter Tuning for Random Forest Regressor
param_distributions = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42), param_distributions=param_distributions,
                                   n_iter=50,
                                   cv=5,
                                   scoring='neg_mean_squared_error',
                                   verbose=1,
                                   n_jobs=-1,
                                   random_state=42)
random_search.fit(X_train_reg, y_train_reg)

# Best hyperparameters
best_params_reg = random_search.best_params_
print("\nBest Hyperparameters for Random Forest Regressor:", best_params_reg)


Fitting 5 folds for each of 50 candidates, totalling 250 fits

Best Hyperparameters for Random Forest Regressor: {'n_estimators': 50, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 30}


In [20]:
# model evaluation
best_rf_regressor = random_search.best_estimator_
optimized_reg_predictions = best_rf_regressor.predict(X_test_reg)

optimized_mse = mean_squared_error(y_test_reg, optimized_reg_predictions)
optimized_r2 = r2_score(y_test_reg, optimized_reg_predictions)

print("Optimized Random Forest Regressor MSE:", optimized_mse)
print("Optimized Random Forest Regressor R2 Score:", optimized_r2)

Optimized Random Forest Regressor MSE: 0.0013222566871488896
Optimized Random Forest Regressor R2 Score: 0.9975369036398869
