<a href="https://colab.research.google.com/github/nanthitha25/data-analytics/blob/main/ex9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Build one DataFrame holding the Iris feature columns plus the class label
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)

# Per-column count of missing entries (the Iris data has none)
print(df.isna().sum())

# Y is the label column (dependent variable); X is every remaining column (independent variables)
Y = df['species']
X = df.drop(columns='species')

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Hyperparameter combinations to compare, as (n_estimators, max_depth) pairs;
# max_depth=None leaves tree depth unrestricted
params_list = [
    {'n_estimators': n_trees, 'max_depth': depth}
    for n_trees, depth in [(10, 3), (50, 5), (100, None), (200, 10)]
]

for params in params_list:
    # Same seed for every forest, so only the hyperparameters differ between runs
    model = RandomForestClassifier(random_state=42, **params).fit(X_train, Y_train)
    Y_pred = model.predict(X_test)

    # Score the held-out predictions first, then print the report for this setting
    cm = confusion_matrix(Y_test, Y_pred)
    acc = accuracy_score(Y_test, Y_pred)
    report = classification_report(Y_test, Y_pred)

    print(f"Parameters: {params}")
    print("Confusion Matrix:\n", cm)
    print("Accuracy:", acc)
    print("Classification Report:\n", report)
    print("-" * 50)

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64
Parameters: {'n_estimators': 10, 'max_depth': 3}
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

--------------------------------------------------
Parameters: {'n_estimators': 50, 'max_depth': 5}
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00  

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid: 3 x 3 x 3 = 27 candidate combinations
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, None],
    'min_samples_split': [2, 4, 6]
}

# Initialize the model (fixed seed so every candidate is trained reproducibly)
rf = RandomForestClassifier(random_state=42)

# Grid search: every combination is scored by 5-fold cross-validated accuracy,
# using the training split only, so the test split stays untouched during selection
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, Y_train)

# Best parameters and the mean cross-validated accuracy that selected them.
# This CV score is distinct from the held-out test accuracy printed further down.
print("Best Parameters:", grid_search.best_params_)
print("Best CV Accuracy:", grid_search.best_score_)

# With the default refit=True, best_estimator_ is the winning configuration
# retrained on the whole training split
best_model = grid_search.best_estimator_
Y_pred_best = best_model.predict(X_test)

# Evaluation on the held-out test split
print("Confusion Matrix:\n", confusion_matrix(Y_test, Y_pred_best))
print("Accuracy:", accuracy_score(Y_test, Y_pred_best))
print("Classification Report:\n", classification_report(Y_test, Y_pred_best))

Best Parameters: {'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 150}
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

