### Importing Dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("Social_Network_Ads.csv")

df

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0
...,...,...,...
395,46,41000,1
396,51,23000,1
397,50,20000,1
398,36,33000,0


In [3]:
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

X, y

(array([[    19,  19000],
        [    35,  20000],
        [    26,  43000],
        [    27,  57000],
        [    19,  76000],
        [    27,  58000],
        [    27,  84000],
        [    32, 150000],
        [    25,  33000],
        [    35,  65000],
        [    26,  80000],
        [    26,  52000],
        [    20,  86000],
        [    32,  18000],
        [    18,  82000],
        [    29,  80000],
        [    47,  25000],
        [    45,  26000],
        [    46,  28000],
        [    48,  29000],
        [    45,  22000],
        [    47,  49000],
        [    48,  41000],
        [    45,  22000],
        [    46,  23000],
        [    47,  20000],
        [    49,  28000],
        [    47,  30000],
        [    29,  43000],
        [    31,  18000],
        [    31,  74000],
        [    27, 137000],
        [    21,  16000],
        [    28,  44000],
        [    27,  90000],
        [    35,  27000],
        [    33,  28000],
        [    30,  49000],
        [   

### Splitting data

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Performing feature scaling

In [5]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

X_train, X_test

(array([[ 0.76320735, -1.11966439],
        [ 1.70253947, -0.29721867],
        [-0.08219156, -0.00348805],
        [-1.11545689, -1.61900643],
        [-0.3639912 , -0.79656071],
        [ 0.19960808,  0.02588501],
        [ 1.04500698,  0.52522706],
        [-0.92759047,  0.2608695 ],
        [-0.08219156,  0.02588501],
        [-0.92759047,  0.55460012],
        [ 0.29354129,  0.49585399],
        [-0.92759047, -0.32659173],
        [ 0.95107377,  0.58397318],
        [-1.58512295,  0.05525807],
        [ 0.66927414, -1.11966439],
        [ 1.04500698, -1.23715663],
        [-1.77298938, -1.29590276],
        [-0.55185762,  0.87770379],
        [ 1.9843391 , -1.20778357],
        [-1.86692259, -0.76718765],
        [ 0.10567486, -0.2678456 ],
        [-1.2093901 , -1.413395  ],
        [ 0.19960808, -0.15035336],
        [-0.45792441,  2.31698381],
        [ 1.42073983,  0.99519604],
        [-0.83365726,  2.28761074],
        [ 0.95107377,  0.76021155],
        [-1.30332332, -0.444

### Implementing Grid Search

* Defining Hyper parameters in a dictionary for Random Forest Classifier
* Using the `GridSearchCV` function to implement grid search

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

hyperparameters = {
    'n_estimators': [100, 200, 300],            # Number of trees
    'max_depth': [None, 10, 20, 30],            # Max depth of tree
    'min_samples_split': [2, 5, 10],            # Min samples to split an internal node
    'min_samples_leaf': [1, 2, 4],              # Min samples at a leaf node
    'max_features': ['sqrt', 'log2'],           # Number of features considered for splitting
    'bootstrap': [True, False],                 # Whether bootstrap samples are used
    'criterion': ['gini', 'entropy'],           # Function to measure the quality of a split
    'class_weight': [None, 'balanced']          # Handling class imbalance
}

classifier = RandomForestClassifier()
grid_search = GridSearchCV(estimator=classifier, param_grid=hyperparameters, cv=5, scoring='accuracy', n_jobs=-1)

### Trying out all combinations for Random Forest Classifier

In [10]:
grid_search.fit(X_train, y_train)

### Finding the best accuracy and best params from grid search

In [11]:
best_accuracy = grid_search.best_score_
best_params = grid_search.best_params_

best_accuracy, best_params

(0.9125,
 {'bootstrap': True,
  'class_weight': 'balanced',
  'criterion': 'entropy',
  'max_depth': 10,
  'max_features': 'log2',
  'min_samples_leaf': 4,
  'min_samples_split': 10,
  'n_estimators': 300})