# Tuning Hyperparameters

## Changing a model's parameters, called hyperparameters, to enhance the overall model performance and get the best possible scores

### In addition to the train and test datasets, we now need a third one, called the validation dataset, for tuning hyperparameters

# Hyperparameters can be changed:
## 1. Manually
## 2. Randomly with RandomizedSearchCV
## 3. Exhaustively with GridSearchCV

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load the CSV into a DataFrame; later cells read its "target" column as the label
dataset = pd.read_csv(filepath_or_buffer="./../heart.csv")

# For manual hyperparameter tuning, we first create a helper function that evaluates a model's predictions

In [5]:
from sklearn.metrics import accuracy_score, precision_score,recall_score ,f1_score
def evaluate_metrics(Y_actual,Y_pred):
    """Compute, print and return classification metrics for a set of predictions.

    Args:
        Y_actual: Ground-truth labels.
        Y_pred: Labels predicted by the model, aligned with ``Y_actual``.

    Returns:
        dict with the unrounded scores under the keys ``"accuracy"``,
        ``"precision"``, ``"recall"`` and ``"f1"``.

    The metric functions are called with scikit-learn's default averaging.
    Precision, recall and F1 all use ``zero_division=0`` so that an
    undefined score (e.g. no positive predictions) is reported as 0
    without emitting a warning.
    """
    accuracy = accuracy_score(Y_actual, Y_pred)
    precision = precision_score(Y_actual, Y_pred, zero_division=0)
    recall = recall_score(Y_actual, Y_pred, zero_division=0)
    # Same zero_division policy as precision/recall, for consistency
    f1 = f1_score(Y_actual, Y_pred, zero_division=0)
    scores = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }
    # Print a rounded summary; the returned dict keeps full precision
    print(f"Accuracy:{round(accuracy, 2)}")
    print(f"precision:{round(precision, 2)}")
    print(f"recall:{round(recall, 2)}")
    print(f"f1_score:{round(f1, 2)}")

    return scores
    

## Step 1 of Hyperparameter Tuning (split the dataset)

In [6]:
from sklearn.model_selection import train_test_split
# Shuffled copy of the data.
# NOTE: the split below is built from `dataset`, not from this copy;
# train_test_split shuffles the rows itself by default.
mixed_dataset = dataset.sample(frac=1)
# Features (every column except "target") and labels
X = dataset.drop("target", axis=1)
Y = dataset["target"]

# Target proportions: train 70%, valid 15%, test 15%.
# train_test_split returns (train, test) pairs in that order, so the 15%
# hold-out is the *second* and *fourth* result: keep it as X_test / Y_test
# and treat the remaining 85% as a temporary pool.
X_temp, X_test, Y_temp, Y_test = train_test_split(
    X, Y, test_size=0.15, random_state=42, stratify=Y
)

# Split the 85% pool into train and valid.
# 0.15 / 0.85 ~= 0.1765, so valid ends up as ~15% of the full dataset
# and train as ~70%.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X_temp, Y_temp, test_size=0.1765, random_state=42, stratify=Y_temp
)

# Baseline/Initial Results without Tuning

In [11]:
from sklearn.ensemble import RandomForestClassifier
# Seed NumPy's global RNG before building the forest (no random_state is passed)
np.random.seed(5)
# Baseline model: default hyperparameters, fitted on the training split
rfc_model = RandomForestClassifier().fit(X_train, Y_train)
# Baseline (first) predictions, scored on the validation split
y_predicted = rfc_model.predict(X_valid)
result = evaluate_metrics(Y_actual=Y_valid, Y_pred=y_predicted)

Accuracy:0.76
precision:0.75
recall:0.84
f1_score:0.79


# 1 - Manually Tuning Hyperparameters

In [27]:
# Same seed as the baseline cell so only the hyperparameters differ
np.random.seed(5)
# Manually chosen hyperparameters (by default n_estimators = 100)
rfc_model2 = RandomForestClassifier(n_estimators=7, max_depth=9).fit(X_train, Y_train)
# Predictions of the manually tuned model, scored on the validation split
y_predicted = rfc_model2.predict(X_valid)
result = evaluate_metrics(Y_actual=Y_valid, Y_pred=y_predicted)

Accuracy:0.8
precision:0.77
recall:0.92
f1_score:0.84
