In [None]:
import pandas as pd
# Read the Pima Indians diabetes CSV into a DataFrame and preview the first rows.
df = pd.read_csv(
    '../input/pima-indians-diabetes-database/diabetes.csv'
)
df.head()

In [None]:
import numpy as np
# A Glucose value of 0 is treated as a missing-reading placeholder and is
# replaced by the column median. The median is taken over the non-zero
# readings only, so the placeholders themselves do not drag the imputed
# value downward.
glucose_is_missing = df['Glucose'] == 0
glucose_median = df.loc[~glucose_is_missing, 'Glucose'].median()
df['Glucose'] = np.where(glucose_is_missing, glucose_median, df['Glucose'])
df.head()


In [None]:
# Separate the target column from the predictors.
y = df['Outcome']
X = df.drop(columns=['Outcome'])

In [None]:
# Display the feature matrix as a DataFrame restricted to every column of df
# except the last one.
# NOTE(review): presumably the last column of df is 'Outcome' — confirm; the
# slice is positional, not by name.
pd.DataFrame(X,columns=df.columns[:-1])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Baseline model: a 10-tree random forest with otherwise default settings.
# random_state is pinned so the forest (and therefore the metrics reported
# for it) is the same on every Restart & Run All.
rf_classifier = RandomForestClassifier(n_estimators=10, random_state=0)
rf_classifier.fit(X_train, y_train)
prediction = rf_classifier.predict(X_test)

In [None]:
# Show how many rows fall into each distinct value of the target y.
y.value_counts()

In [None]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
# Report the baseline model on the held-out set: confusion matrix, accuracy,
# then the per-class report, each starting on its own line.
print(
    confusion_matrix(y_test, prediction),
    accuracy_score(y_test, prediction),
    classification_report(y_test, prediction),
    sep='\n',
)

In [None]:
### Manual Hyperparameter Tuning
# Hand-picked configuration: 300 entropy-criterion trees, sqrt feature
# sampling, at least 10 samples per leaf, fixed seed.
model = RandomForestClassifier(
    n_estimators=300,
    criterion='entropy',
    max_features='sqrt',
    min_samples_leaf=10,
    random_state=100,
)
model.fit(X_train, y_train)

# Evaluate the hand-tuned forest on the held-out set.
predictions = model.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for the grid search: every key is a RandomForestClassifier
# hyperparameter and every value is a flat list of candidate settings.
#
# The previous version of this cell was not valid Python: the
# 'min_samples_leaf' key was repeated inside its own value, and both it and
# 'min_samples_split' added/subtracted integers to whole lists. The base
# candidate lists are kept unchanged here (assumed to be the intended values).
#
# 'auto' is omitted from 'max_features': for classifiers it was an alias of
# 'sqrt', and scikit-learn 1.3+ rejects it.
#
# NOTE: this grid has 2 * 10 * 2 * 5 * 4 * 10 = 8000 combinations, so an
# exhaustive search over it is expensive; trim the lists if runtime matters.
param_grid = {
    'criterion': ['entropy', 'gini'],
    'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000],
    'max_features': ['sqrt', 'log2'],
    'min_samples_leaf': [1, 2, 4, 6, 8],
    'min_samples_split': [2, 5, 10, 14],
    'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000],
}

print(param_grid)


In [None]:

#### Fit the grid_search to the data
# The base estimator is seeded so that every candidate is scored with the
# same randomness; otherwise score differences between parameter settings
# are mixed with seed noise and the selected best estimator can change from
# run to run.
rf = RandomForestClassifier(random_state=0)
# Exhaustive search over param_grid with 10-fold cross-validation, using all
# available cores (n_jobs=-1) and printing progress (verbose=2).
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=10,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

In [None]:
# Display the best estimator selected by the grid search.
grid_search.best_estimator_

In [None]:
# Keep a handle on the best estimator from the grid search for evaluation.
best_grid=grid_search.best_estimator_

In [None]:
# Display the estimator stored in best_grid.
best_grid

In [None]:

# Score the grid-search winner on the held-out set and print the confusion
# matrix, the accuracy line and the classification report in that order.
y_pred = best_grid.predict(X_test)
print(
    confusion_matrix(y_test, y_pred),
    "Accuracy Score {}".format(accuracy_score(y_test, y_pred)),
    "Classification report: {}".format(classification_report(y_test, y_pred)),
    sep='\n',
)