In [19]:
import pandas as pd
import requests
import io
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import GridSearchCV as gsc
from sklearn.ensemble import RandomForestClassifier as rfclib
from sklearn.metrics import accuracy_score

In [8]:
url = "https://raw.githubusercontent.com/pythonpandas303/Deep-Learning/main/diabetes2.csv"
download = requests.get(url).content

In [9]:
data = pd.read_csv(io.StringIO(download.decode('utf-8')))

In [10]:
X = data[['Age', 'BMI', 'Insulin', 'SkinThickness', 'BloodPressure', 'Glucose', 'Pregnancies', 'DiabetesPedigreeFunction']]
y = data['Diabetes']

In [11]:
x_train, x_test, y_train, y_test = tts(X, y, test_size=0.25, random_state=4, stratify=y)

In [12]:
param_grid = { 
    'n_estimators': [200, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
    'criterion' :['gini', 'entropy']
}

In [13]:
rfc=rfclib(random_state=42)

In [14]:
CV_rfc = gsc(estimator=rfc, param_grid=param_grid, cv= 5)
CV_rfc.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [200, 500]})

In [15]:
CV_rfc.best_params_

{'criterion': 'gini',
 'max_depth': 8,
 'max_features': 'log2',
 'n_estimators': 500}

In [16]:
rfc1=rfclib(random_state=42, max_features='log2', n_estimators= 500, max_depth=8, criterion='gini')

In [17]:
rfc1.fit(x_train, y_train)

RandomForestClassifier(max_depth=8, max_features='log2', n_estimators=500,
                       random_state=42)

In [18]:
pred=rfc1.predict(x_test)

In [20]:
print("Accuracy for Random Forest on CV data: ",accuracy_score(y_test,pred))

Accuracy for Random Forest on CV data:  0.892


#### Given values expected to return [1] = Diabetic

In [23]:
rfc1.predict([[6,148,72,35,168,43.1,0.627,50]])

array([1], dtype=int64)

#### Given values expected to return [0] = non-Diabetic

In [25]:
rfc1.predict([[3,88,58,11,54,24.8,0.267,22]])

array([0], dtype=int64)