In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [2]:
# Read in the diabetes dataset
df = pd.read_csv('../Resources/diabetes.csv')
X = df.drop('Outcome', axis=1)
y = df['Outcome']
target_names = ["negative", "positive"]

In [3]:
# Prepare the data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Import a Random Forests classifier
from sklearn.ensemble import RandomForestClassifier

In [7]:
# Fit the Random Forests classifier model, and then print a classification report and the training and testing scores
clf=RandomForestClassifier(random_state=42).fit(X_train_scaled,y_train)
y_pred=clf.predict(X_test_scaled)
print(classification_report(y_test,y_pred,target_names=target_names))
print(f'Training:{clf.score(X_train_scaled,y_train)}')
print(f'Testing:{clf.score(X_test_scaled,y_test)}')

              precision    recall  f1-score   support

    negative       0.83      0.89      0.85       123
    positive       0.77      0.67      0.71        69

    accuracy                           0.81       192
   macro avg       0.80      0.78      0.78       192
weighted avg       0.80      0.81      0.80       192

Training:1.0
Testing:0.8072916666666666


In [9]:
# Import an Extremely Random Trees classifier
from sklearn.ensemble import ExtraTreesClassifier

In [10]:
# Fit the Extremely Random Trees classifier model, and then print a classification report and the training and testing scores
clf=ExtraTreesClassifier(random_state=42).fit(X_train_scaled,y_train)
y_pred=clf.predict(X_test_scaled)
print(classification_report(y_test,y_pred,target_names=target_names))
print(f'Training:{clf.score(X_train_scaled,y_train)}')
print(f'Testing:{clf.score(X_test_scaled,y_test)}')

              precision    recall  f1-score   support

    negative       0.80      0.89      0.84       123
    positive       0.75      0.59      0.66        69

    accuracy                           0.78       192
   macro avg       0.77      0.74      0.75       192
weighted avg       0.78      0.78      0.77       192

Training:1.0
Testing:0.78125


In [11]:
# Import an Adaptive Boosting classifier
from sklearn.ensemble import AdaBoostClassifier

In [12]:
# Fit the Adaptive Boosting classifier model, and then print a classification report and the training and testing scores
clf=AdaBoostClassifier(random_state=42).fit(X_train_scaled,y_train)
y_pred=clf.predict(X_test_scaled)
print(classification_report(y_test,y_pred,target_names=target_names))
print(f'Training:{clf.score(X_train_scaled,y_train)}')
print(f'Testing:{clf.score(X_test_scaled,y_test)}')

              precision    recall  f1-score   support

    negative       0.83      0.85      0.84       123
    positive       0.73      0.70      0.71        69

    accuracy                           0.80       192
   macro avg       0.78      0.77      0.78       192
weighted avg       0.80      0.80      0.80       192

Training:0.8229166666666666
Testing:0.796875


In [13]:
# BONUS
# Create a function to simplify testing the different models
def model_tester(model,X,y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    clf=model.fit(X_train_scaled,y_train)
    print(classification_report(y_test,y_pred,target_names=target_names))
    print(f'Training:{clf.score(X_train_scaled,y_train)}')
    print(f'Testing:{clf.score(X_test_scaled,y_test)}')
    


In [14]:
model_tester(AdaBoostClassifier(random_state=1),X,y)

              precision    recall  f1-score   support

    negative       0.83      0.85      0.84       123
    positive       0.73      0.70      0.71        69

    accuracy                           0.80       192
   macro avg       0.78      0.77      0.78       192
weighted avg       0.80      0.80      0.80       192

Training:0.8229166666666666
Testing:0.796875


In [15]:
model_tester(AdaBoostClassifier(random_state=1,n_estimators=200),X,y)
model_tester(AdaBoostClassifier(random_state=1,n_estimators=500,learning_rate=0.2),X,y)
model_tester(AdaBoostClassifier(random_state=1,n_estimators=2000,learning_rate=0.2),X,y)

              precision    recall  f1-score   support

    negative       0.83      0.85      0.84       123
    positive       0.73      0.70      0.71        69

    accuracy                           0.80       192
   macro avg       0.78      0.77      0.78       192
weighted avg       0.80      0.80      0.80       192

Training:0.8819444444444444
Testing:0.7916666666666666
              precision    recall  f1-score   support

    negative       0.83      0.85      0.84       123
    positive       0.73      0.70      0.71        69

    accuracy                           0.80       192
   macro avg       0.78      0.77      0.78       192
weighted avg       0.80      0.80      0.80       192

Training:0.8454861111111112
Testing:0.8229166666666666
              precision    recall  f1-score   support

    negative       0.83      0.85      0.84       123
    positive       0.73      0.70      0.71        69

    accuracy                           0.80       192
   macro avg      