In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [2]:
# Load the diabetes CSV, then separate the label column from the features.
df = pd.read_csv('../Resources/diabetes.csv')
y = df['Outcome']                # label column
X = df.drop(columns='Outcome')   # every remaining column is used as a feature
# Display names for the two Outcome classes; presumably "negative" maps to
# the lower label value and "positive" to the higher one -- confirm against the data.
target_names = ["negative", "positive"]

In [3]:
# Split the data into train and test partitions (seeded for repeatability),
# then standardize the features. The scaler is fitted on the training
# partition only, and that same fitted transform is applied to both partitions.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

In [6]:
# Import a Random Forests classifier
from sklearn.ensemble import RandomForestClassifier

In [13]:
# Train a Random Forests classifier on the scaled training data, then report
# the per-class metrics on the test set along with the train and test scores.
clf = RandomForestClassifier(random_state=1)
clf.fit(X_train_scaled, y_train)

# Test-set predictions feed the classification report.
y_pred = clf.predict(X_test_scaled)
cr = classification_report(y_test, y_pred, target_names=target_names)

# Score the fitted model on both partitions so the two can be compared.
train_score = clf.score(X_train_scaled, y_train)
test_score = clf.score(X_test_scaled, y_test)

print(
    cr,
    f"Train Score: {train_score:.3f}",
    f"Test Score: {test_score:.3f}",
    sep="\n",
)

              precision    recall  f1-score   support

    negative       0.83      0.89      0.86       123
    positive       0.78      0.67      0.72        69

    accuracy                           0.81       192
   macro avg       0.80      0.78      0.79       192
weighted avg       0.81      0.81      0.81       192

Train Score: 1.000
Test Score: 0.812


In [9]:
# Import an Extremely Random Trees classifier
from sklearn.ensemble import ExtraTreesClassifier

In [14]:
# Train an Extremely Random Trees classifier on the scaled training data, then
# report the per-class metrics on the test set along with the train and test scores.
clf = ExtraTreesClassifier(random_state=1)
clf.fit(X_train_scaled, y_train)

# Test-set predictions feed the classification report.
y_pred = clf.predict(X_test_scaled)
cr = classification_report(y_test, y_pred, target_names=target_names)

# Score the fitted model on both partitions so the two can be compared.
train_score = clf.score(X_train_scaled, y_train)
test_score = clf.score(X_test_scaled, y_test)

print(
    cr,
    f"Train Score: {train_score:.3f}",
    f"Test Score: {test_score:.3f}",
    sep="\n",
)

              precision    recall  f1-score   support

    negative       0.82      0.86      0.84       123
    positive       0.73      0.65      0.69        69

    accuracy                           0.79       192
   macro avg       0.77      0.76      0.76       192
weighted avg       0.78      0.79      0.78       192

Train Score: 1.000
Test Score: 0.786


In [11]:
# Import an Adaptive Boosting classifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [16]:
# Train an Adaptive Boosting classifier on the scaled training data, then
# report the per-class metrics on the test set along with the train and test scores.
clf = AdaBoostClassifier(random_state=1)
clf.fit(X_train_scaled, y_train)

# Test-set predictions feed the classification report.
y_pred = clf.predict(X_test_scaled)
cr = classification_report(y_test, y_pred, target_names=target_names)

# Score the fitted model on both partitions so the two can be compared.
train_score = clf.score(X_train_scaled, y_train)
test_score = clf.score(X_test_scaled, y_test)

print(
    cr,
    f"Train Score: {train_score:.3f}",
    f"Test Score: {test_score:.3f}",
    sep="\n",
)

              precision    recall  f1-score   support

    negative       0.83      0.85      0.84       123
    positive       0.73      0.70      0.71        69

    accuracy                           0.80       192
   macro avg       0.78      0.77      0.78       192
weighted avg       0.80      0.80      0.80       192

Train Score: 0.823
Test Score: 0.797
