In [None]:
# Examples from https://machinelearningmastery.com/metrics-evaluate-machine-learning-algorithms-python/

import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
# Column labels to assign when the CSV is loaded: nine entries in total,
# with the final one ('class') naming the column later used for the labels.
names = [
    'preg', 'plas', 'pres',
    'skin', 'test', 'mass',
    'pedi', 'age', 'class',
]

### Read in the file __`data/pima-indians-diabetes.data.csv`__ and set the __`names`__ argument to the column names above

In [None]:
# Load the Pima Indians diabetes CSV. Because `names` is supplied (and no
# `header` is given), pandas treats the first line of the file as data and
# labels the columns with the entries of `names`.
dataframe = pd.read_csv('data/pima-indians-diabetes.data.csv',
                        names=names)

### Examine the data

In [None]:
# Preview the first five rows (five is also head()'s default).
dataframe.head(n=5)

### Grab just the values from the dataframe into an array. Put all of the rows of data for the first 8 columns into a variable __`X`__ and put all of the labels from the 9th column into __`Y`__.

In [None]:
# Pull the frame's contents out as an array, then slice it column-wise:
# columns 0-7 (every row) become the feature matrix X and column 8
# (every row) becomes the label vector Y.
array = dataframe.values
X, Y = array[:, 0:8], array[:, 8]

### Create a __`KFold`__ instance with 10 splits and a __`random_state`__ of 7

In [None]:
# 10-fold cross-validation splitter, reused by every cross_val_score call below.
# `random_state` only has an effect when the rows are shuffled: recent
# scikit-learn releases raise a ValueError if it is set while shuffle=False
# (the default), and older releases silently ignored it. Shuffling is therefore
# enabled explicitly so that the seed of 7 actually makes the folds reproducible.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)

### Execute the __`model_selection.cross_val_score()`__ function on a LogisticRegression instance with your __`X`__ and __`Y`__ data, the kfold instance and the _accuracy_ scoring method

In [None]:
# Cross-validate a fresh logistic-regression estimator over the folds
# produced by `kfold`, scoring each held-out fold by classification accuracy.
# `results` ends up holding one score per fold.
model = LogisticRegression()
results = model_selection.cross_val_score(
    estimator=model, X=X, y=Y, scoring='accuracy', cv=kfold)

### Print out the accuracy mean and std for the results

In [None]:
# Summarise the per-fold accuracies as "mean (standard deviation)",
# each rounded to three decimal places.
print('Accuracy: {:.3f} ({:.3f})'.format(results.mean(), results.std()))

### Create a LogisticRegression model and re-run the __`model_selection.cross_val_score()`__ method with a *neg_log_loss* scoring

In [None]:
# Same cross-validation as before, but scored with 'neg_log_loss'.
# Note the 'neg_' prefix: scikit-learn reports the negated log loss so that
# larger scores are always better.
model = LogisticRegression()
results = model_selection.cross_val_score(
    estimator=model, X=X, y=Y, scoring='neg_log_loss', cv=kfold)

### Print out the logloss mean and std for the results

In [None]:
# The 'neg_log_loss' scorer returns the *negated* log loss (so that higher is
# better), which made the value printed under the "Logloss" label negative.
# Flip the sign of the mean so the figure shown is the actual log loss; the
# standard deviation is unaffected by the sign change.
print(f'Logloss: {-results.mean():.3f} ({results.std():.3f})')

### Create a LogisticRegression model and re-run the __`model_selection.cross_val_score()`__ method with a *roc_auc* scoring

In [None]:
# Cross-validate once more, this time scoring each fold by the area under
# the ROC curve.
model = LogisticRegression()
results = model_selection.cross_val_score(
    estimator=model, X=X, y=Y, scoring='roc_auc', cv=kfold)

### Print out the AUC mean and std for the results

In [None]:
# Report the per-fold AUC scores as "mean (standard deviation)" to three
# decimal places.
print('AUC: {:.3f} ({:.3f})'.format(results.mean(), results.std()))

### Now we will use a __`confusion_matrix`__

In [None]:
from sklearn.metrics import confusion_matrix

### Create training and test data using __`model_selection.train_test_split()`__ with a test size of 0.33 and a random seed of 7. Fit your training data to a LogisticRegression model and predict the results of your test values

In [None]:
# Hold out 33% of the rows as a test set (seeded with 7 so the split is
# repeatable), fit a logistic-regression model on the remaining rows, and
# predict labels for the held-out features.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, random_state=7, test_size=0.33)
# fit() returns the fitted estimator itself, so the call can be chained.
model = LogisticRegression().fit(X_train, Y_train)
predicted = model.predict(X_test)

### Compare the actual test data results with the predicted results by invoking a confusion matrix

In [None]:
# Tabulate actual test labels against the model's predictions and display
# the resulting matrix as the cell output.
matrix = confusion_matrix(y_true=Y_test, y_pred=predicted)
matrix

### Now we are going to use a __`classification_report`__

In [None]:
from sklearn.metrics import classification_report

### Generate a classification report from the actual test labels and the predicted labels by calling __`classification_report()`__

In [None]:
# Build the text report comparing actual test labels with the predictions.
# It is printed rather than displayed so that its line breaks render properly.
report = classification_report(y_true=Y_test, y_pred=predicted)
print(report)

### Regression models have different scoring options. Read the Boston housing data back in.

In [None]:
# Load the Boston housing CSV. This rebinds `dataframe`, replacing the
# diabetes data loaded earlier in the notebook.
dataframe = pd.read_csv(filepath_or_buffer='data/Boston.csv')

### Put the values for the DataFrame into an array (or use `.iloc` on the DataFrame). Create your feature matrix from the first 12 columns. Create your target matrix from the 13th column.

In [None]:
# Positional slicing of the frame: columns 0-11 form the feature matrix X
# and column 12 is the target Y. These rebind the X and Y used for the
# classification examples above.
X, Y = dataframe.iloc[:, 0:12], dataframe.iloc[:, 12]

### Create a LinearRegression model and generate a __`cross_val_score`__ with a scoring type of *neg_mean_absolute_error*

In [None]:
from sklearn.linear_model import LinearRegression

# Cross-validate an ordinary linear regression over the folds from `kfold`.
# The 'neg_mean_absolute_error' scorer yields the negated MAE for each fold
# (scikit-learn scorers follow a "higher is better" convention).
model = LinearRegression()
results = model_selection.cross_val_score(
    estimator=model, X=X, y=Y, scoring='neg_mean_absolute_error', cv=kfold)

### Print out the MAE mean and std for the results

In [None]:
# `results` holds 'neg_mean_absolute_error' scores, i.e. the MAE with its sign
# flipped, so printing the raw mean under an "MAE" label showed a negative
# error. Negate the mean to report the true mean absolute error; the standard
# deviation does not depend on the sign.
print(f'MAE: {-results.mean():.3f} ({results.std():.3f})')

### Create a LinearRegression model and generate a __`cross_val_score`__ with a scoring type of *neg_mean_squared_error*

In [None]:
# Repeat the linear-regression cross-validation, scoring each fold with the
# negated mean squared error.
model = LinearRegression()
results = model_selection.cross_val_score(
    estimator=model, X=X, y=Y, scoring='neg_mean_squared_error', cv=kfold)

### Print out the MSE mean and std for the results

In [None]:
# `results` holds 'neg_mean_squared_error' scores, i.e. the MSE with its sign
# flipped, so printing the raw mean under an "MSE" label showed a negative
# value for a quantity that cannot be negative. Negate the mean to report the
# true mean squared error; the standard deviation does not depend on the sign.
print(f'MSE: {-results.mean():.3f} ({results.std():.3f})')

### Create a LinearRegression model and generate a __`cross_val_score`__ with a scoring type of _r2_

In [None]:
# Final regression metric: cross-validate the linear model and score each
# fold by its coefficient of determination (r^2).
model = LinearRegression()
results = model_selection.cross_val_score(
    estimator=model, X=X, y=Y, scoring='r2', cv=kfold)

### Print out the $ r^2 $ mean and std for the results

In [None]:
# Report the per-fold r^2 scores as "mean (standard deviation)" to three
# decimal places.
print('r^2: {:.3f} ({:.3f})'.format(results.mean(), results.std()))