# In [None]:
import boto3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

## Location of the wine data set: bucket name and object key
bucket_name = 'data-445'
file_key = 'Chapter6/wine.csv'

## Opening the bucket through the boto3 s3 resource
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

## Requesting the object and taking the 'Body' entry of the response,
## which is handed to pandas as the csv content
bucket_object = bucket.Object(file_key)
file_object = bucket_object.get()
file_content_stream = file_object.get('Body')

## Parsing the csv content into a data-frame
wine = pd.read_csv(filepath_or_buffer = file_content_stream)

## Defining input and target
## NOTE: X is kept as the raw (unscaled) feature frame; scaling happens
## inside each cross-validation pipeline below
X = wine.drop(columns = 'Wine')
Y = wine['Wine']

## Rescaling input variables to the [-1, 1] range.
## The scaler is deliberately NOT fitted on the full data set here: it is
## chained in front of every model, so cross_val_score refits it on the
## training folds only and the held-out fold never leaks into the
## preprocessing step.
scaler = MinMaxScaler(feature_range = (-1, 1))

## Defining logistic model
## ('multi_class' is omitted: 'auto' is the default behaviour and the
## argument is deprecated in recent scikit-learn releases)
logit = LogisticRegression(max_iter = 5000, solver = 'lbfgs', random_state = 1000)
logit_scores = cross_val_score(make_pipeline(scaler, logit), X, Y, cv = 10, n_jobs = -1)

## Defining decision tree model
tree = DecisionTreeClassifier(criterion = 'entropy', max_depth = 5, random_state = 1000)
tree_scores = cross_val_score(make_pipeline(scaler, tree), X, Y, cv = 10, n_jobs = -1)

## Defining svc model
svc = SVC(kernel = 'rbf', gamma = 'scale', random_state = 1000)
svc_scores = cross_val_score(make_pipeline(scaler, svc), X, Y, cv = 10, n_jobs = -1)

## Plotting cross-validation results: one line per model, one point per fold
fig, ax = plt.subplots(figsize = (12, 8))

## Each series is plotted against fold numbers 1..k (instead of the implicit
## 0-based index) so the x-axis agrees with its 'Fold Number' label, and the
## legend label is attached to the line itself so the two cannot get out of order
for scores, color, label in [(logit_scores, 'blue', 'logistic'),
                             (tree_scores, 'orange', 'Decision-tree'),
                             (svc_scores, 'brown', 'SVC')]:
    ax.plot(range(1, len(scores) + 1), scores, color = color, label = label)

## One integer tick per fold
ax.set_xticks(list(range(1, len(logit_scores) + 1)))
ax.set_xlabel('Fold Number')
ax.set_ylabel('10-fold Cross-Validation Accuracy')
ax.legend(loc = 'lower right')
ax.grid()
plt.show()

## Uncomment to export the figure as a pdf
# fig.savefig('chapter6_plot1.pdf')