In [1]:
import boto3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

## Location of the data set: S3 bucket name and object key
bucket_name = 'data-448'
file_key = 'Chapter2/wine.csv'

## Opening the bucket through a boto3 S3 resource handle
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

## Fetching the object and pulling the 'Body' entry out of the response
bucket_object = bucket.Object(file_key)
file_object = bucket_object.get()
file_content_stream = file_object.get('Body')

## Loading the csv content into a data-frame and previewing the first rows
wine = pd.read_csv(file_content_stream)
wine.head()

Unnamed: 0,Wine,Alcohol,Malic.acid,Ash,Acl,Mg,Phenols,Flavanoids,Nonflavanoid.phenols,Proanth,Color.int,Hue,OD,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [2]:
## Number of observations in each class of the target variable
wine.loc[:, 'Wine'].value_counts()

2    71
1    59
3    48
Name: Wine, dtype: int64

## Data Splitting

In [5]:
## Defining input and target variables
X = wine.drop(columns = 'Wine')
Y = wine['Wine']

## Splitting the data into train (80%) and test (20%), stratifying on the
## target so that the class proportions are preserved in both parts
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify = Y)

## Scaling the inputs to the 0-1 range
## The scaler is fit on the training data only; the test data is transformed
## with the min/max learned from the training data, so both sets share the
## same scale and no information from the test set leaks into preprocessing
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## One-vs-all Classifier

In [6]:
## One-vs-rest wrapper around an RBF-kernel SVM with probability estimates enabled
one_vs_all_svm = OneVsRestClassifier(
    estimator = SVC(kernel = 'rbf', probability = True)
)

## Training the classifier on the (scaled) training data
one_vs_all_svm.fit(X_train, Y_train)

## Estimated class probabilities for the test observations
one_vs_all_svm_pred = one_vs_all_svm.predict_proba(X_test)
one_vs_all_svm_pred

array([[1.19419411e-03, 9.96774955e-01, 2.03085072e-03],
       [2.36817884e-03, 9.92692129e-01, 4.93969191e-03],
       [9.58928994e-01, 3.50467514e-02, 6.02425445e-03],
       [9.53958937e-01, 4.23740214e-02, 3.66704175e-03],
       [4.12213078e-03, 9.84671727e-01, 1.12061418e-02],
       [1.00133711e-02, 1.19157550e-01, 8.70829079e-01],
       [2.43997170e-02, 3.55335550e-02, 9.40066728e-01],
       [3.07672204e-02, 9.26968227e-01, 4.22645529e-02],
       [3.69601174e-02, 9.51802803e-01, 1.12370797e-02],
       [9.04093086e-03, 4.69127591e-01, 5.21831478e-01],
       [9.75441277e-01, 1.73765539e-02, 7.18216859e-03],
       [1.13288488e-02, 2.26054580e-01, 7.62616572e-01],
       [5.08972523e-01, 4.41904541e-01, 4.91229356e-02],
       [1.59638280e-02, 3.87449749e-01, 5.96586423e-01],
       [7.56904370e-01, 2.20702893e-01, 2.23927370e-02],
       [2.39354197e-01, 7.13118469e-01, 4.75273343e-02],
       [2.66721921e-03, 1.27635262e-01, 8.69697519e-01],
       [8.64609397e-03, 4.05747