# Hackathon

In [258]:
# read in the dataset
import pandas as pd
url = './Classification Techniques/Datasets/wine.data'
wine = pd.read_csv(url, header=None)
wine.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [259]:
# examine the response variable
wine[0].value_counts()

2    71
1    59
3    48
Name: 0, dtype: int64

In [260]:
# define X and y
X = wine.drop(0, axis=1)
y = wine[0]

In [261]:
# split into training and testing sets
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Naive Bayes

In [262]:
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

In [263]:
model = GaussianNB()
model.fit(X_train, y_train)
model

GaussianNB()

In [264]:
expected = y_test
predicted = model.predict(X_test)

In [265]:
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       1.00      1.00      1.00        18
          2       1.00      1.00      1.00        17
          3       1.00      1.00      1.00        10

avg / total       1.00      1.00      1.00        45

[[18  0  0]
 [ 0 17  0]
 [ 0  0 10]]


# multi nomial

In [266]:
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB(alpha=3.0)
model

MultinomialNB(alpha=3.0, class_prior=None, fit_prior=True)

In [267]:
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       0.93      0.78      0.85        18
          2       0.76      0.94      0.84        17
          3       0.78      0.70      0.74        10

avg / total       0.83      0.82      0.82        45

[[14  2  2]
 [ 1 16  0]
 [ 0  3  7]]


# Decision Tree

In [268]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
%pylab inline 
%matplotlib inline

Populating the interactive namespace from numpy and matplotlib


In [269]:
model = DecisionTreeClassifier(criterion='entropy', max_depth=11)
model.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=11,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')

In [270]:
expected = y_test
predicted = model.predict(X_test)

In [271]:
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       1.00      0.94      0.97        18
          2       0.94      1.00      0.97        17
          3       1.00      1.00      1.00        10

avg / total       0.98      0.98      0.98        45

[[17  1  0]
 [ 0 17  0]
 [ 0  0 10]]


# Random Forest

In [272]:
from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier(criterion='entropy',n_estimators=13)

forest.fit(X_train,y_train)
forest.score(X_test,y_test)

0.97777777777777775

In [273]:
expected = y_test
predicted = forest.predict(X_test)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       0.95      1.00      0.97        18
          2       1.00      0.94      0.97        17
          3       1.00      1.00      1.00        10

avg / total       0.98      0.98      0.98        45

[[18  0  0]
 [ 1 16  0]
 [ 0  0 10]]


# SVM

In [274]:
# load the iris datasets
from sklearn import svm

In [275]:
model = svm.SVC(kernel='linear', C=0.1)
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       1.00      0.94      0.97        18
          2       0.89      1.00      0.94        17
          3       1.00      0.90      0.95        10

avg / total       0.96      0.96      0.96        45

[[17  1  0]
 [ 0 17  0]
 [ 0  1  9]]


# Kernl SVM

In [276]:
model = svm.SVC(kernel='rbf', random_state=0, gamma=0.10, C=10)

In [277]:
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       0.00      0.00      0.00        18
          2       0.38      1.00      0.55        17
          3       0.00      0.00      0.00        10

avg / total       0.14      0.38      0.21        45

[[ 0 18  0]
 [ 0 17  0]
 [ 0 10  0]]


# Logistic

In [278]:
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')

In [279]:
model = LogisticRegression(C=1,penalty='l2')
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

             precision    recall  f1-score   support

          1       1.00      0.94      0.97        18
          2       0.89      1.00      0.94        17
          3       1.00      0.90      0.95        10

avg / total       0.96      0.96      0.96        45

[[17  1  0]
 [ 0 17  0]
 [ 0  1  9]]


# KNN

In [280]:
from sklearn.neighbors import KNeighborsClassifier

In [281]:
model = KNeighborsClassifier(n_neighbors=11)
print(model)
model.fit(X_train, y_train)
expected = y_test
predicted = model.predict(X_test)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=11, p=2,
           weights='uniform')
             precision    recall  f1-score   support

          1       0.93      0.78      0.85        18
          2       0.75      0.71      0.73        17
          3       0.43      0.60      0.50        10

avg / total       0.75      0.71      0.73        45

[[14  0  4]
 [ 1 12  4]
 [ 0  4  6]]
