In [1]:
import numpy as np
import cv2
from skimage.color import rgb2hsv, hsv2rgb
from skimage import feature
from matplotlib import pyplot as plt
import scipy.misc
from skimage.io import imread
import os
import fnmatch

In [2]:
import pandas as pd
df = pd.read_csv("/Users/nehayadav/Downloads/MLProject/Dataset/SingleGenres.csv",delimiter=",").fillna("-NA-")
df.head()

Unnamed: 0,Genre,imdbId,Feature1
0,Comedy,113101,0
1,Drama,114117,1
2,Drama,110299,1
3,Comedy,115683,0
4,Drama,114753,1


In [3]:
columns = list(df.columns.values)
print(columns)

['Genre', 'imdbId', 'Feature1']


In [4]:
df.Genre.value_counts()

Drama          4205
Comedy         2408
Documentary    1559
Horror          715
Thriller        329
Western         292
Action          131
Sci-Fi           91
Short            85
Family           59
Adventure        51
Crime            45
Romance          45
Mystery          44
Musical          38
Fantasy          27
Animation        23
Music            17
Biography        14
War              12
Name: Genre, dtype: int64

In [5]:
uniqueGenre = df.Genre.unique()
print uniqueGenre.shape

(20,)


In [6]:
def getSIFTFeatures(image):
    sift = cv2.xfeatures2d.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image, None)
    return keypoints, descriptors

In [7]:
def getBOWTrain():
    imageDir = "/Users/nehayadav/Downloads/MLProject/Dataset/MovieGenreFullPosters/"  
    inputData = []
    labels = []
    bow_train = cv2.BOWKMeansTrainer(20)
    detect = cv2.xfeatures2d.SIFT_create()
    for index, row in df.iterrows(): 
        filename = str(row[columns[1]])+".jpg"
        label = row[columns[2]]
        original_image = cv2.imread(imageDir+filename,0)
        keypoints, descriptors = getSIFTFeatures(original_image)
        bow_train.add(descriptors)
    return bow_train 

In [8]:
bow_train = getBOWTrain()

In [9]:
def loadBOWFeatures(bow_train):
    flann_params = dict(algorithm = 1, trees = 5) 
    matcher = cv2.FlannBasedMatcher(flann_params, dict(checks=50))
    voc = bow_train.cluster()
    extract = cv2.xfeatures2d.SIFT_create()
    detect = cv2.xfeatures2d.SIFT_create()
    bow_extract = cv2.BOWImgDescriptorExtractor( extract, cv2.BFMatcher(cv2.NORM_L2) )
    bow_extract.setVocabulary( voc )
    imageDir = "/Users/nehayadav/Downloads/MLProject/Dataset/MovieGenreFullPosters/"  
    inputData = []
    labels = []
    for index, row in df.iterrows(): 
        filename = str(row[columns[1]])+".jpg"
        label = row[columns[2]]
        original_image =cv2.imread(imageDir+filename,0) 
        bowFeatures = bow_extract.compute(original_image, detect.detect(original_image))
        inputData.extend(bowFeatures)
        labels.append(label)
    return inputData,labels

In [10]:
def loadSIFTFeatures():
    imageDir = "/Users/nehayadav/Downloads/MLProject/Dataset/MovieGenreFullPosters/"  
    inputData = []
    labels = []
    for index, row in df.iterrows(): 
        filename = str(row[columns[1]])+".jpg"
        label = row[columns[2]]
        original_image = cv2.imread(imageDir+filename,0)
        keypoints, descriptors = getSIFTFeatures(original_image)
        inputData.extend(descriptors)
        labels.append(label)
    return inputData,labels

In [11]:
t0 = time()
BOWFeatures,BOWLabels = loadBOWFeatures(bow_train)
test_time = time() - t0
print("Feature Extraction time:  %0.3fs" % test_time)
BOWFeatures,BOWLabels = np.asarray(BOWFeatures),np.asarray(BOWLabels)
print BOWFeatures.shape , BOWLabels.shape

(10190, 20) (10190,)


In [None]:
SIFTFeatures,SIFTLabels = loadSIFTFeatures()
SIFTFeatures,SIFTLabels = np.asarray(SIFTFeatures),np.asarray(SIFTLabels)
print (SIFTFeatures.shape,SIFTLabels.shape)

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(BOWFeatures, BOWLabels, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = np.asarray(X_train), np.asarray(X_test), np.asarray(y_train), np.asarray(y_test)
print (X_train.shape, X_test.shape, y_train.shape, y_test.shape)

((7642, 20), (2548, 20), (7642,), (2548,))


In [13]:
from __future__ import print_function
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression,chi2,SelectPercentile,SelectFpr
from sklearn.model_selection import GridSearchCV
from pprint import pprint
from sklearn import metrics
from time import time

In [14]:
from sklearn.ensemble import RandomForestClassifier
print("Training: ")
clf = RandomForestClassifier(n_estimators=700)
#print(clf)
t0 = time()
param_grid = {'n_estimators': [100, 400, 500, 700, 1000],
              'max_features': ['auto', 'sqrt','log2'] }
#clf = GridSearchCV(RandomForestClassifier(), param_grid)
clf.fit(X_train, y_train)
train_time = time() - t0
print("train time: %0.3fs" % train_time)
t0 = time()
pred = clf.predict(X_test)
test_time = time() - t0
print("test time:  %0.3fs" % test_time)
score = metrics.accuracy_score(y_test, pred)
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))

print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, pred)))
print("Precision Score is {}".format(metrics.precision_score(y_test, pred, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, pred, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, pred, average='weighted')))

Training: 
train time: 29.776s
test time:  1.463s
confusion matrix:
[[218 359  12   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [131 896  29   3   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [ 59 302  43   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [ 12 166   7   6   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [ 13  63   2   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  4   6   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [ 18  42   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  1   5   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  1   8   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  6  13   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [  2  26   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
pipeline = Pipeline([
    ('rfe', RFECV(estimator=RandomForestClassifier(),scoring='accuracy',cv=StratifiedKFold(2),step=1)),
    ('clf', RandomForestClassifier()),
])
parameters = {
    'clf__n_estimators': [10,20,50,100],
    'clf__max_features': ['auto', 'sqrt', 'log2'],
    'clf__random_state':[300,400,700]
}
MLPModel = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
print("parameters:")
pprint(parameters)
t0 = time()
MLPModel.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))
print()

print("Best score: %0.3f" % MLPModel.best_score_)
print("Best parameters set:")
best_parameters = MLPModel.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

predicted = MLPModel.predict(X_test)
print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, predicted)))
print("Precision Score is {}".format(metrics.precision_score(y_test, predicted, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, predicted, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, predicted, average='weighted')))

Performing grid search...
pipeline: ['rfe', 'clf']
parameters:
{'clf__max_features': ['auto', 'sqrt', 'log2'],
 'clf__n_estimators': [10, 20, 50, 100],
 'clf__random_state': [300, 400, 700]}
Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:  6.6min finished


done in 411.294s

Best score: 0.454
Best parameters set:
	clf__max_features: 'auto'
	clf__n_estimators: 100
	clf__random_state: 300
Calculated Accuracy is 0.451334379906
Precision Score is 0.456019676402
Recall Score is 0.451334379906
F1 Score is 0.376279529905


In [19]:
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier()
print("Training: ")
print(clf)
t0 = time()
clf.fit(X_train, y_train)
train_time = time() - t0
print("train time: %0.3fs" % train_time)
t0 = time()
pred = clf.predict(X_test)
test_time = time() - t0
print("test time:  %0.3fs" % test_time)
score = metrics.accuracy_score(y_test, pred)
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))

print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, pred)))
print("Precision Score is {}".format(metrics.precision_score(y_test, pred, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, pred, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, pred, average='weighted')))

Training: 
AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)
train time: 1.766s
test time:  0.067s
confusion matrix:
[[   0  568    0    0    0    0    0    0   16    0    0    0    0    0
     6    0    0    0    0    0]
 [   0 1017    0    0    0    0    0    0   18    0    0    0    0    0
    24    0    0    0    0    0]
 [   0  388    0    0    0    0    0    0   12    0    0    0    0    0
     6    0    0    0    0    0]
 [   0  185    0    0    0    0    0    0    1    0    0    0    0    0
     5    0    0    0    0    0]
 [   0   72    0    0    0    0    0    0    4    0    0    0    0    0
     3    0    0    0    0    0]
 [   0   10    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0]
 [   0   58    0    0    0    0    0    0    3    0    0    0    0    0
     0    0    0    0    0    0]
 [   0    6    0    0    0    0    0    0    0    0    0    0    0    0


In [22]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import RFECV
pipeline = Pipeline([
     ('rfe', RFECV(estimator=SVC(kernel="linear"),scoring='accuracy',cv=StratifiedKFold(2),step=1)),
    ('clf', AdaBoostClassifier()),
])
parameters = {
    'clf__n_estimators': [50,70,90],
    'clf__random_state': [1,20,40]
}
AdaBoostModel = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
print("parameters:")
pprint(parameters)
t0 = time()
AdaBoostModel.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))
print()

print("Best score: %0.3f" % AdaBoostModel.best_score_)
print("Best parameters set:")
best_parameters = AdaBoostModel.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

predicted = AdaBoostModel.predict(X_test)
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))
print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, predicted)))
print("Precision Score is {}".format(metrics.precision_score(y_test, predicted, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, predicted, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, predicted, average='weighted')))

Performing grid search...
pipeline: ['rfe', 'clf']
parameters:
{'clf__n_estimators': [50, 70, 90], 'clf__random_state': [1, 20, 40]}
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  4.3min finished


done in 308.742s

Best score: 0.407
Best parameters set:
	clf__n_estimators: 50
	clf__random_state: 1
confusion matrix:
[[   0  568    0    0    0    0    0    0   16    0    0    0    0    0
     6    0    0    0    0    0]
 [   0 1017    0    0    0    0    0    0   18    0    0    0    0    0
    24    0    0    0    0    0]
 [   0  388    0    0    0    0    0    0   12    0    0    0    0    0
     6    0    0    0    0    0]
 [   0  185    0    0    0    0    0    0    1    0    0    0    0    0
     5    0    0    0    0    0]
 [   0   72    0    0    0    0    0    0    4    0    0    0    0    0
     3    0    0    0    0    0]
 [   0   10    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0]
 [   0   58    0    0    0    0    0    0    3    0    0    0    0    0
     0    0    0    0    0    0]
 [   0    6    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0]
 [   0    9    0    0    0    0    0    

In [None]:
from sklearn.neural_network import MLPClassifier
#clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(70, 20, 10), random_state=1)
print("Training: ")
#print(clf)
t0 = time()
param_grid = {'hidden_layer_sizes': [(70, 20, 10),(40, 20, 10),(90, 20, 10)],
              'activation' :['logistic', 'tanh', 'relu'],
              'solver': ['lbfgs', 'sgd', 'adam'],
                'alpha':[ 0.0001, 0.001, 0.01]
clf = GridSearchCV(MLPClassifier(), param_grid)
print(clf)
X_train = SelectKBest(chi2, k=2).fit_transform(X_train, y_train)
X_test = SelectKBest(chi2, k=2).fit_transform(X_test, y_test)
clf.fit(X_train, y_train)
train_time = time() - t0
print("train time: %0.3fs" % train_time)
t0 = time()
pred = clf.predict(X_test)
test_time = time() - t0
print("test time:  %0.3fs" % test_time)
score = metrics.accuracy_score(y_test, pred)
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))

print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, pred)))
print("Precision Score is {}".format(metrics.precision_score(y_test, pred, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, pred, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, pred, average='weighted')))

Training: 
GridSearchCV(cv=None, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'alpha': [0.0001, 0.001, 0.01], 'activation': ['identity', 'logistic', 'tanh', 'relu'], 'solver': ['lbfgs', 'sgd', 'adam'], 'learning_rate': ['constant', 'invscaling', 'adaptive'], 'hidden_layer_sizes': [(70, 20, 10), (40, 20, 10), (90, 20, 10)]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)




In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import RFECV
pipeline = Pipeline([
    ('rfe', RFECV(estimator=SVC(kernel="linear"),scoring='accuracy',cv=StratifiedKFold(2),step=1)),
    ('clf', MLPClassifier()),
])
parameters = {
    'clf__hidden_layer_sizes': [(70, 20, 10),(40, 20, 10),(90, 20, 10)],
    'clf__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'clf__solver':['lbfgs', 'sgd', 'adam'],
    'clf__alpha':[ 0.0001, 0.001, 0.001],
    'clf__learning_rate':['constant', 'invscaling', 'adaptive'],
    'kbest__k':(1,2),
    'kbest__score_func':(f_regression,chi2)
}
MLPModel = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
print("parameters:")
pprint(parameters)
t0 = time()
MLPModel.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))
print()

print("Best score: %0.3f" % MLPModel.best_score_)
print("Best parameters set:")
best_parameters = MLPModel.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

predicted = MLPModel.predict(X_test)
print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, predicted)))
print("Precision Score is {}".format(metrics.precision_score(y_test, predicted, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, predicted, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, predicted, average='weighted')))

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
clf1 = LogisticRegression()
clf2 = KNeighborsClassifier()
clf3 = MLPClassifier()
eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('knn', clf3)], voting='hard')
print("Training: ")
print(eclf1)
t0 = time()
eclf1 = eclf1.fit(X_train, y_train)
train_time = time() - t0
print("train time: %0.3fs" % train_time)
t0 = time()
pred = eclf1.predict(X_test)
test_time = time() - t0
print("test time:  %0.3fs" % test_time)
score = metrics.accuracy_score(y_test, pred)
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))

print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, pred)))
print("Precision Score is {}".format(metrics.precision_score(y_test, pred, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, pred, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, pred, average='weighted')))

In [101]:
from sklearn.svm import SVC
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
pipeline = Pipeline([
    ('rfe', RFECV(estimator=SVC(kernel="linear"),scoring='accuracy',cv=StratifiedKFold(2),step=1)),
    ('clf', SVC()),
])
parameters = {
    'clf__kernel':('linear','rbf','sigmoid','poly'),
    'clf__C': (0.001,0.0001,0.01)
}
SVC_clf = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
print("parameters:")
pprint(parameters)
t0 = time()
SVC_clf.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))
print()

print("Best score: %0.3f" % SVC_clf.best_score_)
print("Best parameters set:")
best_parameters = SVC_clf.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

predicted = SVC_clf.predict(X_test)
print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, predicted)))
print("Precision Score is {}".format(metrics.precision_score(y_test, predicted, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, predicted, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, predicted, average='weighted')))
print("confusion matrix:")
print(metrics.confusion_matrix(y_test, pred))

Performing grid search...
pipeline: ['rfe', 'clf']
parameters:
{'clf__C': (0.001, 0.0001, 0.01),
 'clf__kernel': ('linear', 'rbf', 'sigmoid', 'poly')}
Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:   40.7s finished


done in 43.849s

Best score: 0.505
Best parameters set:
	clf__C: 0.001
	clf__kernel: 'linear'
Calculated Accuracy is 0.542339696525
Precision Score is 0.294132346427
Recall Score is 0.542339696525
F1 Score is 0.381410589495
confusion matrix:
[[134 404   6]
 [170 926  12]
 [ 72 312   7]]


In [103]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import RFECV

pipeline = Pipeline([
    ('rfe', RFECV(estimator=SVC(kernel="linear"),scoring='accuracy',cv=StratifiedKFold(2),step=1)),
    ('clf', KNeighborsClassifier()),
])
parameters = {
    'clf__n_neighbors':(3,5,10,15),
    'clf__weights':('uniform','distance'),
    'clf__algorithm':('auto', 'ball_tree', 'kd_tree', 'brute')
}
KNNModel = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
print("parameters:")
pprint(parameters)
t0 = time()
KNNModel.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))
print()

print("Best score: %0.3f" % KNNModel.best_score_)
print("Best parameters set:")
best_parameters = KNNModel.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

predicted = KNNModel.predict(X_test)
print("Calculated Accuracy is {}".format(metrics.accuracy_score(y_test, predicted)))
print("Precision Score is {}".format(metrics.precision_score(y_test, predicted, average='weighted')))
print("Recall Score is {}".format(metrics.recall_score(y_test, predicted, average='weighted')))
print("F1 Score is {}".format(metrics.f1_score(y_test, predicted, average='weighted')))

Performing grid search...
pipeline: ['clf']
parameters:
{'clf__algorithm': ('auto', 'ball_tree', 'kd_tree', 'brute'),
 'clf__n_neighbors': (3, 5, 10, 15),
 'clf__weights': ('uniform', 'distance')}
Fitting 3 folds for each of 32 candidates, totalling 96 fits


[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    5.6s


done in 11.343s

Best score: 0.471
Best parameters set:
	clf__algorithm: 'auto'
	clf__n_neighbors: 15
	clf__weights: 'uniform'
Calculated Accuracy is 0.493392070485
Precision Score is 0.45301706218
Recall Score is 0.493392070485
F1 Score is 0.449311337433


[Parallel(n_jobs=-1)]: Done  96 out of  96 | elapsed:   11.1s finished
