In [1]:
# global Imports
import pandas as pd
import numpy as numpy

#sk learn imports
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer

#Data reporting
from IPython.display import display

# Global definitions:
overall_results = []
averaging_approach = 'macro'
zero_division_approach = 0
number_of_folds = 5
scoring = {'Accuracy': make_scorer(accuracy_score),
            'Precision': make_scorer(precision_score, average=averaging_approach, zero_division=zero_division_approach),
            'Recall': make_scorer(recall_score, average=averaging_approach, zero_division=zero_division_approach)}

# Helper functions
def parse_k_fold_results(results):
    return "m: " + str(numpy.average(results)) + " std: " + str(numpy.std(results))

def parse_argument_tuple_as_string(argumentsTuple):
    return "max Depth: " + str(argumentsTuple[0])  + \
           ", min Samples: " + str(argumentsTuple[1])

def calculate_results_holdout(classifier_used, X_train, X_test, y_train, y_test):
    classifier_used.fit(X_train, y_train)

    # predict the test set on our trained classifier
    y_test_predicted = classifier.predict(X_test)

    acc = metrics.accuracy_score(y_test, y_test_predicted)
    recall=metrics.recall_score(y_test, y_test_predicted)
    precision = metrics.precision_score(y_test, y_test_predicted)

    return pd.Series({
            'arguments': parse_argument_tuple_as_string(argumentTuple),
            'accuracy':acc,
            'precision':precision,
            'recall':recall
        })

def calculate_results_cross_validate(classifier_used, description_used, data, target):
   scores = cross_validate(classifier_used, data, target,
                                scoring = scoring,
                                cv = number_of_folds,
                                error_score = 0)

   return pd.Series({
            'classifier': str(classifier),
            'arguments': description_used,
            'mean_accuracy': numpy.average(scores.get('test_Accuracy')),
            'mean_precision': numpy.average(scores.get('test_Precision')),
            'mean_recall': numpy.average(scores.get('test_Recall')),
            'accuracy': parse_k_fold_results(scores.get('test_Accuracy')),
            'precision': parse_k_fold_results(scores.get('test_Precision')),
            'recall':parse_k_fold_results(scores.get('test_Recall'))
        })

def print_results(array, column_for_max):
    df = pd.DataFrame(array)
    display('Results', df)

    #best = decision_tree_results_df[decision_tree_results_df['mean_accuracy'].argmax()]
    best = df.iloc[df[column_for_max].idxmax()]
    display(best)

# Congressional Voting

In [2]:
votingDataLearn = pd.read_csv("data/voting/CongressionalVotingID.shuf.lrn.csv", na_values='unknown')
votingDataSolutionExample = pd.read_csv("data/voting/CongressionalVotingID.shuf.sol.ex.csv", na_values='unknown')
votingDataTest = pd.read_csv("data/voting/CongressionalVotingID.shuf.tes.csv", na_values='unknown')
display("Original Data", votingDataLearn)

#Recode values
votingDataLearn = votingDataLearn.dropna()
votingDataLearn = votingDataLearn.replace('y', 1)\
    .replace('n', 0)\
    .replace('democrat', 2)\
    .replace('republican', 3)
votingDataLearn = votingDataLearn[votingDataLearn.columns[1:17]].astype('category')

display("Recoded Data", votingDataLearn)

# Prepare a train/test set split
X_train, X_test, y_train, y_test = train_test_split(votingDataLearn[votingDataLearn.columns[2:17]],
                                                    votingDataLearn[votingDataLearn.columns[1]], test_size=0.33)

'Original Data'

Unnamed: 0,ID,class,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
0,213,democrat,n,n,y,n,n,n,y,y,y,n,y,n,n,n,y,y
1,94,democrat,y,n,y,n,n,n,y,n,y,y,y,n,n,n,y,y
2,188,democrat,y,n,y,n,n,n,y,y,y,n,n,n,n,n,y,
3,61,democrat,y,y,y,n,n,,y,y,y,y,n,n,n,n,y,
4,184,democrat,,,,,,,,,y,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,250,democrat,y,n,y,n,n,n,y,y,,n,y,n,n,n,y,y
214,26,democrat,y,n,y,n,n,n,y,y,y,y,n,n,n,n,y,y
215,110,democrat,y,,y,n,n,n,y,y,y,n,n,n,n,n,y,
216,34,republican,n,y,n,y,y,y,n,n,n,n,n,y,y,y,n,y


'Recoded Data'

Unnamed: 0,class,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports
0,2,0,0,1,0,0,0,1,1,1,0,1,0,0,0,1
1,2,1,0,1,0,0,0,1,0,1,1,1,0,0,0,1
5,2,0,0,1,0,0,0,1,1,1,1,0,0,1,0,1
8,3,0,0,0,1,1,1,0,0,0,1,0,1,1,1,0
9,3,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,3,0,0,0,1,1,1,0,0,0,0,0,1,1,1,0
212,3,1,1,1,1,1,1,1,1,0,1,0,0,1,1,0
214,2,1,0,1,0,0,0,1,1,1,1,0,0,0,0,1
216,3,0,1,0,1,1,1,0,0,0,0,0,1,1,1,0


## Neighbours

In [3]:
from sklearn import neighbors

knn_results = []

n_neighbors = range(1,10,1)

for n in n_neighbors:
    classifier = neighbors.KNeighborsClassifier(n)
    description = "N = " + str(n)
    result = calculate_results_cross_validate(classifier,
                                              description,
                                              votingDataLearn[votingDataLearn.columns[2:17]],
                                              votingDataLearn[votingDataLearn.columns[1]])
    knn_results.append(result)
    overall_results.append(result)

print_results(knn_results, "mean_accuracy")

'Results'

Unnamed: 0,classifier,arguments,mean_accuracy,mean_precision,mean_recall,accuracy,precision,recall
0,KNeighborsClassifier(n_neighbors=1),N = 1,0.591304,0.540594,0.538393,m: 0.591304347826087 std: 0.05897678246195886,m: 0.5405944055944055 std: 0.08137150458617437,m: 0.5383928571428572 std: 0.06301938387325104
1,KNeighborsClassifier(n_neighbors=2),N = 2,0.678261,0.610393,0.578274,m: 0.6782608695652173 std: 0.05897678246195885,m: 0.6103928939997051 std: 0.15449372546140622,m: 0.5782738095238095 std: 0.07604245293601514
2,KNeighborsClassifier(n_neighbors=3),N = 3,0.608696,0.547284,0.54369,m: 0.608695652173913 std: 0.07275304578557178,m: 0.5472840802987862 std: 0.09930513533310184,m: 0.5436904761904763 std: 0.0816831511892668
3,KNeighborsClassifier(n_neighbors=4),N = 4,0.66087,0.589899,0.58244,m: 0.6608695652173913 std: 0.08430747578115352,m: 0.5898991965207134 std: 0.158902797566028,m: 0.5824404761904762 std: 0.10085553811844443
4,KNeighborsClassifier(),N = 5,0.66087,0.603157,0.58869,m: 0.6608695652173913 std: 0.05070392952039388,m: 0.603157096171802 std: 0.07833256543097623,m: 0.5886904761904763 std: 0.06293668307988201
5,KNeighborsClassifier(n_neighbors=6),N = 6,0.713043,0.719294,0.610357,m: 0.7130434782608696 std: 0.05217391304347825,m: 0.7192940685045949 std: 0.12556545248847883,m: 0.6103571428571429 std: 0.05861762599336061
6,KNeighborsClassifier(n_neighbors=7),N = 7,0.695652,0.678699,0.60994,m: 0.6956521739130436 std: 0.07275304578557179,m: 0.6786988304093567 std: 0.12866800837714396,m: 0.6099404761904762 std: 0.07653465786736936
7,KNeighborsClassifier(n_neighbors=8),N = 8,0.695652,0.695921,0.604107,m: 0.6956521739130436 std: 0.07275304578557179,m: 0.695921052631579 std: 0.14675595892656465,m: 0.604107142857143 std: 0.06890954997021462
8,KNeighborsClassifier(n_neighbors=9),N = 9,0.686957,0.655219,0.603274,m: 0.6869565217391305 std: 0.08430747578115352,m: 0.655219298245614 std: 0.14566583838622166,m: 0.6032738095238095 std: 0.08913837048639896


classifier                   KNeighborsClassifier(n_neighbors=6)
arguments                                                  N = 6
mean_accuracy                                           0.713043
mean_precision                                          0.719294
mean_recall                                             0.610357
accuracy          m: 0.7130434782608696 std: 0.05217391304347825
precision         m: 0.7192940685045949 std: 0.12556545248847883
recall            m: 0.6103571428571429 std: 0.05861762599336061
Name: 5, dtype: object

## Bayes

In [4]:
from sklearn import naive_bayes

bayes_results = []

alphas = range(0,10,1)

for alpha in alphas:
    classifier = naive_bayes.CategoricalNB(alpha = alpha)
    description = "Alpha = " + str(alpha)
    result = calculate_results_cross_validate(classifier,
                                              description,
                                              votingDataLearn[votingDataLearn.columns[2:17]],
                                              votingDataLearn[votingDataLearn.columns[1]])
    bayes_results.append(result)
    overall_results.append(result)

print_results(bayes_results, "mean_accuracy")

  'setting alpha = %.1e' % _ALPHA_MIN)
  'setting alpha = %.1e' % _ALPHA_MIN)
  'setting alpha = %.1e' % _ALPHA_MIN)
  'setting alpha = %.1e' % _ALPHA_MIN)
  'setting alpha = %.1e' % _ALPHA_MIN)


'Results'

Unnamed: 0,classifier,arguments,mean_accuracy,mean_precision,mean_recall,accuracy,precision,recall
0,CategoricalNB(alpha=0),Alpha = 0,0.669565,0.6511,0.660298,m: 0.6695652173913043 std: 0.07064381221422576,m: 0.6510997335997335 std: 0.08349457091612156,m: 0.6602976190476191 std: 0.0944572903762088
1,CategoricalNB(alpha=1),Alpha = 1,0.669565,0.6511,0.660298,m: 0.6695652173913043 std: 0.07064381221422576,m: 0.6510997335997335 std: 0.08349457091612156,m: 0.6602976190476191 std: 0.0944572903762088
2,CategoricalNB(alpha=2),Alpha = 2,0.669565,0.6511,0.660298,m: 0.6695652173913043 std: 0.07064381221422576,m: 0.6510997335997335 std: 0.08349457091612156,m: 0.6602976190476191 std: 0.0944572903762088
3,CategoricalNB(alpha=3),Alpha = 3,0.669565,0.6511,0.660298,m: 0.6695652173913043 std: 0.07064381221422576,m: 0.6510997335997335 std: 0.08349457091612156,m: 0.6602976190476191 std: 0.0944572903762088
4,CategoricalNB(alpha=4),Alpha = 4,0.669565,0.6511,0.660298,m: 0.6695652173913043 std: 0.07064381221422576,m: 0.6510997335997335 std: 0.08349457091612156,m: 0.6602976190476191 std: 0.0944572903762088
5,CategoricalNB(alpha=5),Alpha = 5,0.678261,0.658104,0.666964,m: 0.6782608695652174 std: 0.085199643227241,m: 0.6581043956043956 std: 0.09447004687734278,m: 0.6669642857142857 std: 0.10496766859965583
6,CategoricalNB(alpha=6),Alpha = 6,0.678261,0.658104,0.666964,m: 0.6782608695652174 std: 0.085199643227241,m: 0.6581043956043956 std: 0.09447004687734278,m: 0.6669642857142857 std: 0.10496766859965583
7,CategoricalNB(alpha=7),Alpha = 7,0.678261,0.658104,0.666964,m: 0.6782608695652174 std: 0.085199643227241,m: 0.6581043956043956 std: 0.09447004687734278,m: 0.6669642857142857 std: 0.10496766859965583
8,CategoricalNB(alpha=8),Alpha = 8,0.678261,0.658104,0.666964,m: 0.6782608695652174 std: 0.085199643227241,m: 0.6581043956043956 std: 0.09447004687734278,m: 0.6669642857142857 std: 0.10496766859965583
9,CategoricalNB(alpha=9),Alpha = 9,0.678261,0.658104,0.666964,m: 0.6782608695652174 std: 0.085199643227241,m: 0.6581043956043956 std: 0.09447004687734278,m: 0.6669642857142857 std: 0.10496766859965583


classifier                                CategoricalNB(alpha=5)
arguments                                              Alpha = 5
mean_accuracy                                           0.678261
mean_precision                                          0.658104
mean_recall                                             0.666964
accuracy            m: 0.6782608695652174 std: 0.085199643227241
precision         m: 0.6581043956043956 std: 0.09447004687734278
recall            m: 0.6669642857142857 std: 0.10496766859965583
Name: 5, dtype: object

## Perceptron

In [5]:
from sklearn import linear_model

perceptron_results = []

classifier = linear_model.Perceptron()
description = "No additional args."
result = calculate_results_cross_validate(classifier,
                                          description,
                                          votingDataLearn[votingDataLearn.columns[2:17]],
                                          votingDataLearn[votingDataLearn.columns[1]])
perceptron_results.append(result)
overall_results.append(result)

print_results(perceptron_results, "mean_accuracy")

'Results'

Unnamed: 0,classifier,arguments,mean_accuracy,mean_precision,mean_recall,accuracy,precision,recall
0,Perceptron(),No additional args.,0.652174,0.605439,0.624464,m: 0.6521739130434783 std: 0.13187609467915742,m: 0.6054388422035479 std: 0.18708629019311446,m: 0.6244642857142857 std: 0.15862491054338837


classifier                                          Perceptron()
arguments                                    No additional args.
mean_accuracy                                           0.652174
mean_precision                                          0.605439
mean_recall                                             0.624464
accuracy          m: 0.6521739130434783 std: 0.13187609467915742
precision         m: 0.6054388422035479 std: 0.18708629019311446
recall            m: 0.6244642857142857 std: 0.15862491054338837
Name: 0, dtype: object

## Decision Tree

In [6]:
from sklearn import tree
import itertools

# Parameters for the decision tree
max_depth_arguments = range(1, 10, 2)#[5,10,15]
min_samples_leaf_arguments = [2,20,50,100]
argumentTuples = list(itertools.product(max_depth_arguments,
                                        min_samples_leaf_arguments))
decision_tree_results = []

for argumentTuple in argumentTuples:
    max_depth = argumentTuple[0]
    min_samples_leaf = argumentTuple[1]

    classifier = tree.DecisionTreeClassifier(criterion = 'gini',
                                             max_depth = max_depth,
                                             min_samples_leaf = min_samples_leaf,
                                             splitter = 'best')

    #result = calculate_results_holdout(classifier, X_train, X_test, y_train, y_test)
    result = calculate_results_cross_validate(classifier,
                                              parse_argument_tuple_as_string(argumentTuple),
                                              votingDataLearn[votingDataLearn.columns[2:17]],
                                              votingDataLearn[votingDataLearn.columns[1]])
    decision_tree_results.append(result)
    overall_results.append(result)

print_results(decision_tree_results, "mean_accuracy")

'Results'

Unnamed: 0,classifier,arguments,mean_accuracy,mean_precision,mean_recall,accuracy,precision,recall
0,"DecisionTreeClassifier(max_depth=1, min_sample...","max Depth: 1, min Samples: 2",0.704348,0.678831,0.680298,m: 0.7043478260869565 std: 0.11135868239013649,m: 0.6788311688311688 std: 0.11844489384109545,m: 0.6802976190476191 std: 0.13145121440560176
1,"DecisionTreeClassifier(max_depth=1, min_sample...","max Depth: 1, min Samples: 20",0.704348,0.678831,0.680298,m: 0.7043478260869565 std: 0.11135868239013649,m: 0.6788311688311688 std: 0.11844489384109545,m: 0.6802976190476191 std: 0.13145121440560176
2,"DecisionTreeClassifier(max_depth=1, min_sample...","max Depth: 1, min Samples: 50",0.66087,0.330435,0.5,m: 0.6608695652173914 std: 0.017391304347826077,m: 0.3304347826086957 std: 0.008695652173913038,m: 0.5 std: 0.0
3,"DecisionTreeClassifier(max_depth=1, min_sample...","max Depth: 1, min Samples: 100",0.66087,0.330435,0.5,m: 0.6608695652173914 std: 0.017391304347826077,m: 0.3304347826086957 std: 0.008695652173913038,m: 0.5 std: 0.0
4,"DecisionTreeClassifier(max_depth=3, min_sample...","max Depth: 3, min Samples: 2",0.678261,0.620957,0.601012,m: 0.6782608695652174 std: 0.058976782461958845,m: 0.6209571417499806 std: 0.17647325328412466,m: 0.6010119047619048 std: 0.06221066816519568
5,"DecisionTreeClassifier(max_depth=3, min_sample...","max Depth: 3, min Samples: 20",0.704348,0.672284,0.674464,m: 0.7043478260869565 std: 0.11135868239013649,m: 0.6722835497835498 std: 0.12922232997475017,m: 0.6744642857142857 std: 0.1402313746469532
6,"DecisionTreeClassifier(max_depth=3, min_sample...","max Depth: 3, min Samples: 50",0.66087,0.330435,0.5,m: 0.6608695652173914 std: 0.017391304347826077,m: 0.3304347826086957 std: 0.008695652173913038,m: 0.5 std: 0.0
7,"DecisionTreeClassifier(max_depth=3, min_sample...","max Depth: 3, min Samples: 100",0.66087,0.330435,0.5,m: 0.6608695652173914 std: 0.017391304347826077,m: 0.3304347826086957 std: 0.008695652173913038,m: 0.5 std: 0.0
8,"DecisionTreeClassifier(max_depth=5, min_sample...","max Depth: 5, min Samples: 2",0.669565,0.632017,0.59619,m: 0.6695652173913043 std: 0.07064381221422576,m: 0.6320168067226891 std: 0.08986614228091011,m: 0.5961904761904762 std: 0.07000931262964437
9,"DecisionTreeClassifier(max_depth=5, min_sample...","max Depth: 5, min Samples: 20",0.704348,0.672284,0.674464,m: 0.7043478260869565 std: 0.11135868239013649,m: 0.6722835497835498 std: 0.12922232997475017,m: 0.6744642857142857 std: 0.1402313746469532


classifier        DecisionTreeClassifier(max_depth=1, min_sample...
arguments                              max Depth: 1, min Samples: 2
mean_accuracy                                              0.704348
mean_precision                                             0.678831
mean_recall                                                0.680298
accuracy             m: 0.7043478260869565 std: 0.11135868239013649
precision            m: 0.6788311688311688 std: 0.11844489384109545
recall               m: 0.6802976190476191 std: 0.13145121440560176
Name: 0, dtype: object

## SVM

In [7]:
from sklearn import svm
import itertools

# Parameters for the decision tree
kernels = {"linear", "poly", "sigmoid", "rbf"}
gamma = [0.001]#numpy.arange(0.001, 1., 0.1)
gamma.append ("scale")
gamma.append ("auto")
c = range(1, 302, 100)
argumentTuples = list(itertools.product(kernels,
                                        gamma,
                                        c))

svc_results = []

for argumentTuple in argumentTuples:
    kernel = argumentTuple[0]
    gamma = argumentTuple[1]
    c = argumentTuple[2]

    classifier = svm.SVC(kernel = kernel, gamma=gamma, C=c)

    #result = calculate_results_holdout(classifier, X_train, X_test, y_train, y_test)
    result = calculate_results_cross_validate(classifier,
                                              "Kernel: " + kernel,
                                              votingDataLearn[votingDataLearn.columns[2:17]],
                                              votingDataLearn[votingDataLearn.columns[1]])
    svc_results.append(result)
    overall_results.append(result)

print_results(svc_results, "mean_accuracy")

'Results'

Unnamed: 0,classifier,arguments,mean_accuracy,mean_precision,mean_recall,accuracy,precision,recall
0,"SVC(C=1, gamma=0.001, kernel='sigmoid')",Kernel: sigmoid,0.66087,0.330435,0.5,m: 0.6608695652173914 std: 0.017391304347826077,m: 0.3304347826086957 std: 0.008695652173913038,m: 0.5 std: 0.0
1,"SVC(C=101, gamma=0.001, kernel='sigmoid')",Kernel: sigmoid,0.678261,0.589693,0.606012,m: 0.6782608695652174 std: 0.034782608695652154,m: 0.5896934570342836 std: 0.14172111156098502,m: 0.6060119047619048 std: 0.0681838417895262
2,"SVC(C=201, gamma=0.001, kernel='sigmoid')",Kernel: sigmoid,0.73913,0.718262,0.706964,m: 0.7391304347826086 std: 0.07275304578557179,m: 0.7182623448103325 std: 0.08439054982659913,m: 0.7069642857142858 std: 0.10141879004926399
3,"SVC(C=301, gamma=0.001, kernel='sigmoid')",Kernel: sigmoid,0.721739,0.687294,0.687381,m: 0.7217391304347827 std: 0.07064381221422575,m: 0.6872936867054513 std: 0.08378548701141678,m: 0.6873809523809524 std: 0.0994780881535011
4,"SVC(C=1, kernel='sigmoid')",Kernel: sigmoid,0.652174,0.602928,0.566726,m: 0.6521739130434783 std: 0.0727530457855718,m: 0.6029281537176274 std: 0.15030365697540057,m: 0.5667261904761904 std: 0.083044226394223
5,"SVC(C=101, kernel='sigmoid')",Kernel: sigmoid,0.591304,0.562271,0.545179,m: 0.5913043478260869 std: 0.13072431633367743,m: 0.5622710622710623 std: 0.136185887506513,m: 0.5451785714285714 std: 0.131719129735669
6,"SVC(C=201, kernel='sigmoid')",Kernel: sigmoid,0.6,0.563346,0.557679,m: 0.5999999999999999 std: 0.1179535649239177,m: 0.5633455433455433 std: 0.12485848188201817,m: 0.5576785714285715 std: 0.12504851666163172
7,"SVC(C=301, kernel='sigmoid')",Kernel: sigmoid,0.6,0.563346,0.557679,m: 0.5999999999999999 std: 0.1179535649239177,m: 0.5633455433455433 std: 0.12485848188201817,m: 0.5576785714285715 std: 0.12504851666163172
8,"SVC(C=1, gamma='auto', kernel='sigmoid')",Kernel: sigmoid,0.695652,0.633937,0.627381,m: 0.6956521739130436 std: 0.047628048478710105,m: 0.6339369397297786 std: 0.17864363771642802,m: 0.6273809523809524 std: 0.08294467188496384
9,"SVC(C=101, gamma='auto', kernel='sigmoid')",Kernel: sigmoid,0.756522,0.741915,0.706429,m: 0.7565217391304347 std: 0.11536086227323131,m: 0.7419148361098825 std: 0.14954586301740702,m: 0.7064285714285715 std: 0.1290134597207425


classifier            SVC(C=301, gamma='auto', kernel='sigmoid')
arguments                                        Kernel: sigmoid
mean_accuracy                                           0.773913
mean_precision                                          0.763725
mean_recall                                             0.717976
accuracy           m: 0.773913043478261 std: 0.08430747578115354
precision         m: 0.7637254901960784 std: 0.10712595754750129
recall             m: 0.7179761904761905 std: 0.0988270208260447
Name: 11, dtype: object

## Overall Results

In [8]:
print_results(overall_results, "mean_accuracy")

'Results'

Unnamed: 0,classifier,arguments,mean_accuracy,mean_precision,mean_recall,accuracy,precision,recall
0,KNeighborsClassifier(n_neighbors=1),N = 1,0.591304,0.540594,0.538393,m: 0.591304347826087 std: 0.05897678246195886,m: 0.5405944055944055 std: 0.08137150458617437,m: 0.5383928571428572 std: 0.06301938387325104
1,KNeighborsClassifier(n_neighbors=2),N = 2,0.678261,0.610393,0.578274,m: 0.6782608695652173 std: 0.05897678246195885,m: 0.6103928939997051 std: 0.15449372546140622,m: 0.5782738095238095 std: 0.07604245293601514
2,KNeighborsClassifier(n_neighbors=3),N = 3,0.608696,0.547284,0.543690,m: 0.608695652173913 std: 0.07275304578557178,m: 0.5472840802987862 std: 0.09930513533310184,m: 0.5436904761904763 std: 0.0816831511892668
3,KNeighborsClassifier(n_neighbors=4),N = 4,0.660870,0.589899,0.582440,m: 0.6608695652173913 std: 0.08430747578115352,m: 0.5898991965207134 std: 0.158902797566028,m: 0.5824404761904762 std: 0.10085553811844443
4,KNeighborsClassifier(),N = 5,0.660870,0.603157,0.588690,m: 0.6608695652173913 std: 0.05070392952039388,m: 0.603157096171802 std: 0.07833256543097623,m: 0.5886904761904763 std: 0.06293668307988201
...,...,...,...,...,...,...,...,...
83,"SVC(C=301, kernel='poly')",Kernel: poly,0.626087,0.589063,0.584345,m: 0.6260869565217391 std: 0.05897678246195884,m: 0.5890625795772854 std: 0.0715004745964195,m: 0.5843452380952381 std: 0.06509401451457411
84,"SVC(C=1, gamma='auto', kernel='poly')",Kernel: poly,0.660870,0.330435,0.500000,m: 0.6608695652173914 std: 0.017391304347826077,m: 0.3304347826086957 std: 0.008695652173913038,m: 0.5 std: 0.0
85,"SVC(C=101, gamma='auto', kernel='poly')",Kernel: poly,0.626087,0.589976,0.590179,m: 0.6260869565217391 std: 0.08064016083039745,m: 0.589976445123504 std: 0.09652329802283476,m: 0.5901785714285714 std: 0.09204698469961009
86,"SVC(C=201, gamma='auto', kernel='poly')",Kernel: poly,0.626087,0.589063,0.584345,m: 0.6260869565217391 std: 0.05897678246195884,m: 0.5890625795772854 std: 0.0715004745964195,m: 0.5843452380952381 std: 0.06509401451457411


classifier            SVC(C=301, gamma='auto', kernel='sigmoid')
arguments                                        Kernel: sigmoid
mean_accuracy                                           0.773913
mean_precision                                          0.763725
mean_recall                                             0.717976
accuracy           m: 0.773913043478261 std: 0.08430747578115354
precision         m: 0.7637254901960784 std: 0.10712595754750129
recall             m: 0.7179761904761905 std: 0.0988270208260447
Name: 51, dtype: object

# Amazon

In [9]:
amazonDataLearn = pd.read_csv("data/amazon/amazon_review_ID.shuf.lrn.csv")
amazonDataSolution = pd.read_csv("data/amazon/amazon_review_ID.shuf.sol.ex.csv")
amazonDataTest = pd.read_csv("data/amazon/amazon_review_ID.shuf.tes.csv")