In [5]:
import os
import sys
import csv
import operator
import time
import datetime
import platform

In [6]:
def writeText(text, path, mode = 'w'):
    """Write ``text`` to the file at ``path`` as UTF-8.

    Args:
        text: String to write.
        path: Destination file path.
        mode: Mode string handed to ``open`` (defaults to ``'w'``).
    """
    handle = open(path, mode, encoding='utf-8')
    with handle:
        handle.write(text)
        
def writeJson(json, path, mode = 'w'):
    """Serialize ``json`` to JSON text and write it to ``path``.

    Args:
        json: JSON-serializable object (dict, list, str, number, ...).
        path: Destination file path.
        mode: Mode string handed to ``open`` (defaults to ``'w'``).

    Raises:
        TypeError: If ``json`` contains a value the encoder cannot serialize.
    """
    # The parameter name ``json`` is kept for existing callers, but it shadows
    # the standard-library module, so the module is imported under an alias.
    import json as json_module
    with open(path, mode, encoding='utf-8') as file:
        file.write(json_module.dumps(json))
        
def writeCsv(listOut, outputFile):
    """Write every element of ``listOut`` as one comma-delimited row.

    The target ``outputFile`` is (over)written as UTF-8.
    """
    with open(outputFile, "w", newline='', encoding='utf-8') as sink:
        csv.writer(sink, delimiter=",").writerows(listOut)
            
def getTxt(path):
    """Return the full text content of the file at ``path``.

    The file is decoded as UTF-8, matching what ``writeText`` writes, and is
    closed before returning.
    """
    with open(path, 'r', encoding='utf-8') as textin:
        return textin.read()

def getCsv(path, delim = ','):
    """Read a UTF-8 CSV file and return its rows as a list of string lists.

    Args:
        path: Path of the CSV file.
        delim: Field delimiter (defaults to ``','``).

    Returns:
        One list of field strings per row, in file order.
    """
    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing embedded line breaks are returned unchanged.
    with open(path, newline='', encoding='utf-8') as file:
        return list(csv.reader(file, delimiter=delim))

In [7]:
def getFloatCsv(path, delim = ','):
    """Read a UTF-8 CSV file and return its rows with every field cast to float.

    Args:
        path: Path of the CSV file.
        delim: Field delimiter (defaults to ``','``).

    Returns:
        One list of floats per row, in file order.

    Raises:
        ValueError: If a field cannot be parsed by ``float``.
    """
    with open(path, encoding='utf-8') as source:
        rows = csv.reader(source, delimiter=delim)
        return [list(map(float, row)) for row in rows]

### data import

In [8]:
# os.path.join inserts the separator of the running platform, so a single
# call replaces the separate Windows / non-Windows branches.
feat = getFloatCsv(os.path.join('..', 'output', 'feat.csv'))

In [9]:
# os.path.join inserts the separator of the running platform, so a single
# call replaces the separate Windows / non-Windows branches.
label = getCsv(os.path.join('..', 'output', 'labels.csv'))

### numpy prep

In [10]:
import numpy as np

In [11]:
# Convert the nested list of floats read from feat.csv into a NumPy array.
X = np.array(feat)

### sklearn prep

In [12]:
from sklearn.preprocessing import MultiLabelBinarizer

In [13]:
from sklearn import metrics

In [14]:
# Learn the set of labels from `label` and encode every row as a binary
# indicator vector in a single call.
multilabel_binarizer = MultiLabelBinarizer()
y = multilabel_binarizer.fit_transform(label)

In [57]:
# Display the feature array and the binarized label array.
X, y

(array([[-0.40620422,  0.37869263,  0.58084106, ...,  0.03137207,
          0.64916992, -1.32333374],
        [-0.50666809,  0.45892334,  0.73953247, ..., -0.08215332,
          1.0402832 , -1.47055054],
        [-0.44416809,  0.43939209,  0.61599731, ..., -0.01086426,
          0.69067383, -1.39877319],
        ...,
        [-1.87205505,  9.57649994,  5.45687866, ..., -7.40915108,
          4.59017944, -0.98852539],
        [-1.97251892,  9.65673065,  5.61557007, ..., -7.52267647,
          4.98129272, -1.13574219],
        [-1.91001892,  9.6371994 ,  5.49203491, ..., -7.45138741,
          4.63168335, -1.06396484]]), array([[0, 1, 0, ..., 0, 1, 0],
        [0, 1, 0, ..., 0, 1, 0],
        [0, 1, 0, ..., 0, 1, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]))

In [58]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the samples for testing; random_state is fixed so the
# split is the same on every run.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.15, random_state = 42)

In [59]:
# Report the shapes of the training partition and of the held-out test partition.
print('Dims training set: ', train_X.shape, train_y.shape)
print('Dims test set: ', test_X.shape, test_y.shape)

Dims training set:  (425, 300) (425, 158)
Dims training set:  (75, 300) (75, 158)


### RandomForest

In [60]:
from sklearn.ensemble import RandomForestClassifier

In [61]:
# Forest of 100 trees, each capped at depth 15; random_state is fixed for
# repeatable results. Fitted directly on the multilabel indicator matrix.
rf = RandomForestClassifier(n_estimators=100, max_depth=15,random_state=0)
rf.fit(train_X, train_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=15, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [62]:
# Predict label indicator vectors for the held-out samples.
pred_rf = rf.predict(test_X)

In [63]:
# average="samples": per the scikit-learn docs, precision is computed for
# each sample separately and then averaged over samples (multilabel only).
print('Precision: ', metrics.precision_score(test_y, pred_rf, average="samples"))

Precision:  0.9479632774632776


In [64]:
# NOTE(review): for multilabel targets scikit-learn documents accuracy_score
# as subset accuracy (a sample counts only if every label matches), which
# would explain why it is far lower than the sample-averaged precision.
print('Accuracy: ', metrics.accuracy_score(test_y, pred_rf))

Accuracy:  0.44


# kNN

In [65]:
from sklearn.neighbors import KNeighborsClassifier

In [66]:
# Nearest-neighbour classifier using a single neighbour (n_neighbors=1).
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_X, train_y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [67]:
# Predict on the held-out samples and report sample-averaged precision.
pred_knn = knn.predict(test_X)
print('Precision: ', metrics.precision_score(test_y, pred_knn, average="samples"))

Precision:  0.925334997361313


In [68]:
# NOTE(review): with multilabel targets this is presumably subset accuracy
# (all labels of a sample must match) — confirm against the sklearn docs.
print('Accuracy: ', metrics.accuracy_score(test_y, pred_knn))

Accuracy:  0.5866666666666667


# OneVsRest

In [69]:
# NOTE(review): `metrics` and `MultiLabelBinarizer` are already imported in
# earlier cells, and `array` is not referenced by any code in this cell.
from sklearn import metrics
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from numpy import array

# Wrap an SVC in a one-vs-rest scheme, fit it on the multilabel indicator
# matrix, and predict the held-out samples.
# NOTE(review): only predict() is called below, so probability=True looks
# unnecessary and presumably just slows down fitting — confirm before removing.
clf = OneVsRestClassifier(SVC(probability=True, gamma='auto'))
clf.fit(train_X, train_y)
predictions = clf.predict(test_X)

# Per-label precision / recall / F1 / support as a text table.
my_metrics = metrics.classification_report(test_y, predictions)

print(my_metrics)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      1.00      1.00         4
           2       1.00      0.67      0.80         3
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       0.60      0.50      0.55         6
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         2
           8       1.00      1.00      1.00        32
           9       0.00      0.00      0.00         0
          10       1.00      1.00      1.00         4
          11       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       1.00      0.60      0.75         5
          15       1.00      0.80      0.89        10
          16       0.00      0.00      0.00         1
          17       0.00    

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [70]:
# Score of the one-vs-rest model on the held-out set.
# NOTE(review): for multilabel targets this is presumably subset accuracy,
# i.e. the same measure as metrics.accuracy_score — confirm.
print(clf.score(test_X, test_y, sample_weight=None))

0.37333333333333335


# Neural Network

In [71]:
from sklearn.neural_network import MLPClassifier

In [72]:
# Multi-layer perceptron with one hidden layer of 15 units, trained with the
# L-BFGS solver; random_state is fixed for repeatable weight initialisation.
mclf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)

In [73]:
# Train the network on the training partition and predict the held-out samples.
mclf.fit(train_X, train_y)
predictionsm = mclf.predict(test_X)

In [74]:
# Sample-averaged precision of the MLP predictions (same metric as used for
# the random forest and kNN models above).
print('Precision: ', metrics.precision_score(test_y, predictionsm,average='samples'))

Precision:  0.7890497835497836


  'precision', 'predicted', average, warn_for)


In [75]:
# NOTE(review): with multilabel targets this is presumably subset accuracy
# (all labels of a sample must match) — confirm against the sklearn docs.
print('Accuracy: ', metrics.accuracy_score(test_y, predictionsm))

Accuracy:  0.13333333333333333


# RidgeClassifier

In [83]:
from sklearn.linear_model import RidgeClassifierCV

In [84]:
# Ridge classifier with built-in cross-validation, left at its default settings.
rc = RidgeClassifierCV()

In [85]:
# Fit the ridge classifier on the training partition.
rc.fit(train_X, train_y)

RidgeClassifierCV(alphas=array([ 0.1,  1. , 10. ]), class_weight=None,
         cv=None, fit_intercept=True, normalize=False, scoring=None,
         store_cv_values=False)