In [13]:
import os
import sys
import csv
import operator
import time
import datetime
import platform

In [14]:
def writeText(text, path, mode = 'w'):
    """Write the string `text` to the file at `path`, encoded as UTF-8.

    `mode` is handed straight to open(); the default 'w' replaces any
    existing content, while e.g. 'a' appends to it.
    """
    with open(path, mode, encoding = 'utf-8') as sink:
        sink.write(text)
        
def writeJson(json, path, mode = 'w'):
    """Serialize the object `json` as JSON text and write it to `path`.

    `mode` is handed straight to open(); the default 'w' replaces any
    existing content.
    """
    # The parameter is called `json` (kept so keyword callers still work),
    # which hides the standard-library module of the same name inside this
    # function. Import the module under an alias so dumps() is actually
    # reachable instead of being looked up on the caller's data.
    import json as json_module
    with open(path, mode) as file:
        file.write(json_module.dumps(json))
        
def writeCsv(listOut, outputFile):
    """Write the rows in `listOut` to `outputFile` as comma-separated UTF-8.

    Each element of `listOut` is one row (an iterable of cell values).
    Any existing file at `outputFile` is overwritten.
    """
    with open(outputFile, "w", newline='', encoding = 'utf-8') as sink:
        csv.writer(sink, delimiter = ",").writerows(listOut)
            
def getTxt(path):
    """Return the entire contents of the text file at `path` as a string.

    The file is decoded as UTF-8, matching what writeText() produces, and
    is closed before returning.
    """
    with open(path, 'r', encoding = 'utf-8') as textin:
        return textin.read()

def getCsv(path, delim = ','):
    """Read the UTF-8 CSV file at `path` and return its rows.

    Returns a list with one entry per record; each entry is a list of the
    record's fields as strings. `delim` is the field delimiter.
    """
    # newline='' lets the csv module do its own newline handling, so line
    # breaks embedded in quoted fields come back unchanged.
    with open(path, newline = '', encoding = 'utf-8') as file:
        return list(csv.reader(file, delimiter = delim))

In [15]:
def getFloatCsv(path, delim = ','):
    """Read the UTF-8 CSV file at `path` as rows of floats.

    Returns a list with one entry per record; each entry is a list holding
    float(field) for every field of that record. `delim` is the field
    delimiter. A field that float() cannot parse raises ValueError.
    """
    with open(path, encoding = 'utf-8') as source:
        records = csv.reader(source, delimiter = delim)
        return [list(map(float, record)) for record in records]

### data import

In [16]:
# Load the feature rows. os.path.join picks the host's path separator, so no
# explicit branch on the operating system is needed.
feat = getFloatCsv(os.path.join('..', 'output', 'feat.csv'))

In [17]:
# Load the label rows (kept as strings). os.path.join picks the host's path
# separator, so no explicit branch on the operating system is needed.
label = getCsv(os.path.join('..', 'output', 'labels.csv'))

### numpy prep

In [18]:
import numpy as np

In [19]:
# Convert the parsed feature rows into a NumPy array for scikit-learn.
X = np.array(feat)

### sklearn prep

In [20]:
from sklearn.preprocessing import MultiLabelBinarizer

In [21]:
from sklearn import metrics

In [22]:
# Learn the label vocabulary and encode each sample's label list as a 0/1
# indicator row in a single call; the fitted binarizer is kept for later use.
multilabel_binarizer = MultiLabelBinarizer()
y = multilabel_binarizer.fit_transform(label)

In [23]:
# Display the feature matrix and the binarized label matrix as a sanity check.
X, y

(array([[-0.40620422,  0.37869263,  0.58084106, ...,  0.03137207,
          0.64916992, -1.32333374],
        [-0.50666809,  0.45892334,  0.73953247, ..., -0.08215332,
          1.0402832 , -1.47055054],
        [-0.44416809,  0.43939209,  0.61599731, ..., -0.01086426,
          0.69067383, -1.39877319],
        ...,
        [-1.87205505,  9.57649994,  5.45687866, ..., -7.40915108,
          4.59017944, -0.98852539],
        [-1.97251892,  9.65673065,  5.61557007, ..., -7.52267647,
          4.98129272, -1.13574219],
        [-1.91001892,  9.6371994 ,  5.49203491, ..., -7.45138741,
          4.63168335, -1.06396484]]), array([[0, 1, 0, ..., 0, 1, 0],
        [0, 1, 0, ..., 0, 1, 0],
        [0, 1, 0, ..., 0, 1, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]))

In [24]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for evaluation; random_state pins the shuffle
# so the same split is produced on every run.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [25]:
# Report the shapes of both halves of the split (features, then labels).
print('Dims training set: ', train_X.shape, train_y.shape)
print('Dims test set: ', test_X.shape, test_y.shape)

Dims training set:  (375, 300) (375, 158)
Dims training set:  (125, 300) (125, 158)


### RandomForest

In [69]:
from sklearn.ensemble import RandomForestClassifier

In [70]:
# Random forest with 100 trees, depth capped at 15 and a fixed seed, fitted
# directly on the binarized multilabel target matrix.
rf = RandomForestClassifier(n_estimators=100, max_depth=15,random_state=0)
rf.fit(train_X, train_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=15, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [71]:
# Predict label indicator rows for the held-out samples.
pred_rf = rf.predict(test_X)

In [72]:
# average="samples": precision is computed per test sample over its label set
# and then averaged across samples.
# NOTE(review): the warning recorded under this cell looks like scikit-learn's
# undefined-metric warning (samples with no predicted labels) — confirm.
print('Precision: ', metrics.precision_score(test_y, pred_rf, average="samples"))

Precision:  0.9414464137170019


  'precision', 'predicted', average, warn_for)


In [73]:
# For multilabel targets accuracy_score is subset accuracy: a sample counts as
# correct only when its whole predicted label set matches exactly.
print('Accuracy: ', metrics.accuracy_score(test_y, pred_rf))

Accuracy:  0.368


# kNN

In [43]:
from sklearn.neighbors import KNeighborsClassifier

In [44]:
# 1-nearest-neighbour baseline: each prediction copies the labels of the
# single closest training sample.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_X, train_y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [45]:
# Predict on the held-out samples and report sample-averaged precision
# (per-sample precision over the label set, averaged across samples).
pred_knn = knn.predict(test_X)
print('Precision: ', metrics.precision_score(test_y, pred_knn, average="samples"))

Precision:  0.8962183816183816


In [46]:
# Subset accuracy: a sample counts only when every one of its labels matches.
print('Accuracy: ', metrics.accuracy_score(test_y, pred_knn))

Accuracy:  0.504


<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br>

# OneVsRest

In [47]:
from sklearn import metrics
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from numpy import array

# One-vs-rest: an independent binary SVC is trained for each label column.
# NOTE(review): probability=True makes SVC fit an extra internally
# cross-validated probability model, yet only predict() is called in the
# cells visible here — consider dropping it if predict_proba is not needed.
clf = OneVsRestClassifier(SVC(probability=True, gamma='auto'))
clf.fit(train_X, train_y)
predictions = clf.predict(test_X)

# Text report with per-label precision, recall, F1 and support.
my_metrics = metrics.classification_report(test_y, predictions)

print(my_metrics)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      0.83      0.91         6
           2       1.00      0.75      0.86         4
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         4
           5       1.00      0.47      0.64        15
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         7
           8       1.00      1.00      1.00        63
           9       0.00      0.00      0.00         1
          10       0.86      0.75      0.80         8
          11       0.00      0.00      0.00        11
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       1.00      0.50      0.67         6
          15       0.93      0.87      0.90        15
          16       0.00      0.00      0.00         3
          17       0.00    

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [48]:
# Score of the one-vs-rest model on the held-out split, with no per-sample
# weighting (the default).
print(clf.score(test_X, test_y))

0.328


# Neural Network

In [49]:
from sklearn.neural_network import MLPClassifier

In [50]:
# Small multi-layer perceptron: one hidden layer of 15 units, L-BFGS solver,
# regularization strength alpha=1e-5, fixed seed for reproducible weights.
mclf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)

In [51]:
# Train the MLP on the training split, then predict label indicator rows for
# the held-out samples.
mclf.fit(train_X, train_y)
predictionsm = mclf.predict(test_X)

In [53]:
# Sample-averaged precision: per-sample precision over the label set,
# averaged across the test samples.
print('Precision: ', metrics.precision_score(test_y, predictionsm,average='samples'))

Precision:  0.7804645583174995


In [54]:
# Subset accuracy: a sample counts only when every one of its labels matches.
print('Accuracy: ', metrics.accuracy_score(test_y, predictionsm))

Accuracy:  0.168
