In [1]:
import os
import sys
import csv
import operator
import time
import datetime
import platform

In [2]:
def writeText(text, path, mode = 'w'):
    """Write ``text`` to the file at ``path`` as UTF-8.

    Args:
        text: String to write.
        path: Destination file path.
        mode: Mode string handed to ``open`` (defaults to ``'w'``).
    """
    handle = open(path, mode, encoding='utf-8')
    # The context manager closes the handle even if the write fails.
    with handle:
        handle.write(text)
        
def writeJson(json, path, mode = 'w'):
    """Serialize ``json`` to JSON text and write it to ``path``.

    Args:
        json: JSON-serializable object (dict, list, str, number, ...).
        path: Destination file path.
        mode: Mode string handed to ``open`` (defaults to ``'w'``).

    Raises:
        TypeError: If ``json`` contains a value ``json.dumps`` cannot encode.
    """
    # The parameter is called ``json`` (kept so existing callers still work),
    # which shadows the standard-library module of the same name. Import the
    # module under an alias so ``dumps`` is looked up on the module rather
    # than on the object being serialized.
    import json as json_module

    with open(path, mode, encoding='utf-8') as file:
        file.write(json_module.dumps(json))
        
def writeCsv(listOut, outputFile):
    """Write the rows in ``listOut`` to ``outputFile`` as comma-separated CSV.

    Args:
        listOut: Iterable of rows; each row is an iterable of field values.
        outputFile: Destination file path (overwritten, UTF-8 encoded).
    """
    with open(outputFile, "w", newline='', encoding='utf-8') as handle:
        csv.writer(handle, delimiter=",").writerows(listOut)
            
def getTxt(path):
    """Return the full contents of the text file at ``path``.

    The file is read as UTF-8, matching the encoding ``writeText`` writes
    with, and is closed before the function returns.
    """
    with open(path, 'r', encoding='utf-8') as textin:
        return textin.read()

def getCsv(path, delim = ','):
    """Read the CSV file at ``path`` and return its rows.

    Args:
        path: Path of a UTF-8 encoded CSV file.
        delim: Field delimiter (defaults to ``','``).

    Returns:
        A list with one list of strings per row.
    """
    # newline='' lets the csv module handle line endings itself, so line
    # breaks embedded in quoted fields are preserved unchanged.
    with open(path, encoding='utf-8', newline='') as file:
        return list(csv.reader(file, delimiter=delim))

In [3]:
def getFloatCsv(path, delim = ','):
    """Read the CSV file at ``path`` and return its rows as lists of floats.

    Args:
        path: Path of a UTF-8 encoded CSV file holding only numeric fields.
        delim: Field delimiter (defaults to ``','``).

    Returns:
        A list with one list of floats per non-empty row.

    Raises:
        ValueError: If a field cannot be converted with ``float``.
    """
    # newline='' is the csv module's documented way to open files for reading.
    with open(path, encoding='utf-8', newline='') as file:
        csvreader = csv.reader(file, delimiter = delim)
        # csv.reader yields [] for blank lines (e.g. a trailing empty line);
        # skip those so every returned row carries data and the result stays
        # rectangular when all data rows have equal length.
        return [[float(x) for x in line] for line in csvreader if line]

### data import

In [4]:
# os.path.join inserts the separator of the running platform, so the same
# line yields '..\\output\\feat.csv' on Windows and '../output/feat.csv'
# elsewhere without branching on platform.system().
feat = getFloatCsv(os.path.join('..', 'output', 'feat.csv'))

In [5]:
# os.path.join inserts the separator of the running platform, so the same
# line yields '..\\output\\labels.csv' on Windows and '../output/labels.csv'
# elsewhere without branching on platform.system().
label = getCsv(os.path.join('..', 'output', 'labels.csv'))

### numpy prep

In [6]:
import numpy as np

In [7]:
# Convert the list of float rows loaded from feat.csv into a NumPy array.
X = np.array(feat)

### sklearn prep

In [8]:
from sklearn.preprocessing import MultiLabelBinarizer

In [9]:
# Learn the set of distinct labels from the rows of labels.csv and encode
# each row as a 0/1 indicator vector in one step.
multilabel_binarizer = MultiLabelBinarizer()
y = multilabel_binarizer.fit_transform(label)

In [10]:
# Show the feature array and the binarized label array as the cell output.
X, y

(array([[-0.40620422,  0.37869263,  0.58084106, ...,  0.03137207,
          0.64916992, -1.32333374],
        [-0.50666809,  0.45892334,  0.73953247, ..., -0.08215332,
          1.0402832 , -1.47055054],
        [-0.44416809,  0.43939209,  0.61599731, ..., -0.01086426,
          0.69067383, -1.39877319],
        ...,
        [-1.87205505,  9.57649994,  5.45687866, ..., -7.40915108,
          4.59017944, -0.98852539],
        [-1.97251892,  9.65673065,  5.61557007, ..., -7.52267647,
          4.98129272, -1.13574219],
        [-1.91001892,  9.6371994 ,  5.49203491, ..., -7.45138741,
          4.63168335, -1.06396484]]), array([[0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 1, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]))

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for testing; random_state fixes the shuffle so
# the split is the same on every run.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [12]:
# Report the shapes of both splits; the second line describes the held-out
# test split, so it is labelled accordingly.
print('Dims training set: ', train_X.shape, train_y.shape)
print('Dims test set: ', test_X.shape, test_y.shape)

Dims training set:  (375, 300) (375, 116)
Dims training set:  (125, 300) (125, 116)


### RandomForest

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

In [14]:
# Random forest of 100 trees, each limited to depth 2, with a fixed seed;
# fitted on the training split against the binarized label matrix.
clf = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=0)
clf.fit(train_X, train_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [15]:
# Predict label indicators for the held-out test features.
predicted = clf.predict(test_X)

In [16]:
# Show the predictions for the test split as the cell output.
predicted

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

### Evaluation

In [17]:
from sklearn import metrics

In [18]:
# NOTE: test_y comes from MultiLabelBinarizer, so this is a multilabel target.
# Per the scikit-learn docs, accuracy_score then reports subset accuracy: a
# sample counts as correct only if its whole label vector matches exactly.
print('Accuracy: ', metrics.accuracy_score(test_y, predicted))

Accuracy:  0.032


<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br>