# uncompyle6 version 3.7.4
# Python bytecode 3.8 (3413)
# Decompiled from: Python 3.8.5 (default, Jul 28 2020, 12:59:40)
# [GCC 9.3.0]
# Embedded file name: /media/pranav/Data1/Personal/Projects/Strandburg-Peshkin 2019-20/code/classifier.py
# Compiled at: 2020-03-13 17:22:47
# Size of source mod 2**32: 8496 bytes
from config import *
from errors import *
from variables import *
import os, random
from sklearn import svm
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from joblib import dump, load
import numpy as np
from matplotlib import pyplot as plt
def load_svm():
    from sklearn import svm
    return svm.SVC(gamma='scale')

def load_knn():
    from sklearn import neighbors
    return neighbors.KNeighborsClassifier(NEAREST_NEIGHBOUR_CLASSIFIER_K)

def load_RandomForest():
    from sklearn import ensemble
    return ensemble.RandomForestClassifier()

ClassifiersDictionary = {'SVM': load_svm,
                         'k-NN': load_knn,
                         'RF': load_RandomForest}
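# Note: each value in ClassifiersDictionary is a zero-argument factory, so a fresh,
# untrained estimator is produced on every lookup-and-call, e.g. ClassifiersDictionary['RF']()
# returns a new RandomForestClassifier. ClassifierBundle.__init__ below does exactly this for
# every comma-separated key in its ClassifiersToUse argument.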
class ClassifierBundle:
    """Class ClassifierBundle, one of several nodes in a hierarchical tree of nodes (see ClassifierTree below).
    Attributes are:
        StatesConsidered (list): All states this node's classifiers must consider.
        GroupedOutputs (dict): Desired outputs mapped to non-intersecting tuples of states.
        ClassifiersToUse (list): The instantiated classifier objects, one per requested key.
        ClassifierNames (list): The ClassifiersDictionary keys naming each classifier.
    Functions are:
        classify: Takes features and returns classifications.
        train: Takes features and classifications, trains the learning algorithms.
        SaveLearning: Saves .joblib files [FUNCTIONALITY REMOVED]
        LoadLearning: Loads .joblib files [FUNCTIONALITY REMOVED]
    Errors associated are: #TODO
    """
    def __init__(self, NodeName, StatesConsidered, GroupedOutputs, ClassifiersToUse='RF'):
        """
        Init function for class ClassifierBundle.
        Args:
            NodeName (str): A string identifier for the node.
            StatesConsidered (list): A list of all states this classifier will have to consider.
            GroupedOutputs (dict): A dictionary with desired outputs mapped to non-intersecting tuples
                                   of states that fall under those outputs. For example, DYNAMIC could
                                   map to (WALK, LOPE).
            ClassifiersToUse (str): A string containing comma-separated keys from ClassifiersDictionary.
                                    Represents what classifier(s) this node engages for classification.
        Returns:
            None, for now.
        Raises:
            None, for now.
        """
        self.StatesConsidered = StatesConsidered
        self.GroupedOutputs = GroupedOutputs
        self.ClassifierNames = [Name.strip() for Name in ClassifiersToUse.split(',')]
        self.ClassifiersToUse = [ClassifiersDictionary[Name]() for Name in self.ClassifierNames]
    def classify(self, FeatureTuple):
        """
        classification function for class ClassifierBundle.
        Args:
            FeatureTuple (numpy.array): A numpy.array containing all necessary features.
        Returns:
            Classification (dict): Each element is a str:numpy.array pair. The str is one of the names in
                self.ClassifierNames. For example, to get the results of the Random Forest classification, use
                >>> classify(<...>)['RF']
        Raises:
            None, for now.
        """
        Mat = []
        for Classifier in self.ClassifiersToUse:
            Mat.append(Classifier.predict(FeatureTuple))
        return {self.ClassifierNames[i]: Mat[i] for i in range(len(self.ClassifierNames))}
    def train(self, FeaturesForTraining, ClassificationsForTraining):
        """
        train function to implement learning in ClassifierBundle; trains all available classifiers
        (selecting the best one is handled separately by UseBestClassifier).
        Args:
            FeaturesForTraining (numpy.array): A numpy array containing all features, such that
                len(FeaturesForTraining[i]) is the same for every run on this node.
            ClassificationsForTraining (numpy.array): A numpy array of strings indicating the true
                classification. All strings should be members of the tuples in self.GroupedOutputs.values().
        Returns:
            None, for now.
        Raises:
            None, for now.
        """
        for Classifier in self.ClassifiersToUse:
            Classifier.fit(X=FeaturesForTraining, y=ClassificationsForTraining)
    def UseBestClassifier(self, FeaturesForTraining, ClassificationsForTraining):
        """Return the trained classifier that scores best under CLASSIFIER_SELECTION_HEURISTIC (currently accuracy only)."""
        if CLASSIFIER_SELECTION_HEURISTIC == 'ACCURACY':
            BestClassifier = self.ClassifiersToUse[0]
            TrainedClassifiers = self.ClassifiersToUse
            BestAccuracy = accuracy_score(ClassificationsForTraining, BestClassifier.predict(FeaturesForTraining))
            for TrainedClassifier in TrainedClassifiers:
                if accuracy_score(ClassificationsForTraining, TrainedClassifier.predict(FeaturesForTraining)) > BestAccuracy:
                    BestClassifier = TrainedClassifier
                    BestAccuracy = accuracy_score(ClassificationsForTraining, BestClassifier.predict(FeaturesForTraining))
            return BestClassifier
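    # Illustrative sketch (not in the original module): after train(), a caller could keep only
    # the estimator returned by UseBestClassifier, e.g.
    #
    #     Node.train(Features, Labels)
    #     Best = Node.UseBestClassifier(Features, Labels)
    #     Predictions = Best.predict(Features)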
    def _ShowTrainingForAllClassifiers(self, FeaturesForTraining, ClassificationsForTraining, FeaturesForTesting, ClassificationsForTesting):
        """Train every classifier, print its confusion matrix and classification report, and plot test accuracies."""
        self.train(FeaturesForTraining, ClassificationsForTraining)
        BarGraph_x_pos = np.arange(len(self.ClassifiersToUse))
        BarGraph_y_vals = []
        Predictions = self.classify(FeaturesForTesting)
        for Name, Classifier in zip(self.ClassifierNames, self.ClassifiersToUse):
            print('\x1b[1;32m', Classifier, '\x1b[0;39m:')
            print('\nConfusion Matrix:')
            print('For the states: ', STATES)
            print(confusion_matrix(ClassificationsForTesting, Predictions[Name], labels=STATES, normalize='true'))
            print('\nClassification Report:')
            print(classification_report(ClassificationsForTesting, Predictions[Name]))
        for Name in self.ClassifierNames:
            BarGraph_y_vals.append(accuracy_score(ClassificationsForTesting, Predictions[Name]))
        plt.bar(BarGraph_x_pos, BarGraph_y_vals, color='black', tick_label=(self.ClassifierNames), width=0.4)
        plt.axis([-0.5, len(self.ClassifiersToUse) - 0.5, 0.0, 1])
        plt.xlabel('Classifiers used')
        plt.ylabel('Fraction of results accurately predicted')
        plt.show()
class ClassifierTree:
    """A tree of ClassifierBundle nodes that can perform a hierarchical classification. [NOT YET IMPLEMENTED]"""

    def __init__(self):
        pass

    def classify(self):
        pass

    def train(self):
        pass
# okay decompiling classifier.cpython-38.pyc
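# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the decompiled module). It assumes
# config.py, errors.py and variables.py from this repository are importable; the state
# labels, feature shapes, and the NodeName 'root' below are made up for illustration.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    Features = rng.normal(size=(100, 4))                      # 100 samples, 4 features
    Labels = np.where(Features[:, 0] > 0, 'DYNAMIC', 'REST')  # toy ground-truth states

    Node = ClassifierBundle(NodeName='root',
                            StatesConsidered=['REST', 'DYNAMIC'],
                            GroupedOutputs={'REST': ('REST',), 'DYNAMIC': ('DYNAMIC',)},
                            ClassifiersToUse='RF, SVM')
    Node.train(Features, Labels)

    # classify() returns one prediction array per requested classifier, keyed by name.
    Predictions = Node.classify(Features[:5])
    print(Predictions['RF'], Predictions['SVM'])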