-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
119 lines (87 loc) · 3.07 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Pranav Minasandra
# pminasandra.github.io
# 0 Dec, 2023
"""
Runs all analyses needed sequentially.
"""
import glob
import os
import os.path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics
import sklearn.model_selection
from sklearn.preprocessing import StandardScaler
import analyses
import accreading
import config
import classifier
import extractor
import percentiler
import postprocessing
import preprocessing
import utilities
# Feature set fed to the classifier: every feature the extractor computes,
# plus the percentile-of-VeDBA column that percentiler adds afterwards.
ALL_FEATURES = list(extractor.ALL_FEATURES) + ['perc_vedba']

if not config.SUPPRESS_INFORMATIVE_PRINT:
    # Swap the builtin print for the project's informative printer,
    # keeping a handle on the original in old_print.
    old_print = print
    print = utilities.sprint
if __name__ == "__main__":
    # Runs the full pipeline in order: feature extraction, percentile
    # augmentation, training-data assembly, classifier training/evaluation,
    # whole-dataset classification, and post-processing.

    # 0. PRE-PROCESSING
    # preprocessing.exterminate_extraneous_column() # already done, don't re-do

    # 1. FEATURE EXTRACTION
    extractor.make_features_dir()
    accfilegen = accreading.load_acc_files()
    extractor.extract_all_features(accfilegen)

    # 2. ADD perc_vedba COLUMN
    percentiler.add_cols_to_all_files()

    # 3. TRAINING DATA PREPARATION
    # Define the classifier-data path once: it is written here (step 3) and
    # read back in step 4. Previously the same literal was built twice,
    # which risked the two copies drifting apart.
    datasource = os.path.join(config.DATA, "ClassifierRelated",
                              "all_trainable_data_for_classifier.csv")
    data = classifier.load_all_training_data()
    data = data[["Timestamp", "Deployment", "Individual"]
                + ALL_FEATURES + ["Behavior"]]
    data.to_csv(datasource, index=False)

    # 4. CLASSIFIER ANALYSES
    data = pd.read_csv(datasource)
    if config.LOG_TRANSFORM_VEDBA:
        # Tiny epsilon keeps log() finite when mean_vedba is exactly zero.
        data['mean_vedba'] += 1e-10
        data['mean_vedba'] = np.log(data['mean_vedba'])
    train_data, test_data = sklearn.model_selection.train_test_split(
        data,
        test_size=0.25
    )
    # Duplicate with factor 2 in both splits — presumably oversamples the
    # rare 'Running' behaviour class; confirm against analyses module.
    train_data = analyses.duplicate_running(train_data, 2)
    test_data = analyses.duplicate_running(test_data, 2)
    train_features, train_classes = analyses._split_features_and_classes(
        train_data
    )
    test_features, test_classes = analyses._split_features_and_classes(
        test_data
    )
    if config.SCALE_DATA:
        # Fit the scaler on training data only, then apply the same
        # transform to the test split (avoids test-set leakage).
        scaler = StandardScaler()
        scaler.fit(train_features)
        train_features = scaler.transform(train_features)
        test_features = scaler.transform(test_features)
    rfc = classifier.train_random_forest(train_features, train_classes)
    fig, ax = plt.subplots()
    analyses.trad_analyze_random_forest(rfc, test_features, test_classes,
                                        fig, ax)
    plt.cla()
    fig, ax = plt.subplots()
    analyses.indwise_analyze_random_forest(data, fig, ax)

    # 5. CLASSIFY ALL AVAILABLE DATA
    # Retrain on the full (duplicated) dataset before labelling everything.
    data = analyses.duplicate_running(data, 2)
    data_features, data_classes = analyses._split_features_and_classes(data)
    rfc_total = classifier.train_random_forest(data_features, data_classes)
    analyses.classify_all_available_data(rfc_total)

    # 6. POST PROCESSING
    postprocessing.pool_individualwise_predictions()
    postprocessing.pool_deploymentwise_predictions()
else:
    raise ImportError("the module main.py is not meant for imports")