-
Notifications
You must be signed in to change notification settings - Fork 4
/
ipredicthr.py
143 lines (130 loc) · 5.18 KB
/
ipredicthr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from modAL.models import ActiveLearner
import pdb
import matplotlib.pyplot as plt
import numpy as np
import sys
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from modAL.models import ActiveLearner, CommitteeRegressor
from modAL.disagreement import max_std_sampling
from sklearn.svm import SVR
from xgboost import XGBRegressor
# Load the recording whose path is given as the first command-line argument.
data = pd.read_csv(sys.argv[1])

# Feature matrix: every column except the first one and the last three.
feature_columns = data.columns[1:-3]
X = data.loc[:, feature_columns].values
# Regression target and the per-row timestamps written to the results file.
y = data['HeartRate'].values
time = data['timestamp'].values

# One ActiveLearner wrapper per base regressor.
rf = ActiveLearner(
    estimator=RandomForestRegressor(n_estimators=150, n_jobs=-1),
)
svrl = ActiveLearner(estimator=SVR(kernel='rbf'))
ada = ActiveLearner(estimator=AdaBoostRegressor(n_estimators=250))
# NOTE(review): criterion='mae' is rejected by recent scikit-learn releases
# -- confirm the pinned version before upgrading.
gbt = ActiveLearner(
    estimator=GradientBoostingRegressor(n_estimators=500, criterion='mae'),
)
xgb1 = ActiveLearner(estimator=XGBRegressor())
learner_list = [rf, svrl, ada, gbt, xgb1]

# Committee over all five learners, with max_std_sampling registered as its
# query strategy.
committee = CommitteeRegressor(
    learner_list=learner_list,
    query_strategy=max_std_sampling,
)
from sklearn.metrics import mean_absolute_error
print(sys.argv[1])

# Seed the committee with the first 100 rows of the recording.
xt, yt = X[:100], y[:100]
print(xt)
print(yt)
print(xt.shape)
print(yt.shape)
committee.teach(xt, yt)

# Row cursor used by the streaming loop to index X / y / time.
idx = 0
# Rolling histories of the committee's reported std and variance.
stds, varl = [], []
# Number of model updates, i.e. of true labels that were queried.
modup = 0
# Absolute and squared prediction errors collected over the run.
diffs, sqdiffs = [], []
# NOTE(review): refits and llidx are not referenced anywhere in the visible
# script; kept in case something outside this view reads them.
refits = 0
llidx = 0
# Most recently queried samples and their true heart-rate values.
recentX, recentY = [], []
# Length of every rolling history / recent-sample window.
most_recent_n = int(sys.argv[4])
# Per-learner "alive time" (predictions made since the last refit), all
# starting at zero.
learners_time = [0] * len(learner_list)
# Now iterate over the data one sample at a time: predict, decide whether the
# true label has to be queried, update or refit committee members, and append
# one result row per sample to the output file (sys.argv[2]).
#
# NOTE(review): the loop runs len(X) - 100 times, but `idx` is initialised to
# 0 by the setup code, so the samples visited are X[0 : len(X) - 100] -- which
# includes the 100 seed rows -- rather than X[100:]. Left unchanged because it
# alters the evaluation protocol; confirm the intent.


def _refit_on_recent(learner, samples, targets):
    """Teach one committee member the recent window with only_new=True.

    samples is a non-empty list of equal-length 1-D feature vectors and
    targets the matching list of labels.
    """
    batch_x = np.reshape(samples, (len(samples), len(samples[0])))
    batch_y = np.reshape(targets, (len(targets),))
    learner.teach(batch_x, batch_y, only_new=True)


# Loop-invariant settings, parsed once instead of on every iteration.
variance_factor = int(sys.argv[3])   # multiplier on the std of recent variances
max_alive_steps = int(sys.argv[5])   # predictions allowed before a forced refit
error_threshold = 5                  # |prediction - truth| that triggers a refit

for i in range(len(X[100:])):
    # NOTE(review): stock modAL returns (mean, std) for return_std=True; the
    # four-value result presumably comes from a patched build -- confirm.
    pred, std, votes, var = committee.predict(
        X[idx].reshape(1, -1), return_std=True,
    )
    true_hr = y[idx]
    stds.append(std)
    varl.append(var)
    print("----")
    print("pred " + str(pred))
    print("true " + str(true_hr))
    print("std: " + str(std))
    print("var: " + str(var))

    # Keep the absolute and squared differences between prediction and truth.
    diff = abs(pred - true_hr)
    diffs.append(diff)
    sqdiffs.append(diff ** 2)

    # Current sample shaped the way committee.teach is called below.
    xt = X[idx].reshape(1, -1)
    yt = true_hr.reshape(-1)

    print(learners_time)
    # Every learner has just made a prediction, so increase its alive time.
    for slot in range(len(learner_list)):
        learners_time[slot] += 1

    # Only keep the most recent history of prediction certainties.
    if len(stds) > most_recent_n:
        stds = stds[-most_recent_n:]
    if len(varl) > most_recent_n:
        varl = varl[-most_recent_n:]

    # Query the true label when there is no variance at all, or when the
    # variance exceeds variance_factor times the std of the recent variances.
    var_spread = np.std(varl)
    if var == 0 or var > variance_factor * var_spread:
        print("current std of var is (10 his ) " + str(var_spread))
        # The true HR is assumed to have been queried: store it for refits.
        recentX.append(X[idx])
        recentY.append(true_hr)
        if len(recentX) > most_recent_n:
            recentX = recentX[-most_recent_n:]
            recentY = recentY[-most_recent_n:]
        # One more model update (label queried).
        modup += 1

        if diff > error_threshold:
            # The committee was far off: refit every member whose own vote
            # missed the truth by more than the threshold.
            for vid, v in enumerate(votes[0]):
                if abs(v - true_hr) > error_threshold:
                    print("REFITTING learner " + str(vid))
                    _refit_on_recent(
                        committee.learner_list[vid], recentX, recentY,
                    )
                    learners_time[vid] = 0
        else:
            # Uncertain but not far off: just teach the committee this sample.
            print("UPDATING MODEL")
            print(votes[0])
            committee.teach(xt, yt)

    # After the prediction, refit any learner that has been alive too long.
    for learner_id, alive_steps in enumerate(learners_time):
        if alive_steps <= max_alive_steps:
            continue
        if not recentX:
            # Nothing has been queried yet, so there is no data to refit on;
            # leave the counter alone and retry on a later iteration.
            continue
        print("REFITTING learner TIME EXP " + str(learner_id))
        _refit_on_recent(
            committee.learner_list[learner_id], recentX, recentY,
        )
        learners_time[learner_id] = 0

    upds = (
        str((float(modup) / (i + 1)) * 100) + "% "
        + (str(modup) + "/" + str(i + 1))
    )
    print("upds " + upds)
    mae = str(np.mean(diffs))
    print("mae " + mae)
    mse = str(np.mean(sqdiffs))
    print("mse " + mse)
    print("----")

    # Write the row BEFORE advancing idx so that the logged true HR and
    # timestamp belong to the same sample as the logged prediction.
    row = [
        mae, str(modup), str(i + 1), str(pred),
        str(true_hr), mse, str(time[idx]),
    ]
    with open(sys.argv[2], 'a') as f:
        f.write(",".join(row) + "\n")
    idx += 1