In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import asyncio
import xgboost
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing, metrics, linear_model, multiclass
from server import models, config
from server.kalman import KalmanRSSI
from server.heartbeat import HeratbeatGenerator
from server.learn import regenerate_heartbeats
import imblearn
import seaborn

In [2]:
# Initialise the project's database layer with the server config module.
# NOTE(review): presumably this opens the ORM connection that the queries in
# later cells rely on — confirm against server.models.init_db.
await models.init_db({ 'config': config })

In [3]:
# Look up the tracked device record by its name; `device` is passed to
# regenerate_heartbeats in a later cell.
device = await models.Device.get(name='Mi Smart Band 6')

In [4]:
# Delete stored heartbeats before regenerating them. The filter() call has no
# conditions, so this is not restricted to `device`.
# NOTE(review): destructive on a shared database — confirm that wiping the
# heartbeats of every device is intended.
await models.DeviceHeartbeat.filter().delete()

0

In [5]:
# Rebuild the heartbeats for `device` via the project's learn module.
# NOTE(review): presumably repopulates the DeviceHeartbeat rows read in the
# data-loading cell further down — confirm against server.learn.
await regenerate_heartbeats(device)

In [25]:
def weighted_f_score(y_true, y_pred, *, average='micro', verbose=False,
                     beta=10, spread_penalty=4):
    """Score predictions by per-class F-beta, penalising uneven classes.

    The per-class F-beta scores are computed first. Unless ``average`` is
    ``None`` they are collapsed to ``mean - spread_penalty * std``, so a model
    that does well on most classes but poorly on one is ranked below a model
    that is uniformly good.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted labels, as accepted by
        ``sklearn.metrics.fbeta_score``.
    average : str or None, keyword-only
        Only tested against ``None``. ``None`` returns the raw per-class
        scores; any other value (including the default ``'micro'``) returns
        the penalised mean. No micro-averaging is performed.
    verbose : bool, keyword-only
        When true, print the per-class scores before aggregating.
    beta : float, keyword-only
        F-beta weight of recall relative to precision (default 10, i.e.
        strongly recall-oriented).
    spread_penalty : float, keyword-only
        Multiplier applied to the standard deviation of the per-class scores
        before it is subtracted from their mean (default 4).

    Returns
    -------
    float or numpy.ndarray
        The penalised mean, or the per-class score array when
        ``average is None``. The penalised mean can be negative when the
        per-class scores are widely spread.
    """
    scores = np.array(
        metrics.fbeta_score(y_true, y_pred, beta=beta, average=None)
    )
    if verbose:
        print(scores)
    if average is None:
        return scores
    return scores.mean() - spread_penalty * scores.std()


# Scorer wrapper so the metric can be passed as `scoring=` to cross-validation.
super_scoring = metrics.make_scorer(weighted_f_score)

In [27]:
scanners = [s.uuid for s in await models.Scanner.filter()]
default_heartbeat = dict(zip(scanners, [-100] * len(scanners)))
heartbeats = await models.DeviceHeartbeat.filter().prefetch_related('room')
heartbeats = [{**default_heartbeat, **h.signals, 'room': h.room.id} for h in heartbeats]
heartbeats_df = pd.DataFrame(heartbeats)
heartbeats_df.drop_duplicates(inplace=True)

In [28]:
# Split by column name, not position: if any heartbeat carries a signal key
# that is not among the default scanner columns, pandas appends that column
# after 'room', and a positional "all but last" slice would leak the label
# into the features.
heartbeats_target = heartbeats_df['room'].values
heartbeats_data = heartbeats_df.drop(columns='room')

# Stratify on the room label so both splits keep the class proportions of the
# full data set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    heartbeats_data,
    heartbeats_target,
    stratify=heartbeats_target,
    random_state=42,
)

In [29]:
# Per-class sample counts for the train split, then the test split.
for split_labels in (y_train, y_test):
    print(np.unique(split_labels, return_counts=True))

(array([1, 2, 3, 4, 5, 6, 7]), array([ 644,  362,  263, 1049,  227,  179,  110]))
(array([1, 2, 3, 4, 5, 6, 7]), array([215, 120,  88, 350,  76,  60,  36]))


In [30]:
# 5-fold cross-validation of a one-vs-rest random forest on the training
# split, scored with the custom `super_scoring` metric.
base_forest = RandomForestClassifier(n_jobs=-1, random_state=42)
estimator = multiclass.OneVsRestClassifier(base_forest)
model_selection.cross_validate(
    estimator,
    X_train,
    y_train,
    scoring=super_scoring,
    cv=5,
)

{'fit_time': array([1.1602428 , 0.66745424, 0.65914989, 0.64133883, 0.63036561]),
 'score_time': array([0.46792054, 0.48144054, 0.46253681, 0.47163916, 0.47018361]),
 'test_score': array([1.        , 0.92406911, 1.        , 0.93121184, 1.        ])}

In [31]:
# Build a fresh one-vs-rest random forest and fit it on the whole training
# split; this fitted model is the one evaluated on the held-out test split.
estimator = multiclass.OneVsRestClassifier(
    RandomForestClassifier(n_jobs=-1, random_state=42)
)
estimator.fit(X_train, y_train)

OneVsRestClassifier(estimator=RandomForestClassifier(n_jobs=-1,
                                                     random_state=42))

In [32]:
# Run inference on the held-out split once and reuse the results for every
# metric, instead of calling predict() again for each one.
y_pred = estimator.predict(X_test)
y_proba = estimator.predict_proba(X_test)

# AUC is computed from class probabilities (one-vs-rest); precision and recall
# are reported per class (average=None) from the hard predictions.
print('AUC ROC: ', metrics.roc_auc_score(y_test, y_proba, multi_class='ovr'))
print('Precision: ', metrics.precision_score(y_test, y_pred, average=None))
print('Recall: ', metrics.recall_score(y_test, y_pred, average=None))

AUC ROC:  1.0
Precision:  [1.         1.         1.         0.997151   1.         1.
 0.97297297]
Recall:  [1.         0.98333333 1.         1.         1.         1.
 1.        ]
