In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean, pdist, cdist, squareform
from sklearn import preprocessing
from datetime import datetime
from dataclasses import make_dataclass
from server import models
from server import config

await models.init_db({ 'config': config })

In [None]:
device = await models.Device.get(name='Mi Smart Band 4')
raw_signals = await models.DeviceHeartbeat.filter(device=device).prefetch_related('room')

In [None]:
# Sensor keys as they appear in each heartbeat's `signals` mapping. This is
# the single source of truth: dataclass fields, DataFrame columns and the list
# of columns to normalise are all derived from it.
SENSOR_KEYS = ['office', 'bathroom', 'extra-1', 'lobby', 'laundry', 'kitchen', 'guest', 'bedroom']

# Value substituted when a sensor has no 'filtered_rssi' entry for a heartbeat.
MISSING_RSSI = -100

# Column names are the sensor keys with '-' swapped for '_' so they are valid
# identifiers (e.g. 'extra-1' -> 'extra_1').
column_names_to_normalize = [key.replace('-', '_') for key in SENSOR_KEYS]

# One flat row per heartbeat: its id, the room name, and one reading per sensor.
Signal = make_dataclass(
    'Signal',
    [("id", int), ("room", str)] + [(column, float) for column in column_names_to_normalize],
)


def _signal_from_heartbeat(heartbeat):
    """Flatten one heartbeat record into a ``Signal`` row.

    Reads ``heartbeat.id``, ``heartbeat.room.name`` and, for every key in
    ``SENSOR_KEYS``, ``heartbeat.signals[key]['filtered_rssi']``. A sensor that
    is missing from ``signals`` (or has no ``'filtered_rssi'``) is recorded as
    ``MISSING_RSSI``.
    """
    readings = {
        column: heartbeat.signals.get(key, {}).get('filtered_rssi', MISSING_RSSI)
        for key, column in zip(SENSOR_KEYS, column_names_to_normalize)
    }
    return Signal(id=heartbeat.id, room=heartbeat.room.name, **readings)


signals_df = pd.DataFrame([_signal_from_heartbeat(s) for s in raw_signals])

# L2-normalise the sensor readings (sklearn's `normalize` scales each sample,
# i.e. each row, to unit norm by default) and write the scaled values back
# into the frame. `tmp_x_scaled` is kept because the de-duplication cell
# iterates over it.
tmp_x = signals_df[column_names_to_normalize].values
tmp_x_scaled = preprocessing.normalize(tmp_x, norm='l2')
df_temp = pd.DataFrame(tmp_x_scaled, columns=column_names_to_normalize, index=signals_df.index)
signals_df[column_names_to_normalize] = df_temp

In [None]:
# Greedy near-duplicate removal over the normalised signal rows.
#
# Rows are visited in order. A row is kept only if its Euclidean distance to
# every row kept so far is at least CLOSE_SIGNAL_DISTANCE; otherwise its
# position is recorded as removed. (A separate, tighter "identical" threshold
# would be redundant: anything within it is also within the close threshold.)
CLOSE_SIGNAL_DISTANCE = 0.010

# The positions collected below index into `signals_df`, so the two must still
# be aligned. They are not if this cell is re-run after `signals_df` has
# already been replaced by its filtered version; fail loudly in that case
# rather than select (and later delete) the wrong rows.
if len(signals_df) != len(tmp_x_scaled):
    raise RuntimeError(
        'signals_df has {} rows but tmp_x_scaled has {}; rebuild signals_df before filtering'.format(
            len(signals_df), len(tmp_x_scaled)
        )
    )

tmp_x_filterd = []   # kept row vectors
tmp_x_indexes = []   # positions of kept rows
tmp_x_removed = []   # positions of rows dropped as near-duplicates

for i, s in enumerate(tmp_x_scaled):
    # The first row has nothing to be compared against and is always kept.
    # It must never be reported as removed: rows listed in
    # `removed_signals_df` are treated as disposable by later cells.
    is_near_duplicate = bool(tmp_x_filterd) and bool(
        (cdist(tmp_x_filterd, [s], 'euclidean') < CLOSE_SIGNAL_DISTANCE).any()
    )
    if is_near_duplicate:
        tmp_x_removed.append(i)
    else:
        tmp_x_filterd.append(s)
        tmp_x_indexes.append(i)

print('Original length: {}'.format(len(tmp_x_scaled)))
print('New length: {}'.format(len(tmp_x_filterd)))

# Split the frame by position into dropped and kept rows. Note that this
# rebinds `signals_df` to the filtered subset.
removed_signals_df = signals_df.iloc[tmp_x_removed, :]
signals_df = signals_df.iloc[tmp_x_indexes, :]

In [None]:
ids = removed_signals_df['id'].values
await models.DeviceHeartbeat.filter(id__in=ids).delete()

In [None]:
# Scatter of normalised readings on one shared axes: x is always 'extra_1',
# y is each of 'lobby', 'bathroom' and 'laundry'; points are coloured by the
# room the heartbeat was labelled with.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)

# Limits are passed high-to-low (-0.2 before -0.5), so both axes are drawn
# reversed.
ax.set_xlim([-0.2, -0.5])
ax.set_ylim([-0.2, -0.5])

room_colors = {'Office': 'red', 'Bathroom': 'green', 'Lobby': 'blue'}
y_columns = ('lobby', 'bathroom', 'laundry')

for room_name, room_color in room_colors.items():
    room_rows = signals_df[signals_df['room'] == room_name]
    for y_column in y_columns:
        room_rows.plot.scatter(x='extra_1', y=y_column, color=room_color, ax=ax)

In [None]:
# Line plot of three normalised sensor readings for the heartbeats labelled
# 'Office', drawn against the frame's index on a single axes.
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)

office_rows = signals_df[signals_df['room'] == 'Office']
office_series = (('extra_1', 'red'), ('bathroom', 'blue'), ('lobby', 'green'))
for sensor_column, line_color in office_series:
    office_rows.plot(y=sensor_column, color=line_color, ax=ax)


In [None]:
# Pairwise Euclidean distances between all remaining signal rows.
# `dists` is the condensed (flat) form returned by `pdist`; `euclid_df` is the
# same data expanded to a square matrix labelled with the frame's index.
numbers_df = signals_df.loc[:, column_names_to_normalize]
dists = pdist(numbers_df, metric='euclidean')
distance_matrix = squareform(dists)
euclid_df = pd.DataFrame(
    distance_matrix,
    index=numbers_df.index,
    columns=numbers_df.index,
)

In [None]:
# Heat map of the pairwise distance matrix with a colour bar.
# Tick marks are spaced 0.1 apart up to 0.7 and 0.05 apart from there to 1.
colorbar_ticks = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95, 1]

fig, ax = plt.subplots(figsize=(20, 20))
cax = ax.matshow(euclid_df, interpolation='nearest')
fig.colorbar(cax, ticks=colorbar_ticks)
plt.show()

In [None]:
# Histogram of the pairwise distances below 1, followed by the percentage of
# all pairs that fall under that cut-off.
below_cutoff = dists < 1
similarity_cut = dists[below_cutoff]
plt.hist(similarity_cut)

similar_fraction = len(similarity_cut) / len(dists)
print(similar_fraction * 100)