In [44]:
import pandas as pd
import numpy as np
from influxdb import InfluxDBClient
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from dateutil.parser import parse
from dateutil import tz

sns.set(style="whitegrid")
sns.set(font_scale=0.7)

In [45]:
class InfluxDB:
    def __init__(self, url: str, port: int, username: str, password: str,
                 basestations: list, beacon: str, classification: str,
                 startdatetime: str = None, enddatetime: str = None):
        self.client = InfluxDBClient(url, port, username, password)
        self.basestations = basestations
        self.beacon = beacon
        self.classification = classification
        self.timefilter = '1=1'
        self.timefilter += f"\n\t\tAND time >= '{InfluxDB.convert_to_rfc3339(startdatetime)}'" if startdatetime else ''
        self.timefilter += f"\n\t\tAND time <= '{InfluxDB.convert_to_rfc3339(enddatetime)}'" if enddatetime else ''

    @property
    def df_basestation_beacon_values(self):
        where = ''.join([f"\n\t\t    OR entity_id = 'mqtt_{self.beacon}_{basestation}_raw'" for basestation in self.basestations])
        query = f'''
            SELECT friendly_name_str as basestation, value as distance
            FROM homeassistant.autogen.m
            WHERE {self.timefilter}
                AND (
                    1<>1 {where}
                )
        '''
        df = pd.DataFrame(self.client.query(query).get_points())
        df.time = pd.to_datetime(df.time)
        return df

    @property
    def df_classifications(self):
        query = f'''
            SELECT state as y
            FROM homeassistant.autogen.state
            WHERE {self.timefilter}
                AND entity_id = '{self.classification}'
        '''
        df = pd.DataFrame(self.client.query(query).get_points())
        df.time = pd.to_datetime(df.time)
        return df

    def get_Xy(self):
        self.df = (
            pd.concat([self.df_basestation_beacon_values, self.df_classifications], axis=0)
            .pivot(index=['time', 'y'], columns='basestation', values='distance')
            .fillna(method='ffill')
            .reset_index('y')
            .fillna(method='ffill')
            .drop(np.nan, axis=1)
            .dropna()
            .query('y != "None"')
        )
        self.X = self.df.drop('y', axis=1)
        self.y = self.df.y
        return self.X, self.y

    def show_basestation_strength(self):
        fig, ax = plt.subplots(3, figsize=(10, 3), sharex=True, sharey=True)
        data = self.df_basestation_beacon_values
        for i, basestation in enumerate(data.basestation.unique()):
            sns.lineplot(
                data=data.loc[data.basestation == basestation],
                x='time', y='distance',
                linewidth=0.5, ax=ax[i]
            ).set_title(basestation)
        plt.show()

    @staticmethod
    def convert_to_rfc3339(date_string):
        dt = parse(date_string)
        dt = dt.astimezone(tz.UTC)
        return dt.strftime('%Y-%m-%dT%H:%M:%SZ')

influx = InfluxDB(
    url='192.168.50.134',
    port=8086,
    username='homeassistant',
    password='homeassistant',
    basestations=['office', 'livingroom', 'bedroomjk'],
    beacon='iphonejd',
    classification='jd_iphone_room_training_data',
    startdatetime='2023-07-17T21:51:00',
)

X, y = influx.get_Xy()

In [53]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Define pipeline
pipeline = make_pipeline(
    PolynomialFeatures(degree=2),
    StandardScaler(),
    LogisticRegression(max_iter=100000, class_weight='balanced')
)

# Use cross_val_score for StratifiedKFold cross-validation
scores = cross_val_score(pipeline, X, y, cv=StratifiedKFold(n_splits=5))

# Print metrics
print("Cross-validation accuracy scores:", scores)
print("Mean cross-validation accuracy:", scores.mean())

Cross-validation accuracy scores: [0.56547619 0.62874251 0.5988024  0.62275449 0.5748503 ]
Mean cross-validation accuracy: 0.5981251782149986


In [54]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit and score
pipeline.fit(X_train, y_train)
print(pipeline.score(X_test, y_test))

# Predict
y_pred = pipeline.predict(X_test)

# Print metrics
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.6363636363636364
[[11  0  3  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  5  0  0  0  0]
 [ 3  0  9  0  0  0  1  0  0  0  0]
 [ 0  0  0  5  2  2  0  6  0  0  5]
 [ 0  0  0  1 11  4  1  0  0  5  5]
 [ 0  0  0  0  0  8  1  0  5  0  2]
 [ 4  4  0  0  0  0  9  0  1  0  0]
 [ 0  0  0  1  0  0  0  6  0  0  3]
 [ 0  0  0  0  0  6  1  0 33  0  0]
 [ 0  0  0  0  3  0  0  0  0 10  0]
 [ 0  0  0  0  0  2  0  0  0  0 10]]
              precision    recall  f1-score   support

   BedroomCC       0.61      0.79      0.69        14
   BedroomJK       0.84      0.81      0.82        26
    BedroomS       0.75      0.69      0.72        13
  DiningRoom       0.71      0.25      0.37        20
    Hallway0       0.69      0.41      0.51        27
    Hallway1       0.36      0.50      0.42        16
    Hallway2       0.50      0.50      0.50        18
     Kitchen       0.50      0.60      0.55        10
  LivingRoom       0.85      0.82      0.84        40
      Office       0.67      0.77      0.71

In [55]:
pipeline.fit(X, y)
print(pipeline.score(X_test, y_test))

0.6650717703349283


In [66]:
import paho.mqtt.client as mqtt
import json

verbose = False

X_live = {
    'espresense/devices/jd_iphone/bedroomjk': 0.,
    'espresense/devices/jd_iphone/livingroom': 0.,
    'espresense/devices/jd_iphone/office': 0.,
}

def on_connect(client, userdata, flags, rc):
    print("Connected with result code "+str(rc))
    client.subscribe("espresense/devices/jd_iphone/bedroomjk")
    client.subscribe("espresense/devices/jd_iphone/office")
    client.subscribe("espresense/devices/jd_iphone/livingroom")

def on_message(client, userdata, msg):
    data = json.loads(msg.payload)
    if 'raw' in data:
        X_live[msg.topic] = data['raw']
        #x = np.array(list(X_live.values())).reshape(1, -1)
        x = pd.DataFrame(X_live.values(), index=['MQTT iPhoneJD BedroomJK Raw', 'MQTT iPhoneJD LivingRoom Raw', 'MQTT iPhoneJD Office Raw']).T
        print(pipeline.predict(x))

client = mqtt.Client()
client.username_pw_set("mqtt-user", "vlmSDF543")
client.on_connect = on_connect
client.on_message = on_message
client.connect("192.168.50.134", 1883, 60)
client.loop_forever()

Connected with result code 0
['LivingRoom']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Office']
['Hallway0']
['Office']
['Hallway0']
['Hallway0']
['Hallway0']
['Hallway0']
['Hallway0']
['Playroom']
['Playroom']
['Playroom']
['Playroom']
['Kitchen']
['Playroom']
['Playroom']
['Playroom']
['Playroom']
['Playroom']
['Playroom']
['Hallway0']
['Office']
['Office']


KeyboardInterrupt: 