In [18]:
import pandas as pd, numpy as np, os
pd.options.mode.chained_assignment = None  # default='warn'
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from scipy.signal import find_peaks
from sklearn.manifold import TSNE
print('...')

...


In [26]:
path = '/home/hubble/work/serenade/data/subject_3/environmentals/'
file = 'Hum_Temp_Stove_humidity.csv'
stove_humidity = os.path.join(path, file)
df_stove_humidity = pd.read_csv(stove_humidity)
df_stove_humidity = df_stove_humidity.rename(columns={'ts': 'ts_ms', 'sensor_status':'sensor_readings'})
df_stove_humidity['ts_datetime'] = pd.to_datetime(df_stove_humidity['ts_ms'], unit = 'ms')
df_stove_humidity = df_stove_humidity[~df_stove_humidity['sensor_readings'].isin(['unavailable', 'unknown'])]
print(df_stove_humidity.head())
df = df_stove_humidity.copy()

        sensor_id  sensor_readings          ts_ms  subject_id  \
0  Stove_Hum_Temp            54.77  1711064577000           3   
1  Stove_Hum_Temp            52.65  1711067842000           3   
2  Stove_Hum_Temp            52.44  1711071188000           3   
3  Stove_Hum_Temp            52.37  1711071711000           3   
4  Stove_Hum_Temp            52.58  1711074554000           3   

          ts_datetime  
0 2024-03-21 23:42:57  
1 2024-03-22 00:37:22  
2 2024-03-22 01:33:08  
3 2024-03-22 01:41:51  
4 2024-03-22 02:29:14  


# Can we detect anomaly using Interquartile Range?

In [28]:
q1_pc1, q3_pc1 = df['sensor_readings'].quantile([0.25, 0.75])
iqr_pc1 = q3_pc1 - q1_pc1
lower_pc1 = q1_pc1 - (1.5*iqr_pc1)
upper_pc1 = q3_pc1 + (1.5*iqr_pc1)
df['anomaly_pc1'] = ((df['sensor_readings']>upper_pc1) | (df['sensor_readings']<lower_pc1)).astype('float64')

In [29]:
df['ts_datetime'] = pd.to_datetime(df['ts_datetime'])
a = df[df['anomaly_pc1'] == 1] 
# normal traces
normal_trace = go.Scatter(
    x=df['ts_datetime'], 
    y=df['sensor_readings'],
    mode='lines',
    name='Normal',
    line=dict(color='blue')
)
# anomaly traces
anomaly_trace = go.Scatter(
    x=a['ts_datetime'],  
    y=a['sensor_readings'],
    mode='markers',
    name='Anomaly',
    marker=dict(color='red', size=12, symbol='x')
)

layout = go.Layout(
    title='Temperature',
    xaxis=dict(title='Date and Time'),
    yaxis=dict(title='Sensor Reading'),
    legend=dict(orientation="h", x=0, y=1.1)
)

fig = go.Figure(data=[normal_trace, anomaly_trace], layout=layout)
fig.show()


# Can we plot montly data?

In [32]:
df['ts_datetime'] = pd.to_datetime(df['ts_datetime'])
df['year_month'] = df['ts_datetime'].dt.to_period('M')
grouped_by_month = df.groupby('year_month')
print(df)

           sensor_id  sensor_readings          ts_ms  subject_id  \
0     Stove_Hum_Temp            54.77  1711064577000           3   
1     Stove_Hum_Temp            52.65  1711067842000           3   
2     Stove_Hum_Temp            52.44  1711071188000           3   
3     Stove_Hum_Temp            52.37  1711071711000           3   
4     Stove_Hum_Temp            52.58  1711074554000           3   
...              ...              ...            ...         ...   
5329  Stove_Hum_Temp            54.08  1715209231000           3   
5330  Stove_Hum_Temp            54.02  1715209884000           3   
5331  Stove_Hum_Temp            53.92  1715210739000           3   
5332  Stove_Hum_Temp            53.78  1715211492000           3   
5333  Stove_Hum_Temp            53.75  1715211533000           3   

             ts_datetime  anomaly_pc1 year_month  
0    2024-03-21 23:42:57          0.0    2024-03  
1    2024-03-22 00:37:22          0.0    2024-03  
2    2024-03-22 01:33:08      

In [33]:
df['ts_datetime'] = pd.to_datetime(df['ts_datetime'])
df['year_month'] = df['ts_datetime'].dt.to_period('M')
grouped_by_month = df.groupby('year_month')

# Plot each month
for name, group in grouped_by_month:
    # Filter anomalies
    a = group[group['anomaly_pc1'] == 1]  # Anomalies

    # Create traces
    normal_trace = go.Scatter(
        x=group['ts_datetime'],  # Use the datetime column for x-axis
        y=group['sensor_readings'],
        mode='lines',
        name='Normal',
        line=dict(color='blue')
    )

    anomaly_trace = go.Scatter(
        x=a['ts_datetime'],  # Use the datetime column for x-axis
        y=a['sensor_readings'],  # y-coordinates for the anomalies
        mode='markers',
        name='Anomaly',
        marker=dict(color='red', size=12, symbol='x')
    )

    # Layout
    layout = go.Layout(
        title=f'Temperature - {name}',
        xaxis=dict(title='Date and Time'),
        yaxis=dict(title='Sensor Reading'),
        legend=dict(orientation="h", x=0, y=1.1)
    )

    # Create figure and add traces
    fig = go.Figure(data=[normal_trace, anomaly_trace], layout=layout)

    # Show plot for each month
    fig.show()


In [39]:
df = df_stove_humidity.copy()
print(df.head(100))

         sensor_id  sensor_readings          ts_ms  subject_id  \
0   Stove_Hum_Temp            54.77  1711064577000           3   
1   Stove_Hum_Temp            52.65  1711067842000           3   
2   Stove_Hum_Temp            52.44  1711071188000           3   
3   Stove_Hum_Temp            52.37  1711071711000           3   
4   Stove_Hum_Temp            52.58  1711074554000           3   
..             ...              ...            ...         ...   
95  Stove_Hum_Temp            61.60  1711138724000           3   
96  Stove_Hum_Temp            66.16  1711138784000           3   
97  Stove_Hum_Temp            70.04  1711138865000           3   
98  Stove_Hum_Temp            69.18  1711138955000           3   
99  Stove_Hum_Temp            69.18  1711138956000           3   

           ts_datetime  
0  2024-03-21 23:42:57  
1  2024-03-22 00:37:22  
2  2024-03-22 01:33:08  
3  2024-03-22 01:41:51  
4  2024-03-22 02:29:14  
..                 ...  
95 2024-03-22 20:18:44  
96 2024

# Can we use moving average?

In [37]:
df = df_stove_humidity.copy()
df.set_index('ts_datetime', inplace=True)

window_size = 50
df['moving_average'] = df['sensor_readings'].rolling(window=window_size, center=True).mean()
threshold = 2 * df['sensor_readings'].std()
df['anomaly'] = np.abs(df['sensor_readings'] - df['moving_average']) > threshold

# Filter anomalies
anomalies = df[df['anomaly']]

# Plot the results
normal_trace = go.Scatter(
    x=df.index,
    y=df['sensor_readings'],
    mode='lines',
    name='Sensor Readings',
    line=dict(color='blue')
)

moving_avg_trace = go.Scatter(
    x=df.index,
    y=df['moving_average'],
    mode='lines',
    name='Moving Average',
    line=dict(color='orange', width=3, dash='dot')  # Changed color to orange and style to dot
)

anomaly_trace = go.Scatter(
    x=anomalies.index,
    y=anomalies['sensor_readings'],
    mode='markers',
    name='Anomalies',
    marker=dict(color='red', size=10, symbol='x')
)

layout = go.Layout(
    title='Sensor Readings with Moving Average and Anomalies',
    xaxis=dict(title='Date and Time'),
    yaxis=dict(title='Sensor Reading'),
    legend=dict(orientation="h", x=0, y=1.1)
)

fig = go.Figure(data=[normal_trace, moving_avg_trace, anomaly_trace], layout=layout)
fig.show()


In [34]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
df = df_stove_humidity.copy()

# Example DataFrame (assuming df is already loaded with 'ts_datetime' and 'sensor_readings')
df['ts_datetime'] = pd.to_datetime(df['ts_datetime'])

# Set the datetime column as index and sort
df.set_index('ts_datetime', inplace=True)
df.sort_index(inplace=True)

# Aggregate by day
daily_data = df.resample('D').mean()

# Define the window size in days
window_size = 7  # Number of days for moving average

# Compute moving average with a rolling window on daily data
daily_data['moving_average'] = daily_data['sensor_readings'].rolling(window=window_size, center=True).mean()

# Define a threshold for anomaly detection based on standard deviation of daily values
threshold = 2 * daily_data['sensor_readings'].std()

# Detect anomalies
daily_data['anomaly'] = np.abs(daily_data['sensor_readings'] - daily_data['moving_average']) > threshold

# Filter anomalies
anomalies = daily_data[daily_data['anomaly']]

# Plot the results
normal_trace = go.Scatter(
    x=daily_data.index,
    y=daily_data['sensor_readings'],
    mode='lines',
    name='Daily Average',
    line=dict(color='blue')
)

moving_avg_trace = go.Scatter(
    x=daily_data.index,
    y=daily_data['moving_average'],
    mode='lines',
    name='Moving Average',
    line=dict(color='orange', width=3, dash='dot')  # Adjust color and style as needed
)

anomaly_trace = go.Scatter(
    x=anomalies.index,
    y=anomalies['sensor_readings'],
    mode='markers',
    name='Anomalies',
    marker=dict(color='red', size=10, symbol='x')
)

layout = go.Layout(
    title='Daily Sensor Readings with Moving Average and Anomalies',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Sensor Reading'),
    legend=dict(orientation="h", x=0, y=1.1)
)

fig = go.Figure(data=[normal_trace, moving_avg_trace, anomaly_trace], layout=layout)
fig.show()


TypeError: agg function failed [how->mean,dtype->object]