In [1]:
import pandas as pd
import numpy as np
import config

In [68]:
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime 
import matplotlib.pyplot as plt

Let us look at how the setpoint changes over the recorded usage of each of the 3 devices.

In [4]:
# For every device file listed in config.uuid_files, show which setpoint
# values occur and how many readings carry each value.
for file in config.uuid_files:
    df = pd.read_pickle(file)
    setpoint_counts = np.unique(df['setpoint'], return_counts=True)
    # One print with a newline separator emits the same three lines as
    # three separate print calls (file name, counts tuple, divider).
    print(file, setpoint_counts, '---', sep='\n')

8b916a17-57b0-4449-bcaf-8f2d16c00bf7.pickle
(array([34, 42], dtype=int64), array([345600, 345600], dtype=int64))
---
de58b445-cfe9-4d0d-adae-5bef928a9dd0.pickle
(array([43], dtype=int64), array([484559], dtype=int64))
---
09ac4a10-7e8e-40f3-a327-1f93a5cf2383.pickle
(array([37], dtype=int64), array([503254], dtype=int64))
---


The setpoint never changes for 2 of the 3 devices. For the first device, the readings are split exactly evenly between a setpoint of 34 and a setpoint of 42 (345,600 readings each). Let's look at that device more closely.

In [5]:
# Load the one device whose setpoint takes two values (34 and 42, per the
# per-file summary above) and summarise its numeric columns.
df = pd.read_pickle('8b916a17-57b0-4449-bcaf-8f2d16c00bf7.pickle')
df.describe()

Unnamed: 0,compressor,door,roomTemp,setpoint,temp
count,691200.0,691200.0,691200.0,691200.0,691200.0
mean,0.044763,0.02122,73.0,38.0,36.972399
std,0.206783,0.144116,2.828429,4.000003,4.11846
min,0.0,0.0,69.0,34.0,34.0
25%,0.0,0.0,70.171573,34.0,34.0
50%,0.0,0.0,73.0,38.0,34.0
75%,0.0,0.0,75.828427,42.0,42.0
max,1.0,1.0,77.0,42.0,57.773888


In [9]:
# Preview the first rows of the frame currently bound to `df`, including the
# `timestamp` column that `describe()` does not summarise.
df.head()

Unnamed: 0,compressor,door,roomTemp,setpoint,temp,timestamp
0,0,0,69.0,34,34.0,2019-01-01 00:00:00
1,0,0,69.000002,34,34.0,2019-01-01 00:00:15
2,0,0,69.00001,34,34.0,2019-01-01 00:00:30
3,0,0,69.000021,34,34.0,2019-01-01 00:00:45
4,0,0,69.000038,34,34.0,2019-01-01 00:01:00


In [6]:
## Create traces
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=df['timestamp'], y=df['setpoint'], mode='lines', name='lines'))
##fig.show()

In [7]:
## Create traces
#fig = go.Figure()
#fig.add_trace(go.Scatter(x=df['timestamp'][10000:20000], y=df['compressor'][10000:20000], mode='lines',name='compressor'))
#fig.add_trace(go.Scatter(x=df['timestamp'][10000:20000], y=df['door'][10000:20000],mode='markers', name='door'))
#fig.add_trace(go.Scatter(x=random_x, y=random_y2, mode='lines', name='lines'))
#fig.show()

In [None]:
# Strategy 1: Assume there are two ways in which the compressor starts working.
# First: the door opens and outside heat flows in (if and only if roomTemp > temp > setpoint), raising "temp",
#        which triggers the compressor to cool the inside until "temp" == "setpoint".
# Second: the door is closed, but "setpoint" becomes lower than "temp", which triggers the compressor
#        to cool until "temp" == "setpoint".

In [None]:
# Let's try the strategy of predicting door activity over a day or a week.
# If the door is opened in a repeating daily or weekly pattern, we can try to identify it.

In [121]:
# Load a device whose setpoint never changes (constant 43 in the per-file
# summary), so door-opening activity can be investigated on its own without
# setpoint changes mixed in.
df = pd.read_pickle('de58b445-cfe9-4d0d-adae-5bef928a9dd0.pickle')
df.describe()

Unnamed: 0,compressor,door,roomTemp,setpoint,temp
count,484559.0,484559.0,484559.0,484559.0,484559.0
mean,0.014576,0.014029,60.007602,43.0,43.044246
std,0.119849,0.117611,2.119784,0.0,0.355107
min,0.0,0.0,57.0,43.0,43.0
25%,0.0,0.0,57.890281,43.0,43.0
50%,0.0,0.0,60.022907,43.0,43.0
75%,0.0,0.0,62.116687,43.0,43.0
max,1.0,1.0,63.0,43.0,47.341429


In [122]:
# Tally how many readings carry each `door` value (the column holds only 0 and
# 1 here; presumably 1 means the door is open -- confirm against the data source).
np.unique(df['door'], return_counts=True)

(array([0, 1], dtype=int64), array([477761,   6798], dtype=int64))

In [123]:
# Derive calendar features from each reading's timestamp so door activity can
# be grouped per day and per week, and compared between weekdays and weekends.
df['dates'] = df['timestamp'].dt.date
# `Series.dt.week` is deprecated and was removed in pandas 2.0; the ISO
# calendar accessor yields the same ISO-8601 week number. It returns a nullable
# UInt32 column, so cast back to int64 to keep the dtype the old accessor
# produced (assumes `timestamp` contains no NaT values -- confirm).
df['week'] = df['timestamp'].dt.isocalendar().week.astype('int64')
df['day_of_week'] = df['timestamp'].dt.weekday  # Monday=0 ... Sunday=6
# 1 for Saturday/Sunday (weekday 5 or 6), 0 otherwise; computed in a single
# vectorised step instead of initialise-then-overwrite.
df['weekend_flag'] = (df['day_of_week'] > 4).astype('int64')
df.head()

Unnamed: 0,compressor,door,roomTemp,setpoint,temp,timestamp,dates,week,day_of_week,weekend_flag
0,0,0,57.710303,43,43.0,2019-02-01 00:19:00,2019-02-01,5,4,0
1,0,0,57.70819,43,43.0,2019-02-01 00:19:15,2019-02-01,5,4,0
2,0,0,57.706079,43,43.0,2019-02-01 00:19:30,2019-02-01,5,4,0
3,0,0,57.703972,43,43.0,2019-02-01 00:19:45,2019-02-01,5,4,0
4,0,0,57.701867,43,43.0,2019-02-01 00:20:00,2019-02-01,5,4,0


In [126]:
# Number of sensor readings recorded on each calendar date (a coverage check,
# not a count of door openings).
sensor_dates, sensor_reads = np.unique(df['dates'], return_counts=True)
fig = go.Figure(data=[go.Scatter(x=sensor_dates, y=sensor_reads)])
fig.update_layout(title='Sensor readings over the day')
fig.show()

## see how many readings we have per day
# sensor_dates,sensor_reads = np.unique(df['dates'], return_counts=True)
# fig = px.bar(x=sensor_dates, y=sensor_reads)
# fig.update_layout(title='Sensor readings over the day')
# fig.show()

In [127]:
# Daily behaviour: sum the `door` column per calendar date, carrying the
# weekend marker along as a second grouping key. Because `door` is 0/1 per
# reading, the sum is the number of readings with door == 1 on that date.
door_opened_df = (
    df.groupby(['dates', 'weekend_flag'])[['door']]
    .sum()
    .reset_index()
)
door_opened_df.head()

Unnamed: 0,dates,weekend_flag,door
0,2019-02-01,0,83
1,2019-02-02,1,84
2,2019-02-03,1,77
3,2019-02-04,0,76
4,2019-02-05,0,59


In [128]:
# Line plot of the per-date door totals computed in door_opened_df
daily_trace = go.Scatter(
    x=door_opened_df['dates'],
    y=door_opened_df['door'],
    name='door opened every day',
)
fig = go.Figure(data=[daily_trace])
fig.update_layout(title='Daily door opening')
fig.show()

In [129]:
# Same per-date totals as a scatter, coloured by weekend_flag so weekdays and
# weekends can be compared at a glance.
fig = px.scatter(
    door_opened_df,
    x="dates",
    y="door",
    color="weekend_flag",
    hover_data=['door'],
)
fig.update_layout(title='Daily door opening investigating weekends')
fig.show()

In [130]:
# Weekly behaviour: sum the `door` column per week number.
# NOTE: this rebinds door_opened_df from the per-date frame to a per-week
# frame indexed by `week`; the cells below expect the weekly version.
door_opened_df = df.groupby('week')[['door']].sum()
door_opened_df.describe()

Unnamed: 0,door
count,14.0
mean,485.571429
std,128.35031
min,138.0
25%,504.5
50%,531.5
75%,549.25
max,567.0


In [131]:
# Line plot of the weekly door totals; door_opened_df is indexed by week
# number at this point, so the index supplies the x axis.
fig = go.Figure()
fig.add_trace(go.Scatter(x=door_opened_df.index, y=door_opened_df['door'],
                         mode='lines',
                         # The series is aggregated per week, so label it as such
                         # (the old label said "every day").
                         name='door opened every week'))
# Axis titles let the figure stand alone when the notebook is skimmed.
fig.update_layout(title='Weekly door open',
                  xaxis_title='week of year',
                  yaxis_title='readings with door == 1')
fig.show()

In [132]:
# See how many sensor readings we have per week (coverage check)
weekly_nos, weekly_reads = np.unique(df['week'], return_counts=True)
fig = go.Figure(data=[go.Scatter(x=weekly_nos, y=weekly_reads)])
fig.update_layout(title='Sensor readings over the week')
fig.show()

It looks like there are fewer door openings in the first and last weeks because those weeks contain fewer sensor readings.
So let's look at door usage excluding the first and last weeks.

In [116]:
# Plot the weekly door totals again, dropping the first and last weeks, which
# have fewer readings than the rest.
# Requires door_opened_df to be the per-week frame (indexed by week number).
inner_weeks = door_opened_df.iloc[1:-1]  # explicit positional trim of both ends
fig = go.Figure()
fig.add_trace(go.Scatter(x=inner_weeks.index, y=inner_weeks['door'],
                         mode='lines',
                         # Weekly aggregate, so label it per week
                         # (the old label said "every day").
                         name='door opened every week'))
fig.update_layout(title='Weekly door opens (excluding first and last week)',
                  xaxis_title='week of year',
                  yaxis_title='readings with door == 1')
fig.show()

In [3]:
# Load the remaining device file and summarise its numeric columns.
# NOTE(review): `df2` is a positional name; a device-specific name would read
# better, but cells outside this view may reference it.
df2 = pd.read_pickle('09ac4a10-7e8e-40f3-a327-1f93a5cf2383.pickle')
df2.describe()

Unnamed: 0,compressor,door,roomTemp,setpoint,temp
count,503254.0,503254.0,503254.0,503254.0,503254.0
mean,0.014068,0.00866,67.882876,37.0,37.064505
std,0.117773,0.092653,4.600678,0.0,0.598199
min,0.0,0.0,61.4,37.0,37.0
25%,0.0,0.0,63.278806,37.0,37.0
50%,0.0,0.0,67.864548,37.0,37.0
75%,0.0,0.0,72.491178,37.0,37.0
max,1.0,1.0,74.4,37.0,51.771167


In [5]:
# Check which setpoint values occur for this device and how often.
np.unique(df2['setpoint'], return_counts=True)

(array([37], dtype=int64), array([503254], dtype=int64))

In [6]:
# NOTE(review): this is the same pickle file that an earlier cell loads into
# `df`; `df3` is a fresh copy without the derived dates/week/day_of_week/
# weekend_flag columns added to `df`.
df3 = pd.read_pickle('de58b445-cfe9-4d0d-adae-5bef928a9dd0.pickle')
df3.describe()

Unnamed: 0,compressor,door,roomTemp,setpoint,temp
count,484559.0,484559.0,484559.0,484559.0,484559.0
mean,0.014576,0.014029,60.007602,43.0,43.044246
std,0.119849,0.117611,2.119784,0.0,0.355107
min,0.0,0.0,57.0,43.0,43.0
25%,0.0,0.0,57.890281,43.0,43.0
50%,0.0,0.0,60.022907,43.0,43.0
75%,0.0,0.0,62.116687,43.0,43.0
max,1.0,1.0,63.0,43.0,47.341429


In [7]:
# Check which setpoint values occur for this device and how often.
np.unique(df3['setpoint'], return_counts=True)

(array([43], dtype=int64), array([484559], dtype=int64))