In [9]:
import pandas as pd
import numpy as np
import sqlite3
from datetime import datetime, timedelta
from pytz import timezone

In [10]:
__TIMEZONE = timezone('Europe/London')
days = 1/24

In [11]:
_DB_URL = "/Users/kws/dropbox/iot/eeepc/iot-log.db"

In [12]:
start_date = datetime.utcnow() - timedelta(days=days)
start = start_date.strftime('%Y-%m-%dT%H:%M:%S')

with sqlite3.connect(_DB_URL) as conn:
    df = pd.read_sql_query(
        "select l.date,l.id,s.name,l.type,l.value,s.sort_order from log as l"
        "  left join sensors as s on l.id = s.id "
        "  where l.date>=? and l.type=? order by l.date",
        conn, params=[start, 'presence'])
    
df['date'] = pd.to_datetime(df.date)
df['date'] = df['date'].dt.tz_localize(timezone('UTC'))
df['date'] = df['date'].dt.tz_convert(__TIMEZONE)
orig = df
df

Unnamed: 0,date,id,name,type,value,sort_order
0,2020-04-18 14:41:41+01:00,47,Kitchen,presence,0,1
1,2020-04-18 14:42:15+01:00,53,Bedroom,presence,0,2
2,2020-04-18 14:42:30+01:00,47,Kitchen,presence,1,1
3,2020-04-18 14:42:31+01:00,42,Garage,presence,1,3
4,2020-04-18 14:42:48+01:00,83,Garden,presence,1,5
...,...,...,...,...,...,...
162,2020-04-18 15:27:33+01:00,47,Kitchen,presence,0,1
163,2020-04-18 15:28:24+01:00,53,Bedroom,presence,1,2
164,2020-04-18 15:28:38+01:00,53,Bedroom,presence,0,2
165,2020-04-18 15:36:02+01:00,53,Bedroom,presence,1,2


In [13]:
df = orig[orig.id==47].copy()
periods = 30
time_window = days / periods

start_date_tz = timezone('UTC').localize(start_date)
start_date_tz = start_date.astimezone(__TIMEZONE)

df = df.sort_values("date").reset_index(drop=True)
df.loc[df.value == '1', 'start'] = df.date
df.loc[df.value == '0', 'end'] = df.date
df[['end']] = df[['end']].fillna(method='bfill')
df[['start']] = df[['start']].fillna(method='ffill')

df.loc[df.start.isnull(), 'start'] = start_date_tz
df.loc[df.end.isnull(), 'end'] = start_date_tz + timedelta(days=days)

In [14]:
df['delta'] = df['start'] + timedelta(days=time_window)
df['delta_prev'] = df.delta.shift(1)
df.loc[df.delta_prev.isnull(), 'delta_prev'] = df.delta
df.head()

Unnamed: 0,date,id,name,type,value,sort_order,start,end,delta,delta_prev
0,2020-04-18 14:41:41+01:00,47,Kitchen,presence,0,1,2020-04-18 13:41:38.328362+01:00,2020-04-18 14:41:41+01:00,2020-04-18 13:43:38.328362+01:00,2020-04-18 13:43:38.328362+01:00
1,2020-04-18 14:42:30+01:00,47,Kitchen,presence,1,1,2020-04-18 14:42:30+01:00,2020-04-18 14:43:14+01:00,2020-04-18 14:44:30+01:00,2020-04-18 13:43:38.328362+01:00
2,2020-04-18 14:43:14+01:00,47,Kitchen,presence,0,1,2020-04-18 14:42:30+01:00,2020-04-18 14:43:14+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00
3,2020-04-18 14:43:36+01:00,47,Kitchen,presence,1,1,2020-04-18 14:43:36+01:00,2020-04-18 14:44:04+01:00,2020-04-18 14:45:36+01:00,2020-04-18 14:44:30+01:00
4,2020-04-18 14:43:49+01:00,47,Kitchen,presence,1,1,2020-04-18 14:43:49+01:00,2020-04-18 14:44:04+01:00,2020-04-18 14:45:49+01:00,2020-04-18 14:45:36+01:00


In [15]:
df.loc[(df.start <= df.delta_prev), 'delta'] = df.delta_prev
df.head()

Unnamed: 0,date,id,name,type,value,sort_order,start,end,delta,delta_prev
0,2020-04-18 14:41:41+01:00,47,Kitchen,presence,0,1,2020-04-18 13:41:38.328362+01:00,2020-04-18 14:41:41+01:00,2020-04-18 13:43:38.328362+01:00,2020-04-18 13:43:38.328362+01:00
1,2020-04-18 14:42:30+01:00,47,Kitchen,presence,1,1,2020-04-18 14:42:30+01:00,2020-04-18 14:43:14+01:00,2020-04-18 14:44:30+01:00,2020-04-18 13:43:38.328362+01:00
2,2020-04-18 14:43:14+01:00,47,Kitchen,presence,0,1,2020-04-18 14:42:30+01:00,2020-04-18 14:43:14+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00
3,2020-04-18 14:43:36+01:00,47,Kitchen,presence,1,1,2020-04-18 14:43:36+01:00,2020-04-18 14:44:04+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00
4,2020-04-18 14:43:49+01:00,47,Kitchen,presence,1,1,2020-04-18 14:43:49+01:00,2020-04-18 14:44:04+01:00,2020-04-18 14:45:36+01:00,2020-04-18 14:45:36+01:00


In [18]:
df['end'] = df[['end', 'delta']].max(axis=1)
df['prev_end'] = df.end.shift(1)
df.loc[df.prev_end.isnull(), 'prev_end'] = start_date_tz
df['group_id'] = df.index
df.loc[df.start <= df.prev_end, 'group_id'] = np.nan
df['group_id'] = df['group_id'].fillna(method='ffill')
df.head()

Unnamed: 0,date,id,name,type,value,sort_order,start,end,delta,delta_prev,prev_end,group_id
0,2020-04-18 14:41:41+01:00,47,Kitchen,presence,0,1,2020-04-18 13:41:38.328362+01:00,2020-04-18 14:41:41+01:00,2020-04-18 13:43:38.328362+01:00,2020-04-18 13:43:38.328362+01:00,2020-04-18 13:41:38.328362+01:00,
1,2020-04-18 14:42:30+01:00,47,Kitchen,presence,1,1,2020-04-18 14:42:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 13:43:38.328362+01:00,2020-04-18 14:41:41+01:00,1.0
2,2020-04-18 14:43:14+01:00,47,Kitchen,presence,0,1,2020-04-18 14:42:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,1.0
3,2020-04-18 14:43:36+01:00,47,Kitchen,presence,1,1,2020-04-18 14:43:36+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,2020-04-18 14:44:30+01:00,1.0
4,2020-04-18 14:43:49+01:00,47,Kitchen,presence,1,1,2020-04-18 14:43:49+01:00,2020-04-18 14:45:36+01:00,2020-04-18 14:45:36+01:00,2020-04-18 14:45:36+01:00,2020-04-18 14:44:30+01:00,1.0


In [19]:
df.groupby('group_id').agg({'name': 'first', 'start': min, 'end':max, 'sort_order': 'first'}).reset_index()

Unnamed: 0,group_id,name,start,end,sort_order
0,1.0,Kitchen,2020-04-18 14:42:30+01:00,2020-04-18 14:52:41+01:00,1
1,25.0,Kitchen,2020-04-18 14:54:25+01:00,2020-04-18 14:59:04+01:00,1
2,30.0,Kitchen,2020-04-18 14:59:11+01:00,2020-04-18 15:05:49+01:00,1
3,44.0,Kitchen,2020-04-18 15:06:32+01:00,2020-04-18 15:11:21+01:00,1
4,50.0,Kitchen,2020-04-18 15:16:17+01:00,2020-04-18 15:18:17+01:00,1
5,52.0,Kitchen,2020-04-18 15:19:18+01:00,2020-04-18 15:24:16+01:00,1
6,56.0,Kitchen,2020-04-18 15:25:15+01:00,2020-04-18 15:29:23+01:00,1
