In [2]:
import os
import time

import pandas as pd
import numpy as np
import datetime
import pytz

from process_data.utils import get_energy_consumption, get_working_time

# Get data from .csv file and load them to Data Frame

In [6]:
def convert_csv_to_dataframe(
    start_day: str,
    stop_day: str
):
    
    bg_dir = os.path.join("processed_data", "Sep")
    ids = os.listdir(bg_dir)

    date = pd.to_datetime(start_day)
    track_day = start_day 

    date_list = []
    while date <= pd.to_datetime(stop_day):
        date_list.append(track_day)
        date += pd.to_timedelta(1, 'D')
        track_day = date.strftime("%Y-%m-%d")
        
    df_sensor = pd.DataFrame()
    df_energy = pd.DataFrame()
    df_activities = pd.DataFrame()

    for user_name in ids:
        print(f"[DEBUG] Tracking {user_name}")
        for date in date_list:
            print(f"[DEBUG] Date: {date}")
            df_sensor_daily = pd.read_csv(os.path.join(bg_dir, user_name, f"{date}_sensor.csv"))
            df_sensor_daily["timestamp"] = df_sensor_daily["timestamp"].apply(lambda x: pd.to_datetime(x))
            df_sensor = pd.concat([df_sensor, df_sensor_daily], ignore_index=True)

            df_energy_daily = pd.read_csv(os.path.join(bg_dir, user_name, f"{date}_energy.csv"))
            df_energy_daily["timestamp"] = df_energy_daily["timestamp"].apply(lambda x: pd.to_datetime(x))
            df_energy = pd.concat([df_energy, df_energy_daily], ignore_index=True)

            df_activities_daily = pd.read_csv(os.path.join(bg_dir, user_name, f"{date}_activities.csv"))
            df_activities_daily["timestamp"] = df_activities_daily["timestamp"].apply(lambda x: pd.to_datetime(x))
            df_activities_daily["user_name"] = user_name
            df_activities = pd.concat([df_activities, df_activities_daily], ignore_index=True)
            
    return df_sensor, df_energy, df_activities, ids

# Some extract functions

In [1]:
def get_device_list(user_name: str):
    return df_energy[df_energy["user_name"] == user_name]["device_name"].unique()

In [4]:
def get_data(
    user_name: str,
    device_name: str,
    track_day: str,
    data_type: str,
    df_sensor: pd.DataFrame = pd.DataFrame(),
    df_energy: pd.DataFrame = pd.DataFrame(),
    df_activities: pd.DataFrame = pd.DataFrame(),
):
    if data_type == "energy":
        df_energy = df_energy[df_energy["user_name"] == user_name].reset_index(drop=True)
        df_e = df_energy[
            (df_energy["device_name"] == device_name) &
            (df_energy["timestamp"] >= pd.to_datetime(track_day)) &
            (df_energy["timestamp"] < pd.to_datetime(track_day) + pd.to_timedelta(1, 'D'))
        ].reset_index(drop=True)
        return df_e
    elif data_type == "sensor":
        df_sensor = df_sensor[df_sensor["user_name"] == user_name].reset_index(drop=True)
        df_s = df_sensor[
            (df_sensor["device_name"] == device_name) &
            (df_sensor["timestamp"] >= pd.to_datetime(track_day)) &
            (df_sensor["timestamp"] < pd.to_datetime(track_day) + pd.to_timedelta(1, 'D'))
        ].reset_index(drop=True)
        return df_s
    elif data_type == "activity":
        df_activities = df_activities[df_activities["user_name"] == user_name].reset_index(drop=True)
        df_a = df_activities[
            (df_activities["device_name"] == device_name) &
            (df_activities["timestamp"] >= pd.to_datetime(track_day)) &
            (df_activities["timestamp"] < pd.to_datetime(track_day) + pd.to_timedelta(1, 'D'))
        ].reset_index(drop=True)
        return df_a
    else:
        return pd.DataFrame()

In [5]:
def extract_user_data(
    user_name,
    device_name, 
    track_day
):
    df_s = get_data(
        user_name,
        device_name,
        track_day,
        "sensor",
        df_sensor=df_sensor,
        df_energy=df_energy,
        df_activities=df_activities
    )
        
    df_e = get_data(
        user_name,
        device_name,
        track_day,
        "energy",
        df_sensor=df_sensor,
        df_energy=df_energy,
        df_activities=df_activities
    )
    
    df_act = get_data(
        user_name,
        device_name,
        track_day,
        "activity",
        df_sensor=df_sensor,
        df_energy=df_energy,
        df_activities=df_activities
    )
    
    return df_s, df_e, df_act

# Get data from local file

In [7]:
start_day = "2022-09-01"
stop_day = "2022-09-23"

In [10]:
from time import time

df_sensor, df_energy, df_activities, ids = convert_csv_to_dataframe(
    start_day = start_day,
    stop_day = stop_day
)

[DEBUG] Tracking Apache Footware - Lầu 1
[DEBUG] Date: 2022-09-01
[DEBUG] Date: 2022-09-02
[DEBUG] Date: 2022-09-03
[DEBUG] Date: 2022-09-04
[DEBUG] Date: 2022-09-05
[DEBUG] Date: 2022-09-06
[DEBUG] Date: 2022-09-07
[DEBUG] Date: 2022-09-08
[DEBUG] Date: 2022-09-09
[DEBUG] Date: 2022-09-10
[DEBUG] Date: 2022-09-11
[DEBUG] Date: 2022-09-12
[DEBUG] Date: 2022-09-13
[DEBUG] Date: 2022-09-14
[DEBUG] Date: 2022-09-15
[DEBUG] Date: 2022-09-16
[DEBUG] Date: 2022-09-17
[DEBUG] Date: 2022-09-18
[DEBUG] Date: 2022-09-19
[DEBUG] Date: 2022-09-20
[DEBUG] Date: 2022-09-21
[DEBUG] Date: 2022-09-22
[DEBUG] Date: 2022-09-23
[DEBUG] Tracking Apache Footware - Lầu 2
[DEBUG] Date: 2022-09-01
[DEBUG] Date: 2022-09-02
[DEBUG] Date: 2022-09-03
[DEBUG] Date: 2022-09-04
[DEBUG] Date: 2022-09-05
[DEBUG] Date: 2022-09-06
[DEBUG] Date: 2022-09-07
[DEBUG] Date: 2022-09-08
[DEBUG] Date: 2022-09-09
[DEBUG] Date: 2022-09-10
[DEBUG] Date: 2022-09-11
[DEBUG] Date: 2022-09-12
[DEBUG] Date: 2022-09-13
[DEBUG] Date: 2022

[DEBUG] Date: 2022-09-08
[DEBUG] Date: 2022-09-09
[DEBUG] Date: 2022-09-10
[DEBUG] Date: 2022-09-11
[DEBUG] Date: 2022-09-12
[DEBUG] Date: 2022-09-13
[DEBUG] Date: 2022-09-14
[DEBUG] Date: 2022-09-15
[DEBUG] Date: 2022-09-16
[DEBUG] Date: 2022-09-17
[DEBUG] Date: 2022-09-18
[DEBUG] Date: 2022-09-19
[DEBUG] Date: 2022-09-20
[DEBUG] Date: 2022-09-21
[DEBUG] Date: 2022-09-22
[DEBUG] Date: 2022-09-23
[DEBUG] Tracking VTM Dong Van Cong - Q2
[DEBUG] Date: 2022-09-01
[DEBUG] Date: 2022-09-02
[DEBUG] Date: 2022-09-03
[DEBUG] Date: 2022-09-04
[DEBUG] Date: 2022-09-05
[DEBUG] Date: 2022-09-06
[DEBUG] Date: 2022-09-07
[DEBUG] Date: 2022-09-08
[DEBUG] Date: 2022-09-09
[DEBUG] Date: 2022-09-10
[DEBUG] Date: 2022-09-11
[DEBUG] Date: 2022-09-12
[DEBUG] Date: 2022-09-13
[DEBUG] Date: 2022-09-14
[DEBUG] Date: 2022-09-15
[DEBUG] Date: 2022-09-16
[DEBUG] Date: 2022-09-17
[DEBUG] Date: 2022-09-18
[DEBUG] Date: 2022-09-19
[DEBUG] Date: 2022-09-20
[DEBUG] Date: 2022-09-21
[DEBUG] Date: 2022-09-22
[DEBUG] Da

# Testing algo

In [11]:
ids

['Apache Footware - Lầu 1',
 'Apache Footware - Lầu 2',
 'Apache Footware - Lầu 3',
 'GS25 16 Thảo Điền',
 'GS25 Gateway',
 'JYSK AN PHÚ',
 'JYSK THUẬN AN',
 'ONBE BOOTH',
 'REE - Togo Food',
 'The Coffee Lab',
 'Thế Giới Di Động 358 Lê Văn Việt',
 'Trà Sữa Winnie Sư Vạn Hạnh - Lầu 03, 04',
 'Trà Sữa Winnie Sư Vạn Hạnh - Lầu Trệt, 01, 02',
 'VinMart Vinhomes Central 2',
 'VTM Dong Van Cong - Q2',
 'VTM Phan Dinh Phung - PN',
 'Điên Máy Xanh 633 Lê Văn Việt']

In [13]:
user_name = "VTM Dong Van Cong - Q2"
track_day = "2022-09-20"

device_list = get_device_list(user_name)
device_list

array(['Nagakawa 01 Controller', 'Nagakawa 02 Controller'], dtype=object)

In [18]:
def find_turn_on_activities(
    df_energy,
    df_activities
):
    # Collect ALL ACTIVITIES POWER ON, including set temperature, and set_power
    filter_drop_sch = df_activities[
        (df_activities["event_type"] == "update_scheduler") &
        (df_activities["event_type"] == "add_scheduler")
    ].index
    df_activities = df_activities.drop(index=filter_drop_sch)
    
    activities_ON = np.where(df_activities['power'] == True)[0]
    
    activities_turn_ON = []

    # Filter TURN ON ACTIVITIES
    for idx in activities_ON:

        df_temp = df_energy[
            (df_energy['timestamp'] <= df_activities.iloc[idx]['timestamp']) & 
            (df_energy['timestamp'] >= df_activities.iloc[idx]['timestamp'] - pd.to_timedelta(15, 'm'))
        ]

        if not df_temp.empty:
            if df_temp['power'].mean() < 70:
                activities_turn_ON.append(idx)

    return activities_turn_ON

In [27]:
for device_name in device_list:
    
    df_s, df_e, df_act = extract_user_data(
        user_name=user_name,
        device_name=device_name,
        track_day=track_day
    )
    
    activities_turn_ON = find_turn_on_activities(df_e, df_act)
    
    df_s_cut = pd.DataFrame()
    
    # If there is no activities
    if len(activities_turn_ON) == 0:
        df_e_cut = df_e[df_e["power"] > 70].reset_index(drop=True)
        
        if len(df_e_cut) > 1:
            
            t_start = df_e_cut["timestamp"].iloc[0]
            t_end = df_e_cut["timestamp"].iloc[-1]
            
            df_s_cut = df_s[
                (df_s["timestamp"] >= t_start) &
                (df_s["timestamp"] <= t_end)
            ].reset_index(drop=True)
        else:
            pass
    elif len(activities_turn_ON) == 1:
        
        idx = activities_turn_ON[0]
        t_start = df_act["timestamp"].iloc[idx]
        
        df_e_cut = df_e[
            (df_e["timestamp"] > t_start) &
            (df_e["power"] > 70)
        ].reset_index(drop=True)
        
        t_end = df_e_cut["timestamp"].iloc[-1]
        
        df_s_cut = df_s[
            (df_s["timestamp"] >= t_start) &
            (df_s["timestamp"] <= t_end)
        ].reset_index(drop=True)
    else:
        
        for idx in range(len(activities_turn_ON) - 1):
            
            t_current = df_act["timestamp"].iloc[activities_turn_ON[idx]]
            t_next = df_act["timestamp"].iloc[activities_turn_ON[idx + 1]]
            
            df_e_cut = df_e[
                (df_e["timestamp"] > t_current) &
                (df_e["timestamp"] < t_next)
                (df_e["power"] > 70)
            ].reset_index(drop=True)
            
            t_end = df_e_cut["timestamp"].iloc[-1]
        
            df_s_cut = pd.concat(
                [df_s_cut, 
                 df_s[
                    (df_s["timestamp"] >= t_current) &
                    (df_s["timestamp"] <= t_end)
                ].reset_index(drop=True)
                ],
                ignore_index=True
            )
            
    print(df_s_cut["humidity"].max(), " ", df_s_cut["humidity"].mean())
    if (df_s_cut["humidity"].max() >= 85) or (df_s_cut["humidity"].mean() >= 75):
        print(f"Warning! The humidity of device {device_name} very high")

83.0   69.47916666666667
84.0   75.4551282051282
