<br>
<br>
 <center> <font size = "5"> Plegma Dataset </font></center>
 <br>
 <center> <font size = "4"> Activations identification </font></center>
 <br>
 <center> <font size = "3"> </font></center>
<br>
<br>

---

### Imports

In [None]:
import pandas as pd
from datetime import datetime, date, timedelta
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import plotly
from   plotly import __version__
from   plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pandas as pd
import plotly.graph_objects as go
import plotly.graph_objs as go

### Specify Data 

In [None]:
# Recording to inspect: the house folder under clean_dataset/, the appliance
# column to analyse, and the monthly CSV file name (without the extension).
house = 'House_01'
appliance = 'washing_machine'
year_month = '2022-07'

### Read data

In [None]:
# Load the selected month of readings and the house's appliance metadata
# -----------------------------------------------------------------------

electric_data = pd.read_csv(f'clean_dataset/{house}/Electric_data/{year_month}.csv', header=0)
# Convert the timestamp column to datetimes, then use it as the row index
electric_data['timestamp'] = pd.to_datetime(electric_data['timestamp'])
electric_data = electric_data.set_index('timestamp')

# Per-appliance parameters (wattage, threshold, min on/off) used by compute_status
appliances_metadata = pd.read_csv(f'clean_dataset/{house}/Electric_data/appliances_metadata.csv', header=0)

### Plot Data and check appliance metadata

In [None]:
def make_plot(df, title):

    """
    Draws every column of a DataFrame as a line in a single interactive Plotly figure.

    Each column becomes its own trace, named after the column and plotted against the
    DataFrame index. The 'status' column is drawn with a thinner line (width 2) than the
    other columns (width 4) so that it stays distinguishable when overlaid on the
    measured signal.

    Parameters:
    df (pd.DataFrame): The time series to plot, one column per line. The index is used
                       as the x-axis.
    title (str): The title shown above the figure.

    Returns:
    None: The figure is displayed with fig.show(); nothing is returned.
    """

    figure = go.Figure()

    # One trace per column; only the line width depends on the column name
    for column in df.columns:
        line_width = 2 if column == 'status' else 4
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=column,
            line=dict(width=line_width),
        )
        figure.add_trace(trace)

    # Title and axis labels
    figure.update_layout(title=title, xaxis_title='Timestamp', yaxis_title='Power (W)')

    # Render the figure
    figure.show()

### Compute status and activations

In [None]:
def compute_status(y, appliance, metadata=None, sample_period=10):

    """
    Determines when an appliance is on from its power readings and counts its activations.

    A reading is considered "on" when it is greater than or equal to the appliance's
    threshold. Consecutive on-periods separated by an off-gap of at most min_off are merged
    into one activation, and activations shorter than min_on are discarded. The thresholds
    and durations are read from the appliance's row in the metadata table.

    Parameters:
    y (array-like): One-dimensional sequence of power readings for the appliance, compared
                    against the 'threshold [W]' metadata value.
    appliance (str): Name of the appliance; must match a value in the metadata 'appliance' column.
    metadata (pd.DataFrame, optional): Table with the columns 'appliance', 'wattage [W]',
                    'threshold [W]', 'min_on (sec)' and 'min_off (sec)'. When omitted, the
                    notebook-level `appliances_metadata` table is used.
    sample_period (int or float, optional): Seconds between consecutive readings, used to
                    convert min_on/min_off from seconds to samples. Defaults to 10.

    Returns:
    Tuple (numpy.ndarray, int):
        - An array with the same shape as y holding the appliance wattage where it is on and zero elsewhere.
        - The number of activations (on-events) that remain after filtering.

    Raises:
    ValueError: If sample_period is not positive.
    IndexError: If the appliance has no row in the metadata table.
    """

    if sample_period <= 0:
        raise ValueError('sample_period must be a positive number of seconds')

    # Fall back to the notebook-level table so existing two-argument calls keep working
    if metadata is None:
        metadata = appliances_metadata

    # Select the appliance's metadata row once instead of once per field
    spec = metadata.loc[metadata['appliance'] == appliance]
    wattage   = spec['wattage [W]'].iloc[0]
    threshold = spec['threshold [W]'].iloc[0]
    min_on    = spec['min_on (sec)'].iloc[0] / sample_period    # seconds -> samples
    min_off   = spec['min_off (sec)'].iloc[0] / sample_period   # seconds -> samples

    y = np.asarray(y)
    temp_status = np.zeros(y.shape)

    # Nothing to analyse: avoid indexing the first/last element of an empty array
    if y.size == 0:
        return temp_status, 0

    initial_status = y >= threshold

    # Indices where the on/off state changes (index of the first sample of the new state)
    status_diff = np.diff(initial_status)
    events_idx  = status_diff.nonzero()[0] + 1

    # Close the sequence so that every on-event has a matching off-event
    if initial_status[0]:
        events_idx = np.insert(events_idx, 0, 0)
    if initial_status[-1]:
        events_idx = np.append(events_idx, initial_status.size)

    events_idx = events_idx.reshape(-1, 2)
    on_events  = events_idx[:, 0].copy()
    off_events = events_idx[:, 1].copy()

    if len(on_events):
        # Gap (in samples) between the end of one on-period and the start of the next
        long_gap = (on_events[1:] - off_events[:-1]) > min_off

        # Merge on-periods separated by a short gap: drop the on-event that follows the
        # gap and the off-event that precedes it. The first on-event and the last
        # off-event have no such neighbour and are always kept.
        on_events  = on_events[np.concatenate(([True], long_gap))]
        off_events = off_events[np.concatenate((long_gap, [True]))]

        # Discard activations that are too short
        long_enough = (off_events - on_events) >= min_on
        on_events   = on_events[long_enough]
        off_events  = off_events[long_enough]

    for on, off in zip(on_events, off_events):
        temp_status[on:off] = wattage

    return temp_status, len(on_events)

In [None]:
# Detect the activations of the selected appliance in the loaded month
status, activations = compute_status(electric_data[appliance].values, appliance)
print(f'The number of activations are: {activations}')

# Plot the measured power together with the reconstructed on/off status
df = electric_data[[appliance]].assign(status=status)
make_plot(df, appliance)

### Check the number of activations per device across the whole dataset

In [None]:
# Create a nested dictionary with all the activations per house
# and per appliance: houses[house][month_file][appliance] -> number of activations
#
# NOTE: this loop rebinds the notebook-level names house, appliance,
# electric_data, appliances_metadata, status and activations.

# Columns of the electric data that are not appliance channels
values_to_exclude = ['V','A','P_agg', 'issues']

houses = {}
for house in os.listdir('clean_dataset'):
    electric_dir = 'clean_dataset/'+house+'/Electric_data'
    # Skip stray entries (e.g. hidden files) that are not house folders
    if not os.path.isdir(electric_dir):
        continue

    # Only monthly CSV files hold readings; ignore the metadata file and anything else
    month_files = [name for name in os.listdir(electric_dir)
                   if name.endswith('.csv') and name != 'appliances_metadata.csv']

    # The metadata is per house, so read it once instead of once per month.
    # compute_status looks the table up through this notebook-level name.
    if month_files:
        appliances_metadata = pd.read_csv(electric_dir+'/appliances_metadata.csv',header=0)

    months = {}
    for month in month_files:
        # read data
        electric_data = pd.read_csv(electric_dir+'/'+month,header=0)
        electric_data['timestamp'] = pd.to_datetime(electric_data['timestamp'])
        electric_data.set_index('timestamp', inplace=True)

        # Every remaining column is treated as an appliance channel
        mask = ~np.isin(electric_data.columns.values, values_to_exclude)
        appliances = electric_data.columns.values[mask]

        devices = {}
        for appliance in appliances:
            status, activations = compute_status(electric_data[appliance].values,appliance)
            devices[appliance] = activations

        months[month] = devices
    houses[house] = months

In [None]:
def total_ac_activations(houses_data, appliance_name):

    """
    Sums the activations of one appliance over every house and every month.

    The input is the nested dictionary built per house and per month. Every appliance
    whose name starts with 'appliance_name' contributes its activation count to the total,
    so a prefix matches several numbered devices of the same kind.

    Parameters:
    houses_data (dict): A nested dictionary with the following structure:
                        {
                          'house1': {
                            'January': {'appliance1': activations, ...},
                            ...
                          },
                          ...
                        }
    appliance_name (str): Full name or leading part of the appliance name to match.

    Returns:
    int: The summed activations of all matching appliances; 0 when nothing matches.
    """
    # The house and month keys are not needed, only the per-appliance counts
    return sum(
        count
        for monthly_records in houses_data.values()
        for devices in monthly_records.values()
        for device, count in devices.items()
        if device.startswith(appliance_name)
    )

In [None]:
# Total washing-machine activations over all houses and months
total_ac_activations(houses_data=houses, appliance_name='washing_machine')