# Global import

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import sys
from datetime import datetime, timedelta

sys.path.append('..')
from dataset_management.redd.REDD import REDD
from dataset_management.ukdale.UKDALE import UKDALE


%matplotlib qt

In [None]:
from collections import Counter

# Gather the appliance label of every channel listed in each REDD house's
# labels.dat into one flat list (duplicates kept on purpose so they can be
# counted afterwards).
in_path=r'D:\dataset\REDD\low_freq'
Houses=[1,2,3,4,5,6]
Appliances = []
for h in Houses:
    house_path = os.path.join(in_path, f'house_{h}')
    # labels.dat has no header row and is space separated; only the first two
    # fields are read and named here.
    labels = pd.read_csv(
        os.path.join(house_path, 'labels.dat'),
        header=None,
        delimiter=' ',
        usecols=[0, 1],
        names=['channel', 'appliance_name'],
    )
    house_apps = list(labels['appliance_name'])
    print(f'House {h} apps({len(house_apps)}):{house_apps}')
    Appliances.extend(house_apps)

In [None]:
# Tally how often each appliance label occurs in the collected list and print
# one "label: count" line per distinct label (first-seen order).
counter = Counter(Appliances)

for element in counter:
    count = counter[element]
    print(f"{element}: {count}")

# REDD batch to_dataframe

In [None]:
# Run the project's REDD converter for houses 1-6 at an 8-second sampling
# setting. NOTE(review): presumably this writes one CSV per house into
# out_path (house_3.csv is read from that directory later) - confirm in REDD.
redd_raw_dir = r'D:\dataset\REDD\low_freq'
redd_frame_dir = r'D:\dataset\DataFrames\REDD'
redd = REDD(in_path=redd_raw_dir, out_path=redd_frame_dir)
redd.to_Dataframe(Houses=[1, 2, 3, 4, 5, 6], sample_seconds=8)

# read dataframe

In [None]:
# Load the converted REDD house 3 table and show summary statistics of it.
house_3_csv = r'D:\dataset\DataFrames\REDD\house_3.csv'
df = pd.read_csv(house_3_csv)
df.describe()

In [None]:
# Row-wise total of the mains channels versus the row-wise total of every
# other channel, both as NumPy arrays.
#
# Only numeric columns may take part in the sums: this frame also carries a
# string 'time' column (it is accessed through `.str` elsewhere in this
# notebook), and the negative-lookahead pattern '^(?!.*main)' matches it too.
# Since pandas 2.0 `sum` no longer drops non-numeric columns on its own, so
# summing strings together with floats raises TypeError. Restricting the
# frame to numeric dtypes first gives the intended result on old and new
# pandas alike.
numeric_df = df.select_dtypes(include='number')
p_main = numeric_df.filter(regex='^main').sum(axis=1).to_numpy()
p_sum = numeric_df.filter(regex='^(?!.*main)').sum(axis=1).to_numpy()

In [None]:
# Overlay the first 10000 samples of the mains total and of the summed
# non-mains channels on the current axes.
head = slice(0, 10000)
plt.plot(p_main[head])
plt.plot(p_sum[head])

In [None]:
# Keep every column whose name does not contain "main" and summarise it.
non_main_pattern = '^(?!.*main)'
filtered_columns = df.filter(regex=non_main_pattern)
filtered_columns.describe()

In [None]:
# Plot two columns for the rows whose 'time' string starts with 2011-04-19.
# The row mask is identical for both curves, so it is built once.
# NOTE(review): 'dishwaser' is kept exactly as written; presumably it mirrors
# the column name in the CSV - confirm before "correcting" the spelling.
on_day = df['time'].str.startswith('2011-04-19')
plt.plot(df.loc[on_day, 'mains'])
plt.plot(df.loc[on_day, 'dishwaser'])

# UKDALE batch to_dataframe

In [None]:
# Run the project's UK-DALE converter for houses 1-4 at an 8-second sampling
# setting. NOTE(review): presumably this writes one CSV per house into
# out_path (house_1.csv is read from that directory later) - confirm in UKDALE.
ukdale_raw_dir = r'D:\dataset\uk-dale-disaggregated'
ukdale_frame_dir = r'D:\dataset\DataFrames\UKDALE'
ukdale = UKDALE(in_path=ukdale_raw_dir, out_path=ukdale_frame_dir)
ukdale.to_Dataframe_V2(Houses=[1, 2, 3, 4], sample_seconds=8)

# Cut: pick the most active 5-week window of UK-DALE house 1 and keep its 30 most active appliances

In [None]:
import os
import sys
import math
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


sys.path.append('..')
import data_process.util_data as util_dt



# Directory settings for the cells below - local Windows workstation.
# file_load_dir is where the per-house CSV files are read from.
file_load_dir = r'D:\dataset\DataFrames\UKDALE'
file_save_dir = r'C:\Users\29492\Desktop\exp12'
# When running on the server, use these two lines instead:
# file_save_dir = r'../../data'
# file_load_dir = r'../../data'

In [None]:
# Load house 1, index it by its 'time' column and drop every column whose
# name starts with "aggregate".
dataframe_path = os.path.join(file_load_dir, r'house_1.csv')
df = (
    pd.read_csv(dataframe_path)
    .set_index('time')
    .filter(regex=r'^(?!aggregate)')
)

# Rows per day at one row every 8 seconds (the sample_seconds value passed to
# the converter), and the resulting - possibly fractional - number of days.
seconds_per_day = 60 * 60 * 24
num_sample_per_day = int(seconds_per_day / 8)
total_len = len(df)
num_days = total_len / num_sample_per_day

In [None]:
# Slide a window of num_day_needed days over the data, one day at a time, and
# remember the start day `j` whose window has the largest share of readings
# above on_threshold (taken over all columns together). Ties keep the
# earliest start day.
num_day_needed = 5*7
on_threshold = 30  # NOTE(review): presumably watts - confirm against the converter
max_value = 0
j = 0

window_len = num_day_needed * num_sample_per_day
# NOTE(review): this bound appears to leave out the last start days that would
# still fit a full window; kept as-is because changing it changes the result.
num_candidates = int(num_days - num_day_needed - 1)
for i in range(num_candidates):
    print(i)
    start = i * num_sample_per_day
    df_cut = df[start:start + window_len]
    apps_on_percent = (df_cut.to_numpy() > on_threshold).astype('int').mean()
    if apps_on_percent > max_value:
        max_value, j = apps_on_percent, i

print('-'*20, j, '-'*20)

In [None]:
# Re-extract the selected window (start day `j`) and compute, per column, how
# many readings exceed on_threshold and what fraction of the window that is.
window_start = j * num_sample_per_day
window_stop = (j + num_day_needed) * num_sample_per_day
df_cut = df[window_start:window_stop]
data = df_cut.to_numpy()

apps_on = (data > on_threshold).astype('int')
apps_on_num, apps_on_percent = apps_on.sum(axis=0), apps_on.mean(axis=0)

# One row per column of the data, labelled with the column name.
df_statistical = pd.DataFrame(
    {
        'apps_on_num': apps_on_num,
        'apps_on_percent': apps_on_percent,
    },
    index=df.columns.to_list(),
)

In [None]:
# Rank the columns from most to least active and attach a 1-based rank column.
sorted_df = (
    df_statistical
    .sort_values(by='apps_on_percent', ascending=False)
    .assign(num=lambda ranked: range(1, len(ranked) + 1))
)
sorted_df

In [None]:
# Keep only the 30 highest-ranked columns of the selected window.
top_apps = sorted_df.index[:30]
df_filtered = df_cut[top_apps]

In [None]:
# Write the reduced window next to the source CSVs (file_load_dir).
filtered_csv_path = os.path.join(file_load_dir, r'house_1_filtered_5W.csv')
df_filtered.to_csv(filtered_csv_path)

# Combine: per-house appliance activity for REDD houses 1-6 and the union of their appliance names

In [None]:
import os
import sys
import math
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


sys.path.append('..')
import data_process.util_data as util_dt



# Directory settings for the cells below - local Windows workstation.
# file_load_dir is where the per-house CSV files are read from.
file_load_dir = r'D:\dataset\DataFrames\REDD'
file_save_dir = r'C:\Users\29492\Desktop\exp12'
# When running on the server, use these two lines instead:
# file_save_dir = r'../../data'
# file_load_dir = r'../../data'

In [None]:
# For each of houses 1-6: print the fraction of readings above on_threshold
# for every column whose name does not start with "main", and accumulate the
# set of distinct column-name prefixes (the text before the first '-').
on_threshold = 30

app_name = set()
for i in range(1, 7):
    print(f'House {i}')
    dataframe_path = os.path.join(file_load_dir, f'house_{i}.csv')
    df = (
        pd.read_csv(dataframe_path)
        .set_index('time')
        .filter(regex=r'^(?!main)')
    )
    column_names = df.columns.to_list()

    data = df.to_numpy()
    apps_on = (data > on_threshold).astype('int')
    apps_on_percent = apps_on.mean(axis=0)

    df_statistical = pd.DataFrame(
        {'apps_on_percent': apps_on_percent},
        index=column_names,
    )
    print(df_statistical)

    app_name |= {column.split('-')[0] for column in column_names}

In [None]:
# Small scratch check of set union: elements present in either set.
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}

union_set = set1 | set2

print(union_set)
