In [1]:
# Function to make all possible combinations of provided datasets AKA disk destroyer 9000
def merger(datasets, min_comb, max_comb):
    """Merge every combination of the given pickled datasets and save each result.

    For each combination size ``i`` in ``range(min_comb, max_comb)`` (so
    ``max_comb`` itself is excluded) and each ``i``-sized combination of
    ``datasets``, the pickled dictionaries are read from
    ``processed_data_final/`` and merged key by key: values stored under a key
    that several datasets share are concatenated with ``extend``. Each merged
    dictionary is pickled to ``testing_sets/<i>/<name>.pkl``, where ``<name>``
    joins the first three characters of every dataset's file stem with
    underscores.

    Args:
        datasets: File names of pickled dictionaries located in
            ``processed_data_final/``.
        min_comb: Smallest combination size to generate (inclusive).
        max_comb: Upper bound on the combination size (exclusive).

    Returns:
        None. The results are written to disk only.
    """
    import itertools
    import os
    import pickle

    # Generate all combinations of the datasets
    for i in range(min_comb, max_comb):
        out_dir = f'testing_sets/{i}'
        for comb in itertools.combinations(datasets, i):
            merged = {}
            # Merge the datasets
            for dataset in comb:
                # NOTE(review): pickle.load can execute arbitrary code, so only
                # trusted files should be placed in processed_data_final/.
                with open(f'processed_data_final/{dataset}', 'rb') as handle:
                    data = pickle.load(handle)
                for key, values in data.items():
                    if key in merged:
                        merged[key].extend(values)
                    else:
                        # Safe to keep a reference: `data` was freshly loaded
                        # and is discarded after this iteration.
                        merged[key] = values
            # Extract the first three letters of each dataset name and join them
            file_name = '_'.join([os.path.splitext(name)[0][:3] for name in comb]) + '.pkl'
            # The per-size folder may not exist yet on a fresh checkout; create
            # it instead of failing with FileNotFoundError on the first write.
            os.makedirs(out_dir, exist_ok=True)
            # Save the merged dictionary as a pickled file with the first three letters of each dataset name in the file name
            with open(f'{out_dir}/{file_name}', 'wb') as handle:
                pickle.dump(merged, handle)
# Pickled dataset files to combine; every file name has the form "<SOURCE>_processed.pkl".
datasets = [
    f'{source}_processed.pkl'
    for source in ('UKDALE', 'REFIT', 'ECO', 'SYND', 'LERTA', 'HES', 'SMART')
]
# Build the combinations of size 6 only (merger treats the upper bound 7 as exclusive).
merger(datasets, 6, 7)

In [None]:
# Take excess data out of the desired dictionary to save space
import pickle

path = 'SYND_processed.pkl'

# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('processed_data/' + path, 'rb') as handle:
    dataload = pickle.load(handle)

# List of accepted device names (canonical names plus known alternative spellings)
accepted = ['microwave', 'oven', 'fridge', 'washing machine', 'dishwasher', 'kettle', 'television', 'dish washer', 'electric oven']

# Alternative spelling -> canonical device name it is stored under
aliases = {'dish washer': 'dishwasher', 'electric oven': 'oven'}

# Filter data to only include accepted device names (dataload itself is left untouched)
data = {device: entries for device, entries in dataload.items() if device in accepted}

print("//////////////////")

for alias, canonical in aliases.items():
    if alias not in data:
        continue
    alias_entries = data.pop(alias)
    if canonical in data:
        # Both spellings are present: keep the recordings of both instead of
        # letting the alias silently overwrite the canonical entry.
        # Assumes each value is a list (merger() calls .extend on these values)
        # - TODO confirm. A new list is built so dataload's lists stay unmodified.
        data[canonical] = list(data[canonical]) + list(alias_entries)
    else:
        data[canonical] = alias_entries


with open('processed_data_final/' + path, 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:

def extract_daily_slices(data):
    """Split a time-indexed frame into consecutive per-day slices.

    Rows are walked in their stored order and a new slice is started whenever
    the calendar day of the index changes. The day is identified by both year
    and day-of-year, so rows that fall on the same day-of-year in different
    years are not fused into a single slice.

    Args:
        data: Object supporting ``data[('power', 'active')]`` and ``data.iloc``
            (e.g. a pandas DataFrame) whose index entries expose ``year`` and
            ``day_of_year``.

    Returns:
        List of ``data.iloc[...]`` selections, one per run of consecutive rows
        sharing a calendar day, in order of appearance. Empty input gives ``[]``.

    Raises:
        KeyError: If ``data`` has no ``('power', 'active')`` column.
    """
    daily_slices = []
    current_day = None
    current_slice = []
    # Only the timestamps are needed; the column access is kept so a frame
    # without the ('power', 'active') column still fails loudly.
    timestamps = data[('power', 'active')].index
    for i, date in enumerate(timestamps):
        # day_of_year alone repeats every year, so the year is part of the key.
        day = (date.year, date.day_of_year)
        if day != current_day:
            if current_day is not None:
                daily_slices.append(data.iloc[current_slice])
            current_day = day
            current_slice = [i]
        else:
            current_slice.append(i)
    # Flush the last (still open) day.
    if current_slice:
        daily_slices.append(data.iloc[current_slice])
    return daily_slices


def daily_1(spr, length):
    """Collect the daily slices of every device and cap them at ``length``.

    Args:
        spr: Mapping of device name to an iterable of per-iteration data; each
            item is passed to ``extract_daily_slices``.
        length: Maximum number of daily slices kept per device (forwarded to
            ``truncate_data``).

    Returns:
        Whatever ``truncate_data`` returns for the mapping of device name to
        the concatenated daily slices of all its iterations.
    """
    per_device = {}
    for device, iterations in tqdm(spr.items()):
        # Flatten the daily slices of all iterations into a single list.
        per_device[device] = [
            day_slice
            for iteration in iterations
            for day_slice in extract_daily_slices(iteration)
        ]
    return truncate_data(per_device, length)


def truncate_data(data, length):
    """Keep at most the first ``length`` slices of every device.

    Optional step, used for oversampling reasons.

    Args:
        data: Mapping of device name to a sliceable sequence of slices.
        length: Stop value of the slice applied to each sequence
            (``sequence[:length]``).

    Returns:
        New dict with the same keys, each mapped to ``sequence[:length]``.
    """
    return {device: slices[:length] for device, slices in data.items()}
