In [1]:
from utils.configuration import get_config_from_json
from utils.training_utilities import set_GPU
from utils.plotting_traces import plot_traces
from seq2point.seq2point import SEQ2POINT
from refit_loader.data_loader import REFIT_Loader
import builtins
import os
import torch
from pprint import pprint

builtins.GENERAL_CONFIG = get_config_from_json(description="General Settings", config_file="configs/general_config.json")
builtins.MODEL_CONFIG = get_config_from_json(description="Model Parameters", config_file="configs/model_config.json")
builtins.TRAINING_CONFIG = get_config_from_json(description="Training Configuration", config_file="configs/training_config.json")
builtins.PLOT_CONFIG = get_config_from_json(description="Plot Settings", config_file="configs/plot_config.json")

In [2]:
refit = REFIT_Loader()

Followings are the general configuration of your experiment..
{'DATA_FOLDER': 'data/refit/', 'DATA_TYPE': '.csv', 'README_FILE': 'refit_loader/REFIT_Readme.txt', 'REFIT_HOUSES': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21]}

Loading specified buildings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21]
Parsing the readme file specified: refit_loader/REFIT_Readme.txt


In [3]:
kettle = refit.get_appliance_data(appliance="Kettle", houses=[2, 4])

Loading data for appliance KETTLE ...
Fetching KETTLE data for House 2
Fetching KETTLE data for House 4


In [4]:
# kettle.resample(sampling_period='8s', fill_value=0.0, window_limit=1.0)

In [5]:
# kettle.data[2].isnull().sum()
# kettle.data[2].dropna(inplace=True)

In [12]:
import torch
import numpy as np
import pandas as pd
class DataGenerator(torch.utils.data.Dataset):
    def __init__(self, data):
        
        self.sequence_length = MODEL_CONFIG['SEQUENCE_LENGTH']
        # assert self.sequence_length >= 599, f"Provided sequence length is {self.sequence_length} while it should be atleast >=599"
        
        lst = [0] * 599        
        self.time = data.index
        self.X = data['aggregate'] 

        self.y =  data[data.columns[-1]] 
#         self.X = pd.concat([ data['aggregate'] , pd.Series(lst)])

#         self.y = pd.concat([ data[data.columns[-1]] , pd.Series(lst)])

        
    def __len__(self):
        return len(self.time)
    
    def __getitem__(self, index):
        mid = int(index + (self.sequence_length/2))
        
        if index + self.sequence_length > len(self.time) and mid > len(self.time):
            print('not valid')
            # print(torch.tensor(self.X.iloc[index:index + self.sequence_length]), torch.tensor(self.y.iloc[[mid]]))
        return (torch.tensor(self.X.iloc[index:index + self.sequence_length]), torch.tensor(self.y.iloc[[mid]]))


In [13]:

def main(train_data, validation_data):
    
    print(f"Followings are the {GENERAL_CONFIG['DESCRIPTION']} of your project..")
    pprint(GENERAL_CONFIG)
    
    ###### random seed selection in progress

    random_seed = 10

    network = SEQ2POINT().to(set_GPU())

    train_generator = DataGenerator(train_data)

    train_dataloader = torch.utils.data.DataLoader(dataset=train_generator, 
                                                  batch_size=TRAINING_CONFIG['TRAIN_BATCH_SIZE'], # how many samples per batch
                                                  num_workers=0, # how many subprocesses to use for data loading (higher = more)
                                                  shuffle=False) # shuffle the data
    
    validation_generator = DataGenerator(validation_data)
    
    validation_dataloader = torch.utils.data.DataLoader(dataset=validation_generator, 
                                                  batch_size=TRAINING_CONFIG['TRAIN_BATCH_SIZE'], # how many samples per batch
                                                  num_workers=0, # how many subprocesses to use for data loading (higher = more)
                                                  shuffle=False) # shuffle the data    

    
    train_loss, validation_loss = network.run(train_dataloader, validation_dataloader)

    plot_traces(traces = [train_loss, validation_loss], labels=['training', 'validation'], axis_labels=['Epochs', 'Loss'], title='Training Loss vs Validation Loss per Epoch')


In [14]:
main(train_data = kettle.data[2].iloc[2000:3000], validation_data = kettle.data[4].iloc[2000:3000])

Followings are the General Settings of your project..
{'DATA_PATH': 'data/refit/',
 'DESCRIPTION': 'General Settings',
 'LOAD_MODEL': '',
 'PRE_TRAINED_MODEL_FLAG': False,
 'SAVE_PATH': 'models/'}

Initializing SEQ2POINT model archiecture

Followings are the Model Parameters of your network architecture..
{'CONV_KERNEL': [10, 8, 6, 5, 5],
 'CONV_LAYERS': 5,
 'CONV_PADDING': 0,
 'CONV_STRIDE': 1,
 'DESCRIPTION': 'Model Parameters',
 'INPUT_CHANNELS': [1, 30, 30, 40, 50],
 'LEFT_PAD': [4, 3, 2, 2, 2],
 'LINEAR_INPUT': [29950, 1024],
 'LINEAR_LAYERS': 2,
 'LINEAR_OUTPUT': [1024, 1],
 'OUTPUT_CHANNELS': [30, 30, 40, 50, 50],
 'POOL_KERNEL': [],
 'POOL_STRIDE': [],
 'RIGHT_PAD': [5, 4, 3, 2, 2],
 'SEQUENCE_LENGTH': 599}

SEQ2POINT model archiecture has been initialized

Followings are the Training Configuration of your experiment..
{'DESCRIPTION': 'Training Configuration',
 'EARLY_STOPPING_THRESHOLD': 3,
 'LEARNING_RATE': 0.001,
 'LOSS': 'nn.MSELoss',
 'LOSS_REDUCTION': 'mean',
 'NUM_EPOCHS

TypeError: cannot unpack non-iterable NoneType object