In [1]:
# from load_csv import CSV_Loader
from configs.configuration import general_config, dataset_config
import pandas as pd
import glob
import dask.dataframe as dd
import pickle

In [22]:

class _Loader:
    """
    Base interface for loaders that read a data source into memory.

    Concrete loaders are expected to override ``_load_file``; the version
    defined here only raises ``NotImplementedError``.
    """

    def __init__(self):
        # There is no state to set up, so nothing here can fail; the message
        # is kept so that subclasses calling super().__init__() still
        # announce the interface.
        display("Loader Interface initialized")

    @staticmethod
    def _load_file(*args, **kwargs):
        """
        Placeholder for the concrete loading routine.

        Any positional or keyword arguments are accepted and ignored, so
        calling the stub with a subclass-style argument list fails with
        ``NotImplementedError`` instead of a ``TypeError`` about unexpected
        arguments.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError


class CSV_Loader(_Loader):
    """
    Loader for CSV files: eagerly through pandas (``_load_file``) or as dask
    dataframes, one per house file (``_load_file_via_dask``).

    Both loading methods are static, report failures through ``display`` and
    return ``None`` when loading fails.
    """

    # House numbers read by _load_file_via_dask when the caller does not pass
    # ``fetch_houses``. Kept as an immutable tuple so that the default cannot
    # be altered between calls. Note that 14 is not in the list.
    _DEFAULT_HOUSES = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21)

    def __init__(self):
        # Report success only after the base initialisation has really run
        # (the message used to be emitted from a ``finally`` clause, i.e.
        # even when initialisation had failed).
        super().__init__()
        display("CSV_Loader initialized")

    @staticmethod
    def _load_file(csv_file_path,
                   index_column_name=None,
                   _nrows=None,
                   _iterator=True,
                   _chunksize=100000):
        """
        Read one CSV file into a pandas DataFrame.

        Args:
            csv_file_path: path or file-like object handed to ``pd.read_csv``.
            index_column_name: forwarded as ``index_col``.
            _nrows: forwarded as ``nrows``.
            _iterator: forwarded as ``iterator``.
            _chunksize: forwarded as ``chunksize``; the chunks are
                concatenated back into a single frame.

        Returns:
            The loaded DataFrame, or ``None`` if loading failed (the error is
            shown with ``display``).

        Side effects:
            Prints a short ``DataFrame.info`` summary including deep memory
            usage.
        """
        try:
            # Original author's note: loading in chunks reduced execution
            # time considerably.
            loaded = pd.read_csv(csv_file_path, nrows=_nrows, index_col=index_column_name,
                                 iterator=_iterator, chunksize=_chunksize)

            if isinstance(loaded, pd.DataFrame):
                # With iterator=False and chunksize=None read_csv already
                # returns the finished frame; pd.concat would reject it.
                df = loaded
            else:
                try:
                    df = pd.concat(loaded, ignore_index=False)
                finally:
                    # Release the file handle even if concatenation fails.
                    loaded.close()

            df.info(verbose=False, memory_usage="deep")
            return df

        except Exception as e:
            display("Error occured in _load_file method of CSV_Loader class due to ", e)
            return None

    @staticmethod
    def _load_file_via_dask(csv_file_path,
                            fetch_houses=None):
        """
        Open ``House_<n>.csv`` for every requested house with dask.

        Args:
            csv_file_path: prefix placed directly in front of
                ``House_<n>.csv``; no path separator is inserted, so a folder
                must be given with its trailing separator.
            fetch_houses: iterable of house numbers; when omitted the house
                numbers in ``_DEFAULT_HOUSES`` are used.

        Returns:
            dict mapping house number -> dask dataframe (call ``.compute()``
            on a value to materialise it), or ``None`` if opening any file
            failed (the error is shown with ``display``).
        """
        try:
            if fetch_houses is None:
                fetch_houses = list(CSV_Loader._DEFAULT_HOUSES)

            display(f"Loading specified houses: {fetch_houses}")
            return {i: dd.read_csv(f'{csv_file_path}House_{i}.csv') for i in fetch_houses}

        except Exception as e:
            display("Error occured in _load_file_via_dask method of CSV_Loader class due to ", e)
            return None
            
            
    ###### appliance wise Dict Reader


In [28]:
import re

def _is_house_header(line):
    """Return True when ``line`` opens a house section (it begins with 'house', any letter case)."""
    return 'House' in line.capitalize()


def _parse_house_appliances(content, max_lookahead=5):
    """Build {house id: [appliance names]} from the readme lines in ``content``."""
    houses = {}
    for i, line in enumerate(content):
        if not _is_house_header(line):
            continue

        tokens = line.split()
        if len(tokens) < 2:
            # A header-looking line without an identifier carries no house id.
            continue
        house = tokens[1]

        appliances = []
        # Slicing (instead of indexing i+1 .. i+5) keeps the look-ahead inside
        # the file when a house section sits near the end.
        for following in content[i + 1:i + 1 + max_lookahead]:
            # The appliance list ends at the notes marker or at the next house.
            if following.strip() == '!NOTES' or _is_house_header(following):
                break
            for fragment in following.split(','):
                if fragment == '\n':
                    continue
                parts = fragment.split('.')
                if len(parts) < 2:
                    # No "<number>.<name>" pair in this fragment (blank or
                    # free text) - nothing to record.
                    continue
                # Normally parts == [number, name]. When a comma is missing
                # two entries are fused ("7.A 8.B"), so the piece after the
                # second dot is kept as well.
                appliances.extend(parts[1:3])

        houses[house] = [item.split('\n')[0] for item in appliances]
    return houses


def parser(readme_file):
    """
    Extract the appliance names listed for each house in a readme file.

    A house section starts with a line beginning with "House <id>". Up to
    five following lines are scanned for comma separated "<number>.<name>"
    entries; scanning stops early at the "!NOTES" marker or at the next
    house header.

    Args:
        readme_file: path of the readme text file.

    Returns:
        dict mapping the house id (a string, e.g. "1") to the list of
        appliance names in file order, or ``None`` if the file could not be
        read or parsed (the error is shown with ``display``).
    """
    try:
        display(f'Loading the readme files specified: {readme_file}')
        with open(readme_file) as f:
            content = f.readlines()
        return _parse_house_appliances(content)

    except Exception as e:
        display("Error occured in parser method due to ", e)
        return None

In [29]:
# Create the loader; the base interface and CSV_Loader each display a message while it is constructed.
ob = CSV_Loader()

'Loader Interface initialized'

'CSV_Loader initialized'

In [30]:
%%time
# Build the {house number: dask dataframe} mapping; fetch_houses is not passed, so the loader's default house list is used.
collective_dataset = ob._load_file_via_dask(csv_file_path=general_config['DATA_FOLDER'])

'Loading specified houses: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21]'

Wall time: 480 ms


In [31]:
# Map each house id (a string taken from the readme) to the appliance names listed for that house.
keys_of_appliances = parser(general_config['README_FILE'])

'Loading the readme files specified: data/refit/REFIT_Readme.txt'

In [32]:
# Swap the generic "Appliance<n>" column labels of every house for the
# appliance names parsed from the readme (entries 1..9 of that house's list).
for house_number in collective_dataset:
    cols = keys_of_appliances[str(house_number)]
    column_names = {f"Appliance{slot}": cols[slot] for slot in range(1, 10)}
    collective_dataset[house_number] = collective_dataset[house_number].rename(columns=column_names)

In [131]:
def get_house_data(house_number):
    """
    Materialise the full table of one house.

    Looks up ``house_number`` in the notebook-level ``collective_dataset``
    mapping and returns the result of calling ``.compute()`` on that entry.
    A house number missing from the mapping raises ``KeyError``.
    """
    lazy_frame = collective_dataset[house_number]
    return lazy_frame.compute()
    
def get_appliance_data(target_appliance, houses='all_houses'):
    """
    Collect the readings of one appliance from the selected houses.

    Args:
        target_appliance: column name of the appliance, e.g. "Kettle".
        houses: either the string 'all_houses' (every house present in the
            notebook-level ``collective_dataset``) or a non-empty list/tuple
            of house numbers.

    Returns:
        dict mapping house number -> computed table holding the 'Time' and
        ``target_appliance`` columns. Houses whose columns do not contain
        ``target_appliance`` are left out, so a requested house may be
        missing from the result.

    Raises:
        ValueError: ``houses`` is neither 'all_houses' nor a non-empty
            list/tuple.
        KeyError: an explicitly requested house is not in
            ``collective_dataset``.
    """
    if isinstance(houses, str) and houses == 'all_houses':
        # Iterate over the keys that actually exist: the house numbers are
        # not contiguous, so range(1, len(...) + 1) would hit a missing house
        # and never reach the highest one.
        selected = list(collective_dataset)
    elif isinstance(houses, (list, tuple)) and len(houses) != 0:
        selected = houses
    else:
        raise ValueError("argument 'houses' should not be an empty list or by default set should be set to 'all_houses'")

    found = {}
    for house_number in selected:
        house_frame = collective_dataset[house_number]
        if target_appliance in house_frame.columns:
            display(f'Fetching data for House {house_number}')
            found[house_number] = house_frame[['Time', target_appliance]].compute()

    return found

In [132]:
%%time
# Disabled examples: each line would materialise one whole house through get_house_data.
# HOUSE_1 = get_house_data(1)
# HOUSE_2 = get_house_data(2)
# HOUSE_3 = get_house_data(3)

Wall time: 0 ns


In [133]:
%%time
# Collect the 'Kettle' readings per house; a house whose columns lack 'Kettle' gets no entry in the result.
KETTLE = get_appliance_data("Kettle", houses=[1,2,3,4,5,6,7,8])

'Fetching data for House 2'

'Fetching data for House 3'

'Fetching data for House 4'

'Fetching data for House 5'

'Fetching data for House 6'

'Fetching data for House 7'

'Fetching data for House 8'

In [135]:
# NOTE: get_appliance_data only stores houses whose columns contain the appliance; house 1 was not
# fetched for "Kettle", so this lookup raises KeyError: 1.
KETTLE[1]

KeyError: 1