In [1]:
# from load_csv import CSV_Loader
from configs.configuration import general_config, dataset_config
import pandas as pd
import glob
import dask.dataframe as dd
import pickle

In [22]:

class _Loader:
    """
    Base interface for loaders that bring a dataset into memory.

    Concrete loaders are expected to override ``_load_file``; the version
    defined here only raises ``NotImplementedError``.
    """

    def __init__(self):
        # Nothing in the base initializer can fail, so no exception handling
        # is needed; only the confirmation message is emitted.
        display("Loader Interface initialized")

    @staticmethod
    def _load_file(*args, **kwargs):
        """
        Placeholder for the loading routine.

        Accepts any arguments so that a call written for a concrete loader
        fails with ``NotImplementedError`` rather than a signature mismatch.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("_load_file must be implemented by a subclass")


class CSV_Loader(_Loader):
    """
    Loader for CSV data: eagerly through pandas (``_load_file``) or as a
    dictionary of per-house dask frames (``_load_file_via_dask``).
    """

    # House numbers requested when the caller does not pass ``fetch_houses``.
    _DEFAULT_HOUSES = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21)

    def __init__(self):
        # Errors from the base initializer propagate, so the confirmation is
        # only shown when construction actually succeeded.
        super().__init__()
        display("CSV_Loader initialized")

    @staticmethod
    def _load_file(csv_file_path,
                   index_column_name=None,
                   _nrows=None,
                   _iterator=True,
                   _chunksize=100000):
        """
        Read one CSV file into a pandas DataFrame and print its memory summary.

        Args:
            csv_file_path: path handed to ``pd.read_csv``.
            index_column_name: forwarded as ``index_col``.
            _nrows: forwarded as ``nrows``.
            _iterator: forwarded as ``iterator``.
            _chunksize: forwarded as ``chunksize``.

        Returns:
            The loaded DataFrame.

        Raises:
            Exception: any error raised while reading is displayed and then
                re-raised, so callers never receive ``None`` silently.
        """
        try:
            # Original author's note: loading data in chunks reduces 90 percent execution time.
            tp = pd.read_csv(csv_file_path, nrows=_nrows, index_col=index_column_name,
                             iterator=_iterator, chunksize=_chunksize)
            if isinstance(tp, pd.DataFrame):
                # Without iterator/chunksize read_csv already returns the frame;
                # passing it to pd.concat would raise.
                df = tp
            else:
                df = pd.concat(tp, ignore_index=False)
            df.info(verbose=False, memory_usage="deep")
            return df

        except Exception as e:
            display("Error occured in _load_file method of CSV_Loader class due to ", e)
            raise

    @staticmethod
    def _load_file_via_dask(csv_file_path,
                            fetch_houses=None):
        """
        Build one dask frame per requested house.

        Args:
            csv_file_path: prefix placed directly in front of
                ``House_<n>.csv``; no separator is inserted, so a folder
                path must already end with one.
            fetch_houses: iterable of house numbers; when omitted the
                class default list of houses is used.

        Returns:
            dict mapping each house number to the result of ``dd.read_csv``.

        Raises:
            Exception: any error is displayed and then re-raised.
        """
        if fetch_houses is None:
            fetch_houses = list(CSV_Loader._DEFAULT_HOUSES)
        try:
            display(f"Loading specified houses: {fetch_houses}")
            return {i: dd.read_csv(f'{csv_file_path}House_{i}.csv') for i in fetch_houses}

        except Exception as e:
            display("Error occured in _load_file_via_dask method of CSV_Loader class due to ", e)
            raise
            
            
    ###### appliance wise Dict Reader


In [28]:
import re

def parser(readme_file):
    """
    Extract the appliance names listed under every house header of a readme.

    A house header is a line that starts with "house" (any letter case) and
    has a second whitespace-separated token, which becomes the dictionary key.
    Up to five lines after a header are scanned; scanning stops at a
    ``!NOTES`` line, at the next house header, or at the end of the file.
    Each scanned line is split on commas and each piece on dots; the text
    after the first dot is taken as an appliance name.

    Args:
        readme_file: path of the readme text file.

    Returns:
        dict mapping the house token (a string) to a list of appliance names.

    Raises:
        Exception: any error is displayed and then re-raised instead of
            silently returning ``None``.
    """
    try:
        display(f'Loading the readme files specified: {readme_file}')
        with open(readme_file) as f:
            content = f.readlines()
        ls = {}
        for i, s in enumerate(content):
            if 'House' not in s.capitalize():
                continue
            header_fields = s.split()
            if len(header_fields) < 2:
                # A header without a house token cannot be keyed; skip it.
                continue
            house = header_fields[1]
            appliances = []
            # Slicing never runs past the end of the file, unlike indexing.
            for line in content[i + 1:i + 6]:
                if line.strip() == '!NOTES' or 'House' in line.capitalize():
                    break
                for value in line.split(','):
                    if value == '\n':
                        continue
                    t = value.split('.')
                    if len(t) < 2:
                        # No "<index>.<name>" pair in this piece.
                        continue
                    # A missing comma leaves two names in one piece; keep both.
                    appliances.extend(t[1:3])
            ls[house] = [item.split('\n')[0] for item in appliances]
        return ls

    except Exception as e:
        display("Error occured in parser method due to ", e)
        raise

In [29]:
# Instantiate the CSV loader; construction displays the two confirmation messages recorded below.
ob = CSV_Loader()

'Loader Interface initialized'

'CSV_Loader initialized'

In [30]:
%%time
# Build one dask frame per house from the configured data folder; the result is a dict keyed by house number.
collective_dataset = ob._load_file_via_dask(csv_file_path=general_config['DATA_FOLDER'])

'Loading specified houses: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21]'

Wall time: 480 ms


In [31]:
# Map each house token (a string such as '1') to the list of appliance names found in the readme.
keys_of_appliances = parser(general_config['README_FILE'])

'Loading the readme files specified: data/refit/REFIT_Readme.txt'

In [32]:
# Replace the generic "Appliance<N>" column headers of every house with the
# name found at position N of that house's parsed appliance list. Position 0
# is not used for renaming (presumably the aggregate reading; confirm against
# the readme). The mapping is built from however many names were parsed, so a
# house with fewer entries no longer raises IndexError.
for house_number in collective_dataset:
    cols = keys_of_appliances[str(house_number)]
    appliance_columns = {
        f"Appliance{position}": name
        for position, name in enumerate(cols)
        if position > 0
    }
    collective_dataset[house_number] = collective_dataset[house_number].rename(columns=appliance_columns)

In [126]:
def get_house_data(house_number):
    """Look up one house in ``collective_dataset`` and return the result of its ``compute()``."""
    house_frame = collective_dataset[house_number]
    return house_frame.compute()
    
def get_appliance_data(target_appliance, houses='all_houses'):
    """
    Collect the 'Time' and ``target_appliance`` columns of every selected
    house whose frame has a column named ``target_appliance``.

    Args:
        target_appliance: column name to look for in each house frame.
        houses: ``'all_houses'`` (default) to scan every key present in
            ``collective_dataset``, or a non-empty list/tuple of house keys.

    Returns:
        dict mapping house key to the computed two-column frame; houses
        without the column are left out.

    Raises:
        ValueError: if ``houses`` is neither ``'all_houses'`` nor a non-empty
            list/tuple. ``ValueError`` is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.
        KeyError: if a requested house key is not in ``collective_dataset``.
    """
    if isinstance(houses, str) and houses == 'all_houses':
        # Iterate the keys that actually exist: house numbers need not be
        # contiguous, so counting from 1 to len() can hit a missing key and
        # miss the last one.
        selected_houses = list(collective_dataset)
    elif isinstance(houses, (list, tuple)) and len(houses) != 0:
        selected_houses = houses
    else:
        raise ValueError("argument 'houses' should not be an empty list or by default set should be set to 'all_houses'")

    ls = {}
    for house_number in selected_houses:
        print(house_number)
        house_frame = collective_dataset[house_number]
        if target_appliance in house_frame.columns:
            ls[house_number] = house_frame[['Time', target_appliance]].compute()

    return ls

In [127]:
%%time
# HOUSE_1 = get_house_data(1)
# HOUSE_2 = get_house_data(2)
# HOUSE_3 = get_house_data(3)

Wall time: 0 ns


In [128]:
# NOTE(review): get_appliance_data rejects an empty `houses` list, so this call raises (see the recorded output).
# Pass a non-empty list of house numbers, or omit `houses` to scan every house.
KETTLE = get_appliance_data("Kettle", houses=[])

Exception: argument 'houses' should not be an empty list or by default set should be set to 'all_houses'

In [121]:
# NOTE(review): get_appliance_data only stores houses whose frame has the requested column, so a house key
# may be absent (the recorded KeyError). This cell's execution count is also older than the cell above it.
KETTLE[2]

KeyError: 2

In [None]:
# NOTE(review): stale scratch cell. `collective_dataset` as returned by `_load_file_via_dask` is a dict keyed by
# house number, so iterating it yields the keys and `house_data.keys()` fails. `target_appliance` is also not
# assigned in any earlier visible cell. get_appliance_data covers the same task.
list_of_house_numbers, list_of_appliance_data = [], []
for house_data in collective_dataset:
    for key in house_data.keys():
        print(key)
        if target_appliance in keys_of_appliances[str(key)]:
            data = house_data[key][['Time',target_appliance]]
            print(data)
            list_of_house_numbers.append(key)
            list_of_appliance_data.append(data)

In [None]:
# NOTE(review): stale scratch cell. Iterating the `collective_dataset` dict yields house-number keys, so
# `house_data.keys()` fails before any frame is reached.
for target_appliance in dataset_config['APPLIANCES']:
    print(target_appliance)
    for house_data in collective_dataset:
        for key in house_data.keys():
            print(house_data[key].loc[target_appliance])

In [None]:

# NOTE(review): stale scratch cell. It indexes `collective_dataset` as a zero-based list of {house_number: frame}
# dicts, whereas `_load_file_via_dask` returns a single dict keyed by house number.
# `set_appliance` is only printed, and `house_data` is rebound here as a list.
house_numbers = []
house_data = []
for target_appliance in dataset_config['APPLIANCES']:
    print(target_appliance)
    for house_number in dataset_config['HOUSES']:
        print(house_number)
        if target_appliance not in keys_of_appliances[str(house_number)]:
            print(-1)
        else:
            for idx, value in enumerate(keys_of_appliances[str(house_number)]):
                if value == target_appliance:
                    set_appliance = f"Appliance{idx}"
                    print(set_appliance)
                    itere=house_number-1
                    print(collective_dataset[itere][house_number][f"Appliance{idx}"])
                    
                    house_numbers.append(house_number)
                    house_data.append(collective_dataset[house_number-1][house_number][f"Appliance{idx}"])
                    

In [None]:
# NOTE(review): `parser` returns plain lists per house, so indexing the list with 'appliances' raises TypeError.
keys_of_appliances[str(5)]['appliances']

In [None]:
# NOTE(review): same stale ['appliances'] lookup; with the list returned by `parser` the membership test
# would be `"Kettle" in keys_of_appliances[str(2)]`.
if "Kettle" in keys_of_appliances[str(2)]['appliances']:
    print(1)

In [None]:
# Print, for houses 1-10, the position of 'Kettle' in the house's appliance list, or -1 when it is not listed.
# NOTE(review): relies on the stale ['appliances'] sub-key; `parser` returns the list directly.
for hh in [1,2,3,4,5,6,7,8,9,10]:
    print(f"House {hh}")
    if 'Kettle' in keys_of_appliances[str(hh)]['appliances'] :
        print(keys_of_appliances[str(hh)]['appliances'].index('Kettle'))
    else:
        print(-1)

In [None]:
# NOTE(review): `keys_mapping` is overwritten on every iteration, so only the last house (10) survives the loop.
# `fruits` stores the literal string "idx", not the index. Both lines rely on the stale ['appliances'] sub-key.
for hh in [1,2,3,4,5,6,7,8,9,10]:
    keys_mapping = [idx if value=='Kettle' else -1 for idx, value in enumerate(keys_of_appliances[str(hh)]['appliances'])]
keys_mapping

fruits = ["idx" if value=='Kettle' else "None" for idx, value in enumerate(keys_of_appliances[str(hh)]['appliances']) ]
print(fruits)

In [None]:
# NOTE(review): stale scratch cell. It treats `collective_dataset` as a list of dicts, reads `house` (the loop
# variable here is `key`, and `house` is not assigned in any earlier visible cell), and uses the ['appliances']
# sub-key that `parser` does not produce.
# mapping_keys = [key for house_dict in collective_dataset for key in house_dict.keys()]
# mapping_keys
mapping_keys = []

for house_dict in collective_dataset:
    for key in house_dict.keys():
        print(key)
        print(house_dict[key])
        print(keys_of_appliances[str(key)])
        for i in range(1,10):
            house_dict[key].rename({f'Appliance{i}': keys_of_appliances[str(house)]['appliances'][i]}, axis=1, inplace=True)
#     mapping_key = house_dict.keys()

# mapping_keys

In [None]:
# Show the appliance names parsed for the house keyed '3'.
keys_of_appliances['3']

In [None]:




# NOTE(review): stale scratch cell. It indexes `collective_dataset` from 0 although `_load_file_via_dask` keys it
# by house number, and it reads `nested_dict`, which is only assigned in a later cell (the pickle load).
for house in range(0, len(collective_dataset)):
    print(house)
#     print(d)
    for i in range(1, len(keys_of_appliances[str(house+1)]['appliances'])):
        collective_dataset[house].rename({f'Appliance{i}': nested_dict[str(house+1)]['appliances'][i]}, axis=1, inplace=True)

In [None]:
# NOTE(review): `house_dict.keys` is referenced without being called, so the inner `for` would try to iterate
# an attribute rather than a collection of keys.
for house_dict in collective_dataset:
    print(type(house_dict))
#     print(house_dict)
#     collective_dataset
    for key in house_dict.keys:
        print(key)


In [None]:
%%time
# NOTE(review): keys 0, 1, 2 assume a zero-based list; with the default house list of `_load_file_via_dask`
# the dict keys start at 1, so `collective_dataset[0]` raises KeyError.
d1 = collective_dataset[0].compute().set_index('Time')
d3 = collective_dataset[1].compute().set_index('Time')
d4 = collective_dataset[2].compute().set_index('Time')

# d1 = collective_dataset[0].compute()
# d3 = collective_dataset[1].compute()
# d4 = collective_dataset[2].compute()

In [None]:
# NOTE(review): same zero-based key assumption as above; `.keys` is referenced without being called.
collective_dataset[0].keys

In [None]:
%%time
# Time the chunked pandas load of the configured CSV with 'Time' as index; the returned frame is not stored.
ob._load_file(csv_file_path=dataset_config['REFIT_DATA_PATH'], index_column_name='Time')

In [None]:


# List every file in the configured folder whose name ends with the configured type suffix.
# NOTE(review): glob.glob does not guarantee an order, so `files[0]` may differ between runs; sort if order matters.
files = glob.glob(dataset_config['REFIT_DATA_FOLDER'] + '*' + dataset_config['REFIT_DATA_TYPE'])
print(files)
# data = [pd.read_csv(file, index_col='Time') for file in files]
# data
files[0]

In [None]:
%%time



In [None]:
# NOTE(review): `data` is only assigned in a later cell, so this fails on a fresh kernel run from top to bottom.
data

In [None]:
%%time
# Time a plain (non-chunked) pandas read of the first globbed file, indexed by 'Time'.
data = pd.read_csv(files[0], index_col='Time')

In [None]:

import csv


In [None]:
# Show the list of globbed file paths.
files

In [None]:
%%time
data1 = dask.dataframe.read_csv(files[0])
data2 = dask.dataframe.read_csv(files[1])
data3 = dask.dataframe.read_csv(files[2])
data4 = dask.dataframe.read_csv(files[3])

df1=data1.compute()
df2=data2.compute()
df3=data3.compute()
df4=data4.compute()

In [None]:
# Show the computed frame of the first globbed file.
df1

In [None]:
# NOTE(review): no visible cell assigns a top-level `df` (it is only a local inside CSV_Loader._load_file),
# so this likely depends on leftover kernel state; confirm or remove.
df

In [None]:
import logging
# basicConfig takes the log file through the `filename` keyword; an unrecognised keyword is rejected.
# A relative path keeps the cell independent of a particular drive letter.
logging.basicConfig(level=logging.INFO, filename='sample.log')

def hypotenuse(a, b):
    """Return the hypotenuse of a right triangle with legs ``a`` and ``b``."""
    return (a**2 + b**2)**0.5

kwargs = {'a':3, 'b':4, 'c':hypotenuse(3, 4)}

# One record per severity level; the debug record is below the INFO threshold requested above.
logging.debug("a = {a}, b = {b}".format(**kwargs))
logging.info("Hypotenuse of {a}, {b} is {c}".format(**kwargs))
logging.warning("a={a} and b={b} are equal".format(**kwargs))
logging.error("a={a} and b={b} cannot be negative".format(**kwargs))
logging.critical("Hypotenuse of {a}, {b} is {c}".format(**kwargs))

In [None]:
# The standard-library `parser` module no longer exists in current Python releases, and importing it
# under that name would also rebind the notebook's own `parser` function. `ast` is used instead.
import ast
print("Program to demonstrate parsing with the ast module in Python")
print("\n")
exp = "5 + 8"
print("The given expression for parsing is as follows:")
print(exp)
print("\n")
print("Parsing of given expression results as: ")
# mode="eval" parses a single expression and yields an ast.Expression node.
st = ast.parse(exp, mode="eval")
print(ast.dump(st))

In [None]:
import pickle

# NOTE(review): unpickling can execute arbitrary code; only load "file.pkl" if its origin is trusted.
# The context manager closes the file handle once the object has been read.
with open("file.pkl", "rb") as pickle_file:
    nested_dict = pickle.load(pickle_file)
# Pair the two computed house frames; `d1` and `d3` come from the earlier compute cell.
d = [d1, d3]

In [None]:
# nested_dict

In [None]:
# Show the first frame of the `d` list built in the pickle cell.
d[0]