In [1]:
# from load_csv import CSV_Loader
from configs.configuration import general_config, dataset_config
import pandas as pd
import glob
import dask.dataframe as dd
import pickle

In [2]:

class _Loader:
    """
    Base interface for loaders that bring a data source into memory.

    Subclasses override ``_load_file``; the base implementation only signals
    that no loading strategy has been provided.
    """

    def __init__(self):
        # Nothing to set up. The message confirms construction; ``display`` is
        # the helper IPython injects into notebook sessions.
        display("Loader Interface initialized")

    @staticmethod
    def _load_file(*args, **kwargs):
        """
        Placeholder for the concrete loading routine.

        Accepts any arguments so that calling it with a subclass-style
        signature reports NotImplementedError instead of a TypeError about
        unexpected arguments.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError


class CSV_Loader(_Loader):
    """
    Loader for CSV files, either eagerly with pandas or per house with Dask.
    """

    def __init__(self):
        super().__init__()
        display("CSV_Loader initialized")

    @staticmethod
    def _load_file(csv_file_path,
                   index_column_name=None,
                   _nrows=None,
                   _iterator=True,
                   _chunksize=100000):
        """
        Read one CSV file into a pandas DataFrame and print its memory summary.

        Args:
            csv_file_path: path passed to ``pd.read_csv``.
            index_column_name: column to use as the index (``index_col``).
            _nrows: optional cap on the number of rows read.
            _iterator: forwarded to ``pd.read_csv`` as ``iterator``.
            _chunksize: rows per chunk; chunks are concatenated before returning.

        Returns:
            The loaded DataFrame, or None when loading fails (the error is
            shown with ``display`` instead of being raised).
        """
        try:
            # The original author notes that reading in chunks was much faster
            # for these files.
            loaded = pd.read_csv(csv_file_path,
                                 nrows=_nrows,
                                 index_col=index_column_name,
                                 iterator=_iterator,
                                 chunksize=_chunksize)

            if isinstance(loaded, pd.DataFrame):
                # With iterator=False and chunksize=None pandas already returns
                # a DataFrame; passing that to pd.concat would raise.
                df = loaded
            else:
                try:
                    df = pd.concat(loaded, ignore_index=False)
                finally:
                    # Release the file handle even if a chunk fails to parse.
                    loaded.close()

            df.info(verbose=False, memory_usage="deep")
            return df

        except Exception as e:
            display("Error occured in _load_file method of CSV_Loader class due to ", e)
            return None

    @staticmethod
    def _load_file_via_dask(csv_file_path,
                            fetch_houses):
        """
        Build one Dask DataFrame per requested house.

        Args:
            csv_file_path: prefix that is joined directly with
                ``House_<n>.csv`` (no separator is inserted).
            fetch_houses: iterable of house identifiers.

        Returns:
            A list of single-entry dicts ``{house: dask_dataframe}``, or None
            when building the list fails (the error is shown with ``display``).
        """
        try:
            display(f"Loading specified houses: {fetch_houses}")
            return [{i: dd.read_csv(f'{csv_file_path}House_{i}.csv')} for i in fetch_houses]

        except Exception as e:
            display("Error occured in _load_file_via_dask method of CSV_Loader class due to ", e)
            return None
            
            
    ###### appliance wise Dict Reader


In [3]:
def parser(readme_file, max_lines=5):
    """
    Extract the appliance list of every house from a readme file.

    A house section starts with a line beginning with "House" followed by the
    house identifier. The following lines hold comma-separated
    ``<key>.<appliance name>`` entries and are read until a ``!NOTES`` line,
    the next house header, the end of the file, or ``max_lines`` lines,
    whichever comes first.

    Args:
        readme_file: path of the readme text file.
        max_lines: maximum number of lines inspected after a house header.

    Returns:
        dict mapping the house identifier (str) to
        ``{'appliances': [names...], 'keys': [keys...]}``, or None when the
        file cannot be processed (the error is shown with ``display``).
    """
    def _is_house_header(line):
        # capitalize() lower-cases everything but the first character, so this
        # only matches lines that start with "house" in any casing.
        return 'House' in line.capitalize()

    try:
        display(f'Loading the readme files specified: {readme_file}')
        with open(readme_file) as f:
            content = f.readlines()

        ls = {}
        for i, s in enumerate(content):
            if not _is_house_header(s):
                continue
            tokens = s.split()
            if len(tokens) < 2:
                # A header without an identifier cannot be keyed; skip it.
                continue
            house = tokens[1]

            keys, appliances = [], []
            # Slicing never runs past the end of the file, unlike indexing
            # content[i + offset] for a fixed number of offsets.
            for line in content[i + 1:i + 1 + max_lines]:
                if line.strip() == '!NOTES' or _is_house_header(line):
                    break
                for entry in line.split(','):
                    key, dot, name = entry.partition('.')
                    if not dot:
                        # Blank fragments (e.g. after a trailing comma) and
                        # text without a "<key>." prefix are not entries.
                        continue
                    keys.append(key.strip())
                    appliances.append(name.strip())

            ls[house] = {'appliances': appliances, 'keys': keys}
        return ls

    except Exception as e:
        display("Error occured in parser method due to ", e)
        return None

In [4]:
# Create the loader used by the cells below; constructing it only shows the
# two "initialized" status messages.
ob = CSV_Loader()

'Loader Interface initialized'

'CSV_Loader initialized'

In [5]:
%%time
# Build one Dask DataFrame per configured house. The result is a list of
# single-entry dicts of the form {house: dataframe}.
collective_dataset = ob._load_file_via_dask(csv_file_path=general_config['DATA_FOLDER'], fetch_houses=dataset_config['HOUSES'])

'Loading specified houses: [1, 3]'

Wall time: 71.9 ms


In [6]:
collective_dataset

[{1: Dask DataFrame Structure:
                   Time   Unix Aggregate Appliance1 Appliance2 Appliance3 Appliance4 Appliance5 Appliance6 Appliance7 Appliance8 Appliance9
  npartitions=7                                                                                                                            
                 object  int64     int64      int64      int64      int64      int64      int64      int64      int64      int64      int64
                    ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
  ...               ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
                    ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
                    ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ... 

In [7]:
# Parse the readme into {house (str): {'appliances': [...], 'keys': [...]}}.
# parser() returns None instead of raising when the file cannot be processed.
keys_of_appliances = parser(general_config['README_FILE'])

'Loading the readme files specified: data/refit/REFIT_Readme.txt'

In [28]:
# Show every loaded house next to the appliance metadata parsed for it.
mapping_keys = []  # placeholder; nothing is collected into it in this cell

for house_dict in collective_dataset:
    for key, frame in house_dict.items():
        print(key)
        print(frame)
        # The readme metadata is keyed by the house number as a string.
        print(keys_of_appliances[str(key)])

1
Dask DataFrame Structure:
                 Time   Unix Aggregate Appliance1 Appliance2 Appliance3 Appliance4 Appliance5 Appliance6 Appliance7 Appliance8 Appliance9
npartitions=7                                                                                                                            
               object  int64     int64      int64      int64      int64      int64      int64      int64      int64      int64      int64
                  ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
...               ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
                  ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
                  ...    ...       ...        ...        ...        ...        ...        ...        ...        ...        ...        ...
Dask N

In [None]:
keys_of_appliances['3']

In [None]:




# Replace the generic 'Appliance<i>' column names of every house with the
# appliance names parsed from the readme.
# Each element of collective_dataset is a {house: dask_dataframe} dict, so the
# frame is looked up by its real house number instead of assuming houses are
# numbered 1..n by list position.
for house_dict in collective_dataset:
    for house_id, frame in list(house_dict.items()):
        print(house_id)
        appliance_names = keys_of_appliances[str(house_id)]['appliances']
        # Entry 0 of the appliance list is the aggregate reading, which already
        # has its own column; Appliance<i> corresponds to entry i for i >= 1.
        column_names = {
            f'Appliance{i}': name
            for i, name in enumerate(appliance_names[1:], start=1)
        }
        # Dask frames have no in-place rename; store the renamed frame back.
        house_dict[house_id] = frame.rename(columns=column_names)

In [None]:
# List the house identifier(s) stored in each entry of collective_dataset.
for house_dict in collective_dataset:
    print(type(house_dict))
    # Iterate the dict's keys; `house_dict.keys` without the call is a bound
    # method and cannot be iterated.
    for key in house_dict:
        print(key)


In [None]:
%%time
# Materialise the loaded houses as in-memory frames indexed by 'Time'.
# NOTE(review): _load_file_via_dask returns a list of {house: frame} dicts, and
# a dict has no .compute() — this cell looks like it was written against a
# different shape of collective_dataset; confirm before relying on it.
# NOTE(review): the load cell's output shows only houses [1, 3], so index 2 may
# be out of range — confirm the configured house list.
d1 = collective_dataset[0].compute().set_index('Time')
d3 = collective_dataset[1].compute().set_index('Time')
d4 = collective_dataset[2].compute().set_index('Time')

# d1 = collective_dataset[0].compute()
# d3 = collective_dataset[1].compute()
# d4 = collective_dataset[2].compute()

In [None]:
collective_dataset[0].keys

In [None]:
%%time
# Read a single CSV eagerly with pandas (in chunks), using 'Time' as the index.
# The returned frame is the cell's displayed value; it is not stored in a variable.
ob._load_file(csv_file_path=dataset_config['REFIT_DATA_PATH'], index_column_name='Time')

In [None]:


# Collect every data file matching <folder>*<file type>.
# glob returns matches in arbitrary order, so the list is sorted to make
# files[0], files[1], ... refer to the same files on every run and machine.
files = sorted(glob.glob(dataset_config['REFIT_DATA_FOLDER'] + '*' + dataset_config['REFIT_DATA_TYPE']))
print(files)
files[0]

In [None]:
%%time



In [None]:
data

In [None]:
%%time
# Read the first discovered file with pandas, using 'Time' as the index.
# NOTE(review): an earlier cell displays `data` before this assignment appears
# in the notebook — confirm the intended cell order.
data = pd.read_csv(files[0], index_col='Time')

In [None]:

import csv


In [None]:
files

In [None]:
%%time
# Build Dask frames for the first four files, then materialise each one.
# The notebook imports Dask as `dd` (import dask.dataframe as dd); the bare
# name `dask` is never bound, so it cannot be used here.
data1, data2, data3, data4 = (dd.read_csv(path) for path in files[:4])

# compute() runs the deferred read and returns the result for each frame.
df1, df2, df3, df4 = (frame.compute() for frame in (data1, data2, data3, data4))

In [None]:
df1

In [None]:
df

In [None]:
import logging

# basicConfig takes the log destination as `filename`; an unknown keyword such
# as `file` is rejected with ValueError.
# NOTE(review): 'd://sample.log' is a hard-coded drive path — consider making
# the location configurable.
logging.basicConfig(level=logging.INFO, filename='d://sample.log')

def hypotenuse(a, b):
    """Return the square root of ``a`` squared plus ``b`` squared."""
    sum_of_squares = a**2 + b**2
    return sum_of_squares**0.5

# Values substituted into the demo messages below.
kwargs = dict(a=3, b=4, c=hypotenuse(3, 4))

# Emit one message per severity level, in increasing order of severity.
for emit, template in (
    (logging.debug, "a = {a}, b = {b}"),
    (logging.info, "Hypotenuse of {a}, {b} is {c}"),
    (logging.warning, "a={a} and b={b} are equal"),
    (logging.error, "a={a} and b={b} cannot be negative"),
    (logging.critical, "Hypotenuse of {a}, {b} is {c}"),
):
    emit(template.format_map(kwargs))

In [None]:
# The standard-library `parser` module was removed in Python 3.10, and importing
# it under that name would also rebind this notebook's own parser() function to
# a module. `ast` provides the supported way to parse an expression.
import ast

print("Program to demonstrate parser module in Python")
print("\n")
exp = "5 + 8"
print("The given expression for parsing is as follows:")
print(exp)
print("\n")
print("Parsing of given expression results as: ")
# mode="eval" parses a single expression and returns an ast.Expression node.
st = ast.parse(exp, mode="eval")
print(st)

In [None]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code while loading; only read
# "file.pkl" if it comes from a trusted source.
# The context manager closes the file handle once loading finishes or fails.
with open("file.pkl", "rb") as pickle_file:
    nested_dict = pickle.load(pickle_file)

# Frames computed earlier for the first two loaded houses.
d = [d1, d3]

In [None]:
# nested_dict

In [None]:
d[0]