# Practical Exercise: Data Preparation and Visualisation

In [None]:
import pandas as pd
import numpy as np
import h5py
import os, glob
import re
from datetime import datetime, date, time
from six import iteritems



In [None]:
class Blond(object):
    """Handle to the CLEAR and MEDAL HDF5 recordings found under a data folder.

    Attributes:
        date: value supplied by the caller; it is stored as-is and is not
            used to select files.
        _day_data: dict mapping a device name ('clear', 'medal-1', ...) to
            the list of HDF5 files opened for that device.

    NOTE(review): center() and calibrate() assign the transformed samples
    back into the datasets of files opened in 'r+' mode, so the change
    presumably ends up on disk. The registries below only remember what was
    processed during the current Python session -- confirm that a fresh
    session does not apply the offset/factor a second time, and that the
    dataset dtype can hold the calibrated values.
    """

    # (file, signal) pairs that were already centered / calibrated.
    # Kept on the class rather than on the instance on purpose: the
    # transforms modify the files themselves, so a second Blond object over
    # the same files must not repeat them.
    _SD_centered = set()
    _SD_calibrated = set()

    # File names embed their start time, e.g. '...2018-10-05T00-30-00...'.
    _TIMESTAMP_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2})')
    _TIMESTAMP_FORMAT = '%Y-%m-%dT%H-%M-%S'
    _MEDAL_RE = re.compile(r'(medal-\d+)')

    def __init__(self, date, day_data=None):
        """Store the date label and the (optional) pre-populated file dict.

        Args:
            date: label kept in ``self.date``.
            day_data: optional dict ``{device: [files]}``. It is used by
                reference when given; a fresh dict is created otherwise so
                that instances never share one default dict.
        """
        self.date = date
        self._day_data = {} if day_data is None else day_data

    def __enter__(self):
        """Allow ``with Blond(...) as blond:`` so files get closed on exit."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close every opened file; exceptions are not suppressed."""
        self.close()
        return False

    def list_files(self):
        """Return the dict ``{device: [opened files]}`` (not a copy)."""
        return self._day_data

    def read_files(self, start_hm, end_hm, data_root='./data'):
        """Open the CLEAR and MEDAL files recorded inside a time-of-day window.

        Scans ``<data_root>/clear`` and every ``<data_root>/medal-<n>`` folder
        and stores the result in ``self._day_data``:
                {'clear'  : [files],
                 'medal-1': [files],
                 'medal-2': [files],
                    ...
                }
        Files of each device are ordered by the timestamp in their name.
        Any list previously stored for a scanned device is replaced.

        Args:
            start_hm: ``datetime.time`` lower bound (inclusive).
            end_hm: ``datetime.time`` upper bound (inclusive).
            data_root: folder holding the 'clear' and 'medal-*' sub-folders.
        """
        self._day_data['clear'] = self._open_in_window(
            os.path.join(data_root, 'clear'), start_hm, end_hm)

        # The trailing '' makes the pattern end with a separator, so only
        # directories match; sorting keeps the device order deterministic.
        medal_pattern = os.path.join(data_root, 'medal*', '')
        for folder in sorted(glob.glob(medal_pattern)):
            folder_name = os.path.basename(os.path.normpath(folder))
            match = self._MEDAL_RE.search(folder_name)
            if match is None:
                # e.g. a 'medals' folder: matches the glob, is not a unit.
                continue
            self._day_data[match.group(1)] = self._open_in_window(
                folder, start_hm, end_hm)

    def _open_in_window(self, folder, start_hm, end_hm):
        """Open (mode 'r+') the files of *folder* stamped within the window.

        Returns a list ordered by timestamp; a missing folder or a folder
        without matching files gives an empty list.
        """
        # os.walk yields nothing for a missing folder; the default avoids a
        # bare StopIteration in that case.
        file_names = next(os.walk(folder), (folder, [], []))[2]

        selected = []
        for file_name in file_names:
            match = self._TIMESTAMP_RE.search(file_name)
            if match is None:
                continue
            stamp = datetime.strptime(match.group(1), self._TIMESTAMP_FORMAT)
            if start_hm <= stamp.time() <= end_hm:
                selected.append((stamp, file_name))

        # os.walk gives no ordering guarantee; sort so that concatenating
        # the per-file signals later yields a chronological series.
        return [h5py.File(os.path.join(folder, file_name), 'r+')
                for _, file_name in sorted(selected)]

    def close(self):
        """Close every file held in ``_day_data`` and empty the lists."""
        for handles in self._day_data.values():
            for handle in handles:
                closer = getattr(handle, 'close', None)
                if closer is not None:
                    closer()
            del handles[:]

    @staticmethod
    def _file_key(data_file):
        """Return a hashable identity for *data_file*.

        Uses the absolute path from the ``filename`` attribute when the
        object has one, and falls back to ``id()`` otherwise.
        """
        filename = getattr(data_file, 'filename', None)
        if filename is None:
            return id(data_file)
        return os.path.abspath(filename)

    def _apply_once(self, data_list, signal, registry, transform):
        """Rewrite ``file[signal]`` in place for files not yet in *registry*.

        Args:
            data_list: files to process.
            signal: dataset name inside each file.
            registry: set of (file key, signal) pairs already processed.
            transform: callable taking the dataset, returning the new samples.

        Returns:
            True when there were files and all of them had been processed
            before (nothing was changed), False otherwise.
        """
        skipped = 0
        for data_file in data_list:
            key = (self._file_key(data_file), signal)
            if key in registry:
                skipped += 1
                continue
            dataset = data_file[signal]
            dataset[:] = transform(dataset)
            # Register only after the write succeeded, so a failed attempt
            # can be retried instead of being silently skipped.
            registry.add(key)
        return bool(data_list) and skipped == len(data_list)

    def center(self, device, signal):
        """Add the dataset's 'removed_offset' attribute back to *signal*.

        Each file is centered at most once per session. The 'clear' device
        is left untouched (no offset for CLEAR).

        Raises:
            KeyError: if *device* has not been loaded.
        """
        data_list = self._day_data[device]
        if device == 'clear':  # NO OFFSET FOR CLEAR DEVICE
            return
        already_done = self._apply_once(
            data_list, signal, self._SD_centered,
            lambda dataset: dataset[:] + dataset.attrs['removed_offset'])
        if already_done:
            print("Signal '{}' for '{}' has been already centered.".format(signal, device))

    def calibrate(self, device, signal):
        """Multiply *signal* by the dataset's 'calibration_factor' attribute.

        Each file is calibrated at most once per session.

        Raises:
            KeyError: if *device* has not been loaded.
        """
        data_list = self._day_data[device]
        already_done = self._apply_once(
            data_list, signal, self._SD_calibrated,
            lambda dataset: dataset[:] * dataset.attrs['calibration_factor'])
        if already_done:
            print("Signal '{}' for '{}' has been already calibrated.".format(signal, device))

    def it_read_signal(self, device, signal):
        """Return an iterable with the full content of *signal*, one item per file.

        The items follow the order of the files stored for *device*.
        """
        files = self._day_data[device]
        return map(lambda f: f[signal][:], files)
            
    


## 1. Data Reading and Exploration

### Reading files

In [None]:
blond = Blond(date(2018, 10, 5))

# Time-of-day window of the recordings to load (both bounds inclusive).
start_hm = time(0, 30)  # start_hours_minutes
end_hm = time(1, 0)

# Open the MEDAL and CLEAR files whose name timestamp falls in the window.
blond.read_files(start_hm, end_hm)

# Container for the materialised signals; filled by a later cell.
data = {}

# Check which files have been retrieved (shown as the cell output).
blond.list_files()

### Exploration


In [None]:
"""signals acquisited by MEDAL"""
# Take the first file opened for 'medal-1' and list the names it contains.
medal_file = blond.list_files()['medal-1'][0]
list(medal_file.keys())

In [None]:
"""signals acquisited by CLEAR"""
# Take the first file opened for 'clear' and list the names it contains.
clear_file = blond.list_files()['clear'][0]
list(clear_file.keys())

### Centering and calibrating

In [None]:
# Device / signal pair to prepare.
device = 'medal-3'
signal = 'voltage'

# Both steps assign the transformed samples back into the datasets of the
# opened files (they are opened in 'r+' mode), and the class only remembers
# what it already processed for the current session -- so run this cell
# only once per data set.
blond.center(device, signal)
blond.calibrate(device, signal)
it_signal = blond.it_read_signal(device, signal)

# Materialise the per-file arrays into one numpy array; np.concatenate
# needs a real sequence, hence the list().
data[device+'_'+signal] = np.concatenate(list(it_signal))
data