# Practical Exercise: Data Preparation and Visualisation

## Part I. Data Loading and Preparation

In [None]:
import pandas as pd
import numpy as np
import h5py
import os, glob
import re
from datetime import datetime, date, time
from six import iteritems



In [None]:
class Blond(object):
    """
    Data files of one day, grouped by measurement unit.

    The files are kept in a dictionary of the form
        {'clear'  : [files],
         'medal-1': [files],
         'medal-2': [files],
            ...
        }
    which is either supplied to the constructor or filled by read_files().

    NOTE: read_files() opens every file in 'r+' mode, and center() /
    calibrate() assign the processed samples back into the dataset they were
    read from. The "already centered / calibrated" bookkeeping only lives in
    this object, so it does not protect files that were processed by an
    earlier object or session.
    """

    # Locations scanned by read_files(), relative to the working directory.
    _PATH_TO_CLEAR = './data/clear/'
    _PATH_TO_MEDALS = './data/medal*/'

    # Timestamp embedded in the file names, e.g. 2016-10-05T00-30-00.
    _TIMESTAMP_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2})')
    _TIMESTAMP_FORMAT = '%Y-%m-%dT%H-%M-%S'

    # Unit name embedded in a MEDAL folder path, e.g. medal-1.
    _MEDAL_NAME_RE = re.compile(r'(medal-\d+)')

    def __init__(self, date, day_data=None):
        """
        :param date: date this object stands for (stored only; read_files()
                     filters on the time of day, not on this date)
        :param day_data: optional dictionary {device: [files]}; it is used
                         as-is (not copied). A fresh dictionary is created
                         when omitted, so instances never share state.
        """
        self.date = date
        self._day_data = {} if day_data is None else day_data
        # (device, signal) pairs already processed by THIS instance.
        self._SD_centered = set()
        self._SD_calibrated = set()

    def list_files(self):
        """Return the dictionary {device: [files]} held by this object."""
        return self._day_data

    def close(self):
        """Close every file held by this object and empty the file lists."""
        for files in self._day_data.values():
            for data_file in files:
                data_file.close()
            del files[:]

    def read_files(self, start_hm, end_hm):
        """ read_files method scans the relevant folders and stores a dictionary
            with the files relevant to the timeframe (start_hm, end_hm)
                {'clear'  : [files],
                 'medal-1': [files],
                 'medal-2': [files],
                    ...
                }

            Both bounds are inclusive and are compared with the time of day
            parsed from each file name. Files without a timestamp in their
            name are ignored. The files of each device are ordered by that
            timestamp. The result is retrieved with list_files().

            :param start_hm: datetime.time, start of the timeframe
            :param end_hm: datetime.time, end of the timeframe
        """
        # CLEAR unit: a single folder.
        self._day_data['clear'] = self._open_matching(
            self._PATH_TO_CLEAR, start_hm, end_hm)

        # MEDAL units: one folder per unit.
        for folder in sorted(glob.glob(self._PATH_TO_MEDALS)):
            name_match = self._MEDAL_NAME_RE.search(folder)
            if name_match is None:
                # Matches 'medal*' but is not a 'medal-<n>' folder: skip it.
                continue
            self._day_data[name_match.group(1)] = self._open_matching(
                folder, start_hm, end_hm)

    def _open_matching(self, folder, start_hm, end_hm):
        """Open (mode 'r+') the files of *folder* stamped within the timeframe."""
        # The default keeps a missing folder from raising StopIteration.
        file_names = next(os.walk(folder), (folder, [], []))[2]

        selected = []
        for file_name in file_names:
            stamp_match = self._TIMESTAMP_RE.search(file_name)
            if stamp_match is None:
                continue
            stamp = datetime.strptime(stamp_match.group(1),
                                      self._TIMESTAMP_FORMAT)
            if start_hm <= stamp.time() <= end_hm:
                selected.append((stamp, file_name))

        # os.walk gives no ordering guarantee; sort so that concatenating the
        # signals of consecutive files follows the recording order.
        selected.sort()
        return [h5py.File(os.path.join(folder, file_name), 'r+')
                for _, file_name in selected]

    def center(self, device, signal):
        """
        Add the 'removed_offset' attribute of *signal* back to its samples,
        for every file of *device*. Nothing is written for the 'clear' device.

        A second call for the same device/signal on this object only prints
        a notice.

        :raises KeyError: if *device* has no files loaded, or a file lacks
                          the signal or its 'removed_offset' attribute
        """
        key = (device, signal)
        if key in self._SD_centered:
            print("Signal '{}' for '{}' has been already centered.".format(signal, device))
            return

        data_list = self._day_data[device]
        if device != 'clear':  # NO OFFSET FOR CLEAR DEVICE
            # Resolve every dataset and offset first, so that a missing
            # signal/attribute fails before any file has been modified.
            datasets = [data_file[signal] for data_file in data_list]
            offsets = [dataset.attrs['removed_offset'] for dataset in datasets]
            for dataset, DC_offset in zip(datasets, offsets):
                print(DC_offset)
                dataset[:] = dataset[:] + DC_offset

        # Recorded only once the work is done, so a failed call can be retried.
        self._SD_centered.add(key)

    def calibrate(self, device, signal):
        """
        Multiply the samples of *signal* by its 'calibration_factor'
        attribute, for every file of *device*.

        A second call for the same device/signal on this object only prints
        a notice.

        NOTE(review): the product is assigned back into the same dataset; if
        that dataset has an integer dtype the result is presumably cast back
        to it - confirm against the data files.

        :raises KeyError: if *device* has no files loaded, or a file lacks
                          the signal or its 'calibration_factor' attribute
        """
        key = (device, signal)
        if key in self._SD_calibrated:
            print("Signal '{}' for '{}' has been already calibrated.".format(signal, device))
            return

        data_list = self._day_data[device]
        # Same two-step approach as center(): look everything up, then write.
        datasets = [data_file[signal] for data_file in data_list]
        factors = [dataset.attrs['calibration_factor'] for dataset in datasets]
        for dataset, factor in zip(datasets, factors):
            print(factor)
            dataset[:] = dataset[:] * factor

        self._SD_calibrated.add(key)

    def it_read_signal(self, device, signal):
        """
        Return an iterator over the full content of *signal*, one item per
        file of *device*. Each file is read only when its item is requested.

        :raises KeyError: if *device* has no files loaded
        """
        return (data_file[signal][:] for data_file in self._day_data[device])
            
    


###  1. Data Reading and Exploration

### Reading files

In [None]:
# Timeframe to load, given as times of day (hours, minutes).
start_hm = time(0, 30)
end_hm = time(1, 0)

# Open the MEDAL and CLEAR files whose name timestamp lies in the timeframe.
blond = Blond(date(2018, 10, 5))
blond.read_files(start_hm, end_hm)

# Signals extracted in the cells below are collected in this dictionary.
data = {}

# Check which files have been retrieved.
blond.list_files()

### Exploration


In [None]:
"""signals acquisited by MEDAL"""
# First file retrieved for 'medal-1'; its keys are displayed as a list.
medal_files = blond.list_files()['medal-1']
medal_file = medal_files[0]
list(medal_file.keys())

In [None]:
"""signals acquisited by CLEAR"""
# First file retrieved for 'clear'; its keys are displayed as a list.
clear_files = blond.list_files()['clear']
clear_file = clear_files[0]
list(clear_file.keys())

### Centering and calibrating

In [None]:
device = 'medal-1'
signal = 'current1'

# Center first, then calibrate, then get an iterator over the per-file arrays.
blond.center(device, signal)
blond.calibrate(device, signal)
it_signal = blond.it_read_signal(device, signal)

# Materialise the iterator into one numpy array - run this cell only once.
signal_key = '{}_{}'.format(device, signal)
signal_chunks = list(it_signal)
data[signal_key] = np.concatenate(signal_chunks)
data

In [None]:
# Shape of the array stored by the previous cell. The key is built the same
# way as there; the former hard-coded 'medal-2_current1' was never stored.
data[device + '_' + signal].shape

## Part II. Data Visualisation

In [None]:
"""
Function to run dash app in jupyter:
https://community.plot.ly/t/can-i-run-dash-app-in-jupyter/5235
"""
from IPython import display
def show_app(app,  # type: dash.Dash
             port=9999,
             width=700,
             height=350,
             offline=True,
             style=True,
             **dash_flask_kwargs):
    """
    Display an iframe pointing at the application, then start its server.

    The iframe is emitted before ``app.run_server`` is called, and the value
    returned by ``app.run_server`` is passed back to the caller.

    :param app: Dash application to serve
    :param port: port used in the iframe URL and handed to ``run_server``
    :param width: value of the iframe ``width`` attribute
    :param height: value of the iframe ``height`` attribute
    :param offline: when true, ``serve_locally`` is switched on for the
                    application's css and scripts configuration
    :param style: when true, the external stylesheets and scripts listed in
                  the body are registered on the application
    :param dash_flask_kwargs: extra keyword arguments forwarded to
                              ``app.run_server``
    :return: result of ``app.run_server``
    """
    frame_markup = '<iframe src="{url}" width={width} height={height}></iframe>'.format(
        url='http://localhost:%d' % port,
        width=width,
        height=height)
    display.display_html(frame_markup, raw=True)

    if offline:
        for resources in (app.css, app.scripts):
            resources.config.serve_locally = True

    if style:
        stylesheet_urls = (
            "https://fonts.googleapis.com/css?family=Raleway:400,300,600",
            "https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css",
            "http://getbootstrap.com/dist/css/bootstrap.min.css",
        )
        script_urls = (
            "https://code.jquery.com/jquery-3.2.1.min.js",
            "https://cdn.rawgit.com/plotly/dash-app-stylesheets/a3401de132a6d0b652ba11548736b1d1e80aa10d/dash-goldman-sachs-report-js.js",
            "http://getbootstrap.com/dist/js/bootstrap.min.js",
        )

        for stylesheet_url in stylesheet_urls:
            app.css.append_css({"external_url": stylesheet_url})

        for script_url in script_urls:
            app.scripts.append_script({"external_url": script_url})

    # debug must stay False when running inside Jupyter
    server_options = dict(dash_flask_kwargs, debug=False, port=port)
    return app.run_server(**server_options)

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html

# Demo page: a heading, a tagline and one static line chart.
example_figure = {
    'data': [
        {'x': [1, 2, 3], 'y': [4, 1, 2], 'type': 'line', 'name': 'SF'},
    ],
    'layout': {
        'title': 'Dash Data Visualization',
    },
}

page_heading = html.H1(children='Hello Dash')
page_tagline = html.Div(
    children='\n        Dash: A web application framework for Python.\n    ')
page_graph = dcc.Graph(id='example-graph', figure=example_figure)

app = dash.Dash()
app.layout = html.Div(children=[page_heading, page_tagline, page_graph])

# Embed the running application in the notebook.
show_app(app)