# Practical Exercise: Data Preparation and Visualisation

## Exercise I. Data Loading and Preparation

In [1]:
import pandas as pd
import numpy as np
import h5py
import os, glob
import re
from datetime import datetime, date, time
from six import iteritems
%load_ext autoreload
%autoreload 2

###  1. Data Reading and Exploration

### Initialize Class and Read files
The `Blond` class in `blond.py` handles data input, output and calibration. When a `Blond` object is created, it tries to acquire the data for the time interval that is passed to it as input. 

**Note that:** 
* It only creates h5py objects; it does not actually read the data yet. 

* From this point on, this Blond object can only read data that lies within the specified time interval. 

In [32]:
# Blond handles data input, output and calibration; get_time_diff is needed
# by the visualisation cell further down.
from blond import Blond, get_time_diff

# Restrict this Blond object to 00:50:00 - 01:30:10 on 2016-10-05.
blond = Blond(
    date(2016, 10, 5),
    start_ts=time(0, 50, 0),
    end_ts=time(1, 30, 10),
)


In [33]:
# Check whether the files for the requested interval have been retrieved.
blond.list_files()

{'clear': [<HDF5 file "clear-2016-10-05T00-46-53.467073T+0200-0001315.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T00-51-53.752681T+0200-0001316.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T00-56-54.039123T+0200-0001317.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T01-01-54.325533T+0200-0001318.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T01-06-54.611818T+0200-0001319.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T01-11-54.897536T+0200-0001320.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T01-16-55.183843T+0200-0001321.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T01-21-55.470225T+0200-0001322.hdf5" (mode r+)>,
  <HDF5 file "clear-2016-10-05T01-26-55.755842T+0200-0001323.hdf5" (mode r+)>],
 'medal-1': [<HDF5 file "medal-1-2016-10-05T00-47-13.877516T+0200-0000439.hdf5" (mode r+)>,
  <HDF5 file "medal-1-2016-10-05T01-02-13.887807T+0200-0000440.hdf5" (mode r+)>,
  <HDF5 file "medal-1-2016-10-05T01-17-13.903732T+0200-0000441.hdf5" (mode r+)>],
 'medal-2': [<HDF5 file 

### Example Data Read

To read a data interval, specify the **device** ("clear", "medal-1", ...), the **signal** ("current1", "current2", ...), and the start and end times. These times must lie within the time interval that was given to the Blond object itself.  

Note that `read_data` returns the data from the beginning of **start_ts** up to and including the end of **end_ts**.

In [40]:
# Read the 'current1' signal of 'medal-1' between 00:50:00 and 01:30:01.
blond.read_data(
    device="medal-1",
    signal="current1",
    start_ts=time(0, 50, 0),
    end_ts=time(1, 30, 1),
)

array([ -9, -18, -11, ..., -17, -16, -16])

### Exploration


In [7]:
# Signals acquired by MEDAL: the key names of the first 'medal-1' file.
medal_file = blond.list_files()['medal-1'][0]
list(medal_file.keys())

['current1',
 'current2',
 'current3',
 'current4',
 'current5',
 'current6',
 'voltage']

In [8]:
# Signals acquired by CLEAR: the key names of the first 'clear' file.
clear_file = blond.list_files()['clear'][0]
list(clear_file.keys())

['current1', 'current2', 'current3', 'voltage1', 'voltage2', 'voltage3']

### Centering and calibrating

In [9]:
device, signal = 'medal-2', 'current1'

# Raw signal together with its offset and calibration-factor attributes.
dict_signal = blond.dict_read_signal(device, signal)
print(dict_signal)

# Calibrated signal; being the last expression, it is shown as the cell output.
blond.center_and_calibrate(dict_signal)

{'attributes': [{'calibration_factor': 0.015151515, 'values': array([-5, -3, -5, ..., -6, -1, -6], dtype=int16), 'DC_offset': 2500}, {'calibration_factor': 0.015151515, 'values': array([ -6,  -7,  -7, ...,  -1, -11,  -1], dtype=int16), 'DC_offset': 2500}], 'signal': 'medal-2_current1'}


{'medal-2_current1': array([37.78788 , 37.772728, 37.772728, ..., 37.863636, 37.71212 ,
        37.863636], dtype=float32)}

## Exercise III. Feature Extraction

To use the Dash-Plotly application, run **python app.py**. Once the server is running, you can connect to it and use the application.

## Exercise IV. Visualization

In [None]:
import matplotlib.pyplot as plt

# Plot a 100 ms window of one CLEAR current channel together with its
# per-period RMS, in three side-by-side panels.

sps = 6400      # samples per second -- NOTE(review): confirm this matches the CLEAR files
phase = 1       # selects the "current<phase>" dataset
mains_hz = 50   # assumed mains frequency -- TODO confirm for this dataset
requested_time = time(1, 10, 10)

# Find the CLEAR file that covers the requested time and the offset into it.
clear_stamps = blond.time_stamps['clear']
file_index = blond.find_corresponding_file(requested_time, clear_stamps)
time_diff = get_time_diff(requested_time, clear_stamps[file_index])

# Slice bounds must be integers. Presumably time_diff is a number of seconds
# -- verify against get_time_diff.
data_index_shift = int(time_diff * sps)
n_samples = int(sps * 0.1)  # 0.1 s worth of samples

raw_current = blond.list_files()['clear'][file_index]["current" + str(phase)][
    data_index_shift:data_index_shift + n_samples
]
# Convert to float before squaring: the raw samples printed earlier in this
# notebook are int16, and squaring int16 values overflows for |x| > 181.
temp_data = np.asarray(raw_current, dtype=np.float64)

# RMS over each mains period, held constant across that period so it can be
# drawn on top of the waveform. The previous per-sample loop reduced to |x|.
samples_per_period = max(1, int(round(sps / mains_hz)))
curr_rms = np.empty_like(temp_data)
for start in range(0, len(temp_data), samples_per_period):
    stop = start + samples_per_period
    curr_rms[start:stop] = np.sqrt(np.mean(temp_data[start:stop] ** 2))

# Time axis in seconds, relative to the start of the window, so that the
# axis labels below describe what is actually plotted.
t_s = np.arange(len(temp_data)) / sps

# NOTE(review): temp_data is read straight from the file without
# center_and_calibrate, so the "[A]" unit is not guaranteed -- confirm.
# NOTE(review): the third panel is labelled as power but still plots the
# current; computing power would additionally need the voltage channel.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(18, 5))
plot1, plot2, plot3 = ax

# (axes, title, x label, y label or None, x values)
panels = [
    (plot1, 'Current of AC Electrolux', 'Time [s]', 'Current [A]', t_s),
    (plot2, 'Waveform Comparison of AC Electrolux', 'Time [ms]', None, t_s * 1e3),
    (plot3, 'Power over Time of AC Electrolux', 'Time [s]', 'Power [W]', t_s),
]
for axes, title, xlabel, ylabel, x_values in panels:
    axes.plot(x_values, temp_data, color='b', label='Current')
    axes.plot(x_values, curr_rms, color='r', label='Period RMS of Current')
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    if ylabel is not None:
        axes.set_ylabel(ylabel)
    axes.legend()

plt.show()