# Introduction

The data-processing steps are implemented below.
The starting point is the Excel output of the tool that has already been developed in MATLAB.

### Importing Required Libraries

In [34]:
import numpy as np, pandas as pd
from os import listdir
from os.path import join
import matplotlib, datetime
import matplotlib.pyplot as plt

plt.style.use('ggplot')
%matplotlib inline

from ipywidgets import widgets, interact, interact_manual, interactive, Layout
import ipywidgets as widgets
from tkinter import *
from tkinter.filedialog import askopenfilename
import xlrd
import cufflinks as cf, plotly.plotly as py

### Inputs File Selection

_Reminder: Please select the output file produced by the .m executable. Note: check for open windows!_

In [2]:
# Get file name
# A Tk root window is created only to host the native dialog and is hidden
# straight away so that no empty window is shown.
Tk().withdraw()
filename = askopenfilename()

# The dialog hands back an empty value when it is cancelled. Fail early with a
# clear message instead of passing an empty path on to pandas.
if not filename:
    raise FileNotFoundError(
        "No input file selected - re-run this cell and pick the Excel output file."
    )

# Read file
Df = pd.read_excel(filename)

In [5]:
# Work on an independent (deep) copy so the frame read from disk stays untouched.
df = Df.copy(deep=True)

# Data Processing

### Remove Un-used Columns

Please enter a string; all columns whose names contain it will be removed.

In [6]:
# Widget used purely as a container: later cells read the entered text from `text.value`.
text = widgets.Text()

def select_text(String='write text, e.g. off'):
    """Copy the current content of the interactive text box into ``text``."""
    text.value = str(String)

# Same effect as decorating the function with @interact: the text box is
# rendered and `select_text` is wired to it.
select_text = interact(select_text)

interactive(children=(Text(value='write text, e.g. off', description='String'), Output()), _dom_classes=('widg…

In [7]:
# Drop columns including string (case-insensitive)
s = text.value
if s:
    # An empty string is contained in every name, which would drop ALL columns,
    # so filtering only happens when some text was entered.
    # regex=False: the text is matched literally, so characters such as "." or
    #              "(" cannot be misread as a pattern (consistent with the "." filter below).
    # na=False:    column labels that are not strings never count as a match.
    df = df.loc[:, ~df.columns.str.contains(s, case=False, regex=False, na=False)]

In [8]:
# Columns whose name contains a literal "." are the additional time columns; remove them.
has_dot = df.columns.str.contains('.', regex=False)
df = df.loc[:, ~has_dot]

In [9]:
# Format time & drop un-used time columns
def read_date(date):
    """Convert an Excel serial date number into a datetime (xlrd datemode 0)."""
    return xlrd.xldate.xldate_as_datetime(date, 0)

# The raw column is scaled by 10**(-6) before it is read as an Excel serial date.
excel_serial = df['Time *10^6'] * 10**(-6)
timestamps = pd.to_datetime(excel_serial.apply(read_date), errors='coerce')

# Index the frame by timestamp and discard the raw time column. Any column
# called 'Time' is discarded as well, because that name was only ever used as
# a temporary working column.
df = (
    df.assign(datetime=pd.to_datetime(timestamps))
      .set_index('datetime')
      .drop(['Time *10^6'], axis=1)
      .drop(columns=['Time'], errors='ignore')
)

# `timestamps` still carries the old row labels, so the values are attached
# positionally (row order is unchanged) rather than aligned by index.
df['day'] = timestamps.dt.date.to_numpy()
df['time'] = timestamps.dt.time.to_numpy()

In [10]:
# Truncate every time-of-day value to whole seconds.
df['time'] = df['time'].apply(lambda stamp: stamp.replace(microsecond=0))

In [11]:
# Independent (deep) copy: the date filtering below must not alter `df`.
D = df.copy(deep=True)

### Remove Data from Other Days

Print available dates & ask user to select one.

In [12]:
# One radio button per distinct value of the 'day' column.
available_days = list(D['day'].unique())
date = widgets.RadioButtons(options=available_days, description='Select date:')
display(date)

RadioButtons(description='Select date:', options=(datetime.date(2019, 6, 20), datetime.date(2019, 6, 21), date…

In [13]:
# Drop rows belonging to other dates
d = date.value
on_selected_day = D['day'] == d
D = D[on_selected_day]

In [14]:
# Independent (deep) copy: the steps below modify `ddd` and leave `D` as filtered.
ddd = D.copy(deep=True)

### Clean Outliers

In [15]:
### Clean Outliers
#
def ident_outliers(D, c, n_std=3):
    """Flag the values of column ``c`` that lie far away from the column mean.

    Parameters
    ----------
    D : pandas.DataFrame
        Frame holding the column.
    c : hashable
        Label of a numeric column of ``D``.
    n_std : float, default 3
        Distance from the mean, in standard deviations, beyond which a value
        counts as an outlier.

    Returns
    -------
    pandas.Series
        Boolean Series (same index as ``D``), True where the value is an outlier.
        Missing values and columns with zero / undefined spread yield False,
        because a NaN comparison is never True.
    """
    m = D[c].mean(); s = D[c].std()
    return np.abs(D[c] - m) / s > n_std


def clean_outliers(D, c, n_std=3):
    """Replace the outliers of column ``c`` with NaN, modifying ``D`` in place.

    Parameters are the same as for ``ident_outliers``.

    Returns
    -------
    pandas.Series
        The cleaned column ``D[c]``.
    """
    mask = ident_outliers(D, c, n_std)
    # Label-based .loc is required here: `c` is a column label, which the
    # position-based .iloc rejects. The mask is passed as a plain array so it
    # is applied row by row without any index alignment.
    D.loc[mask.to_numpy(), c] = np.nan
    return D[c]

In [16]:
# Mean and standard deviation only exist for numeric columns, so the other
# columns are skipped explicitly instead of relying on an exception.
for c in ddd.select_dtypes(include='number').columns:
    try:
        clean_outliers(ddd, c)
    except Exception as err:
        # Best effort: carry on with the remaining columns, but report the
        # failure so that a broken cleaning step cannot go unnoticed.
        print("Outlier cleaning skipped for column %r: %s" % (c, err))

### Add Air Flow & Inverter Data

Air Flow Data.

Specify the number of entries / changes in LPM.

In [17]:
# Integer input for the number of LPM changes; it starts at 0.
e = widgets.IntText(value=0, description='Entries:')
display(e)

IntText(value=0, description='Entries:')

Specify starting LPM.

In [18]:
# Bounded float input (0 to 1000 in steps of 10) for the initial air flow.
starting_lpm = widgets.BoundedFloatText(
    description='Starting LPM:',
    value=750.0,
    min=0,
    max=1000.0,
    step=10.0,
)
display(starting_lpm)

BoundedFloatText(value=750.0, description='Starting LPM:', max=1000.0, step=10.0)

In [19]:
# Maps a time string ('HH:MM:SS') to the LPM value that applies from that time on.
dc = {}
def f(t, v, n):
    """Record LPM value ``v`` for time ``t`` while the 'Add' toggle ``n`` is on.

    The callback is wired to the widgets through ``interactive_output`` and is
    therefore invoked again when their values change, so ``t`` may be an
    incomplete or mistyped time. Only text that parses as HH:MM:SS is stored;
    anything else is reported and ignored, so that the later processing of
    ``dc`` cannot fail on a malformed key.
    """
    if not n:
        return
    try:
        datetime.datetime.strptime(t, '%H:%M:%S')
    except ValueError:
        print("Ignored %r: the time must be given as HH:MM:SS." % t)
        return
    dc[t] = v

# One row of widgets (time, value, 'Add' toggle) per requested entry.
for i in range(e.value):

    t = widgets.Text(value='00:00:00', description='Time:')
    v = widgets.FloatText(value=starting_lpm.value, description='LPM:')
    n = widgets.ToggleButton(value=False, description='Add')
    UI = widgets.HBox([t, v, n])

    out = widgets.interactive_output(f, {'t': t, 'v': v, 'n': n})

    display(UI, out)

HBox(children=(Text(value='00:00:00', description='Time:'), FloatText(value=750.0, description='LPM:'), Toggle…

Output()

In [20]:
# Every row starts with the initial air flow; the recorded changes are applied on top.
ddd['aLPM'] = starting_lpm.value

def f(t, v):
    """Set 'aLPM' to ``v`` for all rows from time ``t`` (HH:MM:SS) on the selected date onwards."""
    D = date.value; T = datetime.datetime.strptime(t, '%H:%M:%S').time()
    y, mo, d, h, mi, s = D.year, D.month, D.day, T.hour, T.minute, T.second

    ddd.loc["%s-%s-%s %s:%s:%s"%(y, mo, d, h, mi, s):, "aLPM"] = v

# Each change overwrites everything from its time to the end of the frame, so
# the changes must be applied in chronological order. The dict keeps them in
# the order they were entered, which need not be chronological. The keys are
# parsed for sorting because plain string order is wrong for unpadded hours.
for k, v in sorted(dc.items(),
                   key=lambda entry: datetime.datetime.strptime(entry[0], '%H:%M:%S')):
    f(k, v)

Inverter Data.

Specify the number of entries / changes in inverter frequency.

In [21]:
# Integer input for the number of inverter changes; it starts at 0.
ei = widgets.IntText(value=0, description='Entries:')
display(ei)

IntText(value=0, description='Entries:')

Specify starting value.

In [22]:
# Bounded float input (0 to 50 in steps of 0.5) for the initial inverter frequency.
starting_hz = widgets.BoundedFloatText(
    description='Starting Hz:',
    value=37.5,
    min=0,
    max=50.0,
    step=.5,
)
display(starting_hz)

BoundedFloatText(value=37.5, description='Starting Hz:', max=50.0, step=0.5)

In [23]:
# Maps a time string ('HH:MM:SS') to the inverter value (Hz) that applies from that time on.
dci = {}
def f(t, v, n):
    """Record inverter value ``v`` for time ``t`` while the 'Add' toggle ``n`` is on.

    The callback is wired to the widgets through ``interactive_output`` and is
    therefore invoked again when their values change, so ``t`` may be an
    incomplete or mistyped time. Only text that parses as HH:MM:SS is stored;
    anything else is reported and ignored, so that the later processing of
    ``dci`` cannot fail on a malformed key.
    """
    if not n:
        return
    try:
        datetime.datetime.strptime(t, '%H:%M:%S')
    except ValueError:
        print("Ignored %r: the time must be given as HH:MM:SS." % t)
        return
    dci[t] = v

# One row of widgets (time, value, 'Add' toggle) per requested entry.
for i in range(ei.value):

    t = widgets.Text(value='00:00:00', description='Time:')
    v = widgets.FloatText(value=starting_hz.value, description='Hz:')
    n = widgets.ToggleButton(value=False, description='Add')
    UI = widgets.HBox([t, v, n])

    out = widgets.interactive_output(f, {'t': t, 'v': v, 'n': n})

    display(UI, out)

In [24]:
# Every row starts with the initial inverter value; the recorded changes are applied on top.
ddd['iHz'] = starting_hz.value

def f(t, v):
    """Set 'iHz' to ``v`` for all rows from time ``t`` (HH:MM:SS) on the selected date onwards."""
    D = date.value; T = datetime.datetime.strptime(t, '%H:%M:%S').time()
    y, mo, d, h, mi, s = D.year, D.month, D.day, T.hour, T.minute, T.second

    ddd.loc["%s-%s-%s %s:%s:%s"%(y, mo, d, h, mi, s):, "iHz"] = v

# Each change overwrites everything from its time to the end of the frame, so
# the changes must be applied in chronological order. The dict keeps them in
# the order they were entered, which need not be chronological. The keys are
# parsed for sorting because plain string order is wrong for unpadded hours.
for k, v in sorted(dci.items(),
                   key=lambda entry: datetime.datetime.strptime(entry[0], '%H:%M:%S')):
    f(k, v)

In [25]:
# Preview the first five rows, including the new 'aLPM' and 'iHz' columns.
ddd.head(n=5)

Unnamed: 0_level_0,PDRT-06,PDRT-09A,PRT-01,PRT-16,PDRT-09B,PDRT-10,PDRT-13,TE-09A,TE-09B,TE-32,...,TE-09D,TE-09E,TE-13B,TE-14,TE-16,TE-17B,day,time,aLPM,iHz
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-06-20 10:40:57.833,2.490234,0.029839,837.44213,-9.331597,0.157335,-0.243164,88.886176,22.6,23.4,,...,24.7,25.7,23.6,23.3,23.0,26.4,2019-06-20,10:40:57,750.0,37.5
2019-06-20 10:40:58.829,2.381727,0.032552,836.400463,-9.548611,0.165473,-0.242714,88.886176,23.0,23.4,,...,24.6,25.7,23.6,23.3,23.0,26.4,2019-06-20,10:40:58,750.0,37.5
2019-06-20 10:40:59.828,2.362739,0.035265,835.9375,-8.680556,0.16276,-0.243164,88.886176,23.0,23.4,,...,24.6,25.7,23.6,23.3,23.0,26.4,2019-06-20,10:40:59,750.0,37.5
2019-06-20 10:41:00.828,2.327474,0.035265,838.136574,-8.680556,0.157335,-0.245416,88.886176,23.0,23.4,,...,24.6,25.7,23.5,23.3,23.0,26.4,2019-06-20,10:41:00,650.0,37.5
2019-06-20 10:41:01.828,2.27322,0.054253,838.078704,-8.680556,0.143772,-0.246316,88.886176,23.0,23.4,,...,24.6,25.6,23.5,23.3,23.0,26.4,2019-06-20,10:41:01,650.0,37.5


# Plotting & Visualizing

In [28]:
column_names = list(ddd.columns)

# Multi-select of the columns to draw; the first two columns are pre-selected.
c = widgets.SelectMultiple(
    options=column_names,
    value=[column_names[0], column_names[1]],
    rows=6,
    description='Columns:',
    layout=Layout(width='80%'),
)

# Range slider over the 'time' column, initially spanning the first to the last row.
last_position = len(ddd['time']) - 1
t = widgets.SelectionRangeSlider(
    options=ddd['time'],
    index=(0, last_position),
    description='X-Axis:',
    layout=Layout(width='80%', height='80px'),
)

# Stack the two controls vertically.
UI = widgets.VBox([c, t])

In [29]:
def f(cols, t):
    """Plot the selected columns against 'time', limited to the chosen time window.

    Parameters
    ----------
    cols : sequence of column labels
        Current selection of the 'Columns:' widget.
    t : sequence of two values
        Start and end of the 'X-Axis:' range slider, used as the x-axis limits.
    """
    if not cols:
        # With nothing selected there is nothing to plot and the plot call
        # would fail, so show a hint instead of an error.
        print("Select at least one column to plot.")
        return
    ts, te = t[0], t[1]
    ax = ddd.plot(x='time', y=list(cols), style=".", figsize=(14,6))
    ax.set_xlim(ts, te)

# Re-draw the figure whenever the column selection or the time range changes.
out = widgets.interactive_output(f, {'cols': c, 't': t})

display(UI, out)

VBox(children=(SelectMultiple(description='Columns:', index=(0, 1), layout=Layout(width='80%'), options=('PDRT…

Output()

# Save File

Specify directory.

In [56]:
# The import cell only binds `askopenfilename` from `tkinter.filedialog`; the
# name `filedialog` itself is not reliably in scope on a fresh kernel, so it is
# imported explicitly here.
from tkinter import filedialog

# A cancelled dialog yields an empty string, in which case the file is later
# saved relative to the current working directory.
dir_name = filedialog.askdirectory()

Specify file name.

In [61]:
# Suggested file name: "<year>-<month>-<day> out.xlsx", built from the selected date.
chosen_day = date.value
default_v = '%s-%s-%s out.xlsx' % (chosen_day.year, chosen_day.month, chosen_day.day)

f_name = widgets.Text(value=default_v, description='File Name:')
display(f_name)

Text(value='2019-6-20 out.xlsx', description='File Name:')

In [74]:
button = widgets.Button(description="Save")
output = widgets.Output()

def on_button_clicked(b):
    """Write ``ddd`` to the chosen directory / file name and confirm inside ``output``."""
    # Everything runs inside the Output widget so messages appear below the button.
    with output:
        target_path = join(dir_name, f_name.value)
        ddd.to_excel(target_path)
        print("File saved.")

# Register the handler, then show the button together with its output area.
button.on_click(on_button_clicked)
display(button, output)

Button(description='Save', style=ButtonStyle())

Output()