# Data preparation

This notebook has the goal of reading the data in the original format and put it in a more reasonable format (pandas). So that we could actually work with them.

Version 0.1 - 2.6.2019

In [92]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from os import walk

## Functions

In [16]:
def read_data_file(name):
    d = {}
    with open(name, encoding="iso8859-1") as f:
        for line in f:
            if ("=" in line):
                (key, val) = line.split('=')
                d[key] = val
    return d

In [29]:
def print_dict(d):
    for key, val in d.items():
        print (key)
        print (val)
        print ("----")

In [86]:
def convert_dictionary(d):
    d_ = {'T': -1, 'O2': -1, 'iter':-1, 'freq':[0], 'data': [0]}
    d_['T'] = int(d['Temperature (°C)'].rstrip())
    d_['O2'] = int(d['Concentration O2Air (%)'].rstrip())
    d_['iter'] = int(d['Iterations'].rstrip())
    d_['freq'] = np.array(re.sub(r'\<.*\>', '', d['Frequency (Hz)']).rstrip().split(","))
    d_['data'] = np.array(re.sub(r'\<.*\>', '', d['Phi Comp (°)']).rstrip().split(",")).reshape(d_['iter'], d_['freq'].shape[0])
    
    d_['freq'] = [d_['freq']]
    d_['data'] = [d_['data']]
    
    p = pd.DataFrame.from_dict(d_)
    return p

In [106]:
def generate_dataframe_from_directory(path):
    df = pd.DataFrame()

    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            # Read the files
            if (filename.lower().endswith('txt') and filename.lower().startswith('temp')):
                #print('Reading '+filename)
                d = read_data_file("../data/"+filename)
                p = convert_dictionary(d)
                df = df.append(p)
                
    return df

## Study and test data format

In [88]:
d = read_data_file("../data/Reference.txt")
print_dict(d)

MGI RWA Section Options
2.0.1 %04Y%02m%02d %02H%02M%S%25u*~|.%d*~|.,*~|.%#_13g

----
File
C:\Projekte\LuminescenceAI\RealProbe5mm\Data\Temp_25deg_Conc_100perc.txt

----
Date, Time
26.04.2019, 12:52

----
Temperature (°C)
25

----
Concentration O2Air (%)
100

----
Iterations
1

----
Frequency (Hz)
<50>200,300,400,500,600,700,800,900,1000,1250,1500,1750,2000,2250,2500,2750,3000,3250,3500,3750,4000,4250,4500,4750,5000,5250,5500,5750,6000,6250,6500,6750,7000,7250,7500,7750,8000,8250,8500,8750,9000,9250,9500,9750,10000,11000,12000,13000,14000,15000

----
Phi Comp (°)
<50>-0.354,-0.57,-0.786,-0.99,-1.194,-1.404,-1.602,-1.8,-1.998,-2.502,-3,-3.51,-4.002,-4.506,-5.016,-5.508,-6.012,-6.516,-7.014,-7.512,-8.004,-8.502,-9,-9.492,-9.984,-10.482,-10.974,-11.466,-11.952,-12.444,-12.93,-13.416,-13.902,-14.388,-14.874,-15.36,-15.84,-16.32,-16.8,-17.274,-17.754,-18.234,-18.702,-19.176,-19.644,-21.522,-23.37,-25.2,-26.994,-28.776

----
R (V)
<50>0.298922,0.298952,0.298983,0.298922,0.298861,0.29883

In [89]:
d = read_data_file("../data/Temp_5deg_Conc_0perc.txt")
convert_dictionary(d)

Unnamed: 0,T,O2,iter,freq,data
0,5,0,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-4.83, -7.23, -9.594, -11.922, -14.202, -16...."


# Loop over files in the directory

In [107]:
df = generate_dataframe_from_directory('../data/')

In [108]:
df

Unnamed: 0,T,O2,iter,freq,data
0,20,35,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-2.424, -3.642, -4.83, -6.036, -7.236, -8.4,..."
0,20,25,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-2.706, -4.05, -5.388, -6.726, -8.034, -9.33..."
0,25,10,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-3.288, -4.926, -6.552, -8.166, -9.762, -11...."
0,10,95,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-1.776, -2.682, -3.564, -4.452, -5.334, -6.1..."
0,10,85,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-1.86, -2.808, -3.732, -4.662, -5.58, -6.48,..."
0,30,25,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-2.526, -3.792, -5.046, -6.276, -7.518, -8.7..."
0,30,35,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-2.268, -3.396, -4.524, -5.646, -6.75, -7.84..."
0,35,10,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-3.096, -4.644, -6.174, -7.686, -9.186, -10...."
0,15,100,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-1.686, -2.526, -3.36, -4.194, -5.016, -5.84..."
0,40,70,20,"[200, 300, 400, 500, 600, 700, 800, 900, 1000,...","[[-1.626, -2.454, -3.264, -4.074, -4.884, -5.6..."
