# Data exploration for joining all data sources

In this notebook we develop the functions for retrieving the tables, preparing them for the join, and then combining them based on the timestamp. 

## Imports

In [102]:
import pandas as pd
import numpy as np
import csv

## Sources
In the following section we construct the methods to read the different csv files and convert them to flat tables. 

In [103]:
location = "../data/"

### SSD2 Data
Data from the WE7000 DAQ for the SSD2

In [104]:
class SSD(): 
    """ Class for data engineering of the SSD2 data. """
    
    @classmethod
    def get_data(cls, filepath: str=location+"-20220314-100806-Slot1-In2.csv") -> pd.DataFrame: 
         return pd.read_csv(filepath_or_buffer=filepath, 
                                 skiprows=38, 
                                 header=0, 
                                 names=["TraceName", "Time", "PulseHeight"])
    @classmethod
    def get_metadata(cls, filepath: str=location+"-20220314-100806-Slot1-In2.csv"): 
        with open(filepath, newline='') as f:
            reader = csv.reader(f)
            all_data_metadata = list(reader)[:38]
            all_data_metadata =  all_data_metadata[:3] +  all_data_metadata[4:]
            columns = [line[0] for line in all_data_metadata]
            row = [line[1] for line in all_data_metadata]
            df = pd.DataFrame(data=[row], columns=columns)
            return df

    @classmethod
    def get_table(cls, filepath: str=location+"-20220314-100806-Slot1-In2.csv"): 
        data_df = cls.get_data(filepath)
        metadata_df = cls.get_metadata(filepath)
        return data_df.merge(metadata_df, how='cross')

ssd_data_df = SSD.get_data()
ssd_metadata_df = SSD.get_metadata()
ssd_df = SSD.get_table()

In [105]:
ssd_data_df

Unnamed: 0,TraceName,Time,PulseHeight
0,1,865236,2855
1,2,2486115,2516
2,3,2628986,2527
3,4,2650471,2456
4,5,3415610,2482
...,...,...,...
15256,15257,7133087349,2993
15257,15258,7133923984,2465
15258,15259,7134604904,3007
15259,15260,7135583418,2726


In [106]:
ssd_metadata_df

Unnamed: 0,Model,BlockNumber,BlockSize,VUnit,HResolution,HUnit,Date,Time,//Comment,//StartDate,...,//GateFunction,//TriggerOutput,//SignalType,//Synchronize,//StopSource,//TimeResolution,//TimeStamp,//RealTime,//LiveTime,//DeadTime(%)
0,WE7562,1,15262,cnt,1.0,?,2022/03/14,10:07:54,,2022/03/14,...,,,Trigger,OFF,,1e-06,Peak,7136.569,7136.41,0.002233544


In [107]:
ssd_df

Unnamed: 0,TraceName,Time_x,PulseHeight,Model,BlockNumber,BlockSize,VUnit,HResolution,HUnit,Date,...,//GateFunction,//TriggerOutput,//SignalType,//Synchronize,//StopSource,//TimeResolution,//TimeStamp,//RealTime,//LiveTime,//DeadTime(%)
0,1,865236,2855,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
1,2,2486115,2516,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
2,3,2628986,2527,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
3,4,2650471,2456,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
4,5,3415610,2482,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15256,15257,7133087349,2993,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
15257,15258,7133923984,2465,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
15258,15259,7134604904,3007,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003
15259,15260,7135583418,2726,WE7562,1,15262,cnt,1.000000e+000,?,2022/03/14,...,,,Trigger,OFF,,1.000000e-006,Peak,7.136569e+003,7.136410e+003,2.233544e-003


### PMT Data

In [108]:
class PMT(): 
    
    def get_table(filepath: str=location+"all_data.csv"): 
        pmt_df = pd.read_csv(filepath_or_buffer=filepath)
        pmt_df.rename({"Unnamed: 5": "a"}, axis="columns", inplace=True)
        print(pmt_df.columns)
        return pmt_df.drop(["a"], axis=1)
    
pmt_df = PMT.get_table()
pmt_df

Index(['No.', 'Time', 'PMT Current (A)', 'ROI Sum', 'Coil (1:ON 0:OFF)', 'a'], dtype='object')


Unnamed: 0,No.,Time,PMT Current (A),ROI Sum,Coil (1:ON 0:OFF)
0,1,2022/03/15 08:18:00.266,-1.519308e-13,1491678,1
1,14,2022/03/15 08:18:01.507,9.085866e-13,1498276,1
2,18,2022/03/15 08:18:01.907,3.832096e-13,1499445,1
3,22,2022/03/15 08:18:02.308,3.028373e-15,1499770,1
4,26,2022/03/15 08:18:02.710,4.986230e-14,1498276,1
...,...,...,...,...,...
114062,457337,2022/03/15 21:00:57.162,-8.295666e-06,1491353,1
114063,457341,2022/03/15 21:00:57.563,-8.267808e-06,1491303,1
114064,457345,2022/03/15 21:00:57.963,-8.278364e-06,1491668,1
114065,457349,2022/03/15 21:00:58.362,-8.336747e-06,1491360,1


### Coil Log

In [109]:
class Coil(): 
    
    def get_table(filepath: str=location+"coil_log.txt"): 
        return pd.read_csv(filepath_or_buffer=filepath, delimiter="	")

coil_table = Coil.get_table()
coil_table     

Unnamed: 0,Time,CoilOperation
0,2022/03/14 12:09:23,ON
1,2022/03/14 12:09:23,OFF
2,2022/03/14 12:09:31,ON
3,2022/03/14 12:10:53,OFF
4,2022/03/14 12:12:57,OFF
5,2022/03/14 12:12:57,ON
6,2022/03/14 12:12:58,ON
7,2022/03/14 14:07:42,OFF
8,2022/03/14 14:07:45,ON
9,2022/03/14 15:40:46,OFF


### Heater Log

In [110]:
class Heater(): 
    """ Class for data engineering of the heater data. """
    
    @classmethod
    def get_data(cls, filepath: str=location+"HeaterLog_20220314_100740_00001.csv") -> pd.DataFrame: 
         return pd.read_csv(filepath_or_buffer=filepath, 
                                 skiprows=6, 
                                 header=0, 
                                 names=["TraceName", "Time", "PulseHeight"],
                                 encoding="cp932")
    @classmethod
    def get_metadata(cls, filepath: str=location+"HeaterLog_20220314_100740_00001.csv"): 
        with open(filepath, newline='', encoding="cp932") as f:
            reader = csv.reader(f)
            metadata_list = list(reader)[:6]
            for m in metadata_list: 
                print(len(m))
            columns = []
            rows = []
            df = pd.DataFrame(data=rows, columns=columns)
            return df

    @classmethod
    def get_table(cls, filepath: str=location+"HeaterLog_20220314_100740_00001.csv"): 
        data_df = cls.get_data(filepath)
        metadata_df = cls.get_metadata(filepath)
        return data_df.merge(metadata_df, how='cross')

heater_data_df = Heater.get_data()
heater_metadata_df = Heater.get_metadata()
heater_df = Heater.get_table()


2
2
5
5
5
5
2
2
5
5
5
5
