# Data exploration for joining all data sources

In this notebook we develop the functions for retrieving the tables, preparing them for the join, and then combining them based on the timestamp. 

## Imports

In [1]:
import os
import pandas as pd
import numpy as np
import csv
import time
import datetime
import collections
from abc import abstractmethod

## Sources
In the following section we construct the methods to read the different csv files and convert them to flat tables. 

In [2]:
location = "../data/"

In [3]:
class Helper():
    
    @classmethod
    def timestamp_to_datetimes(cls, df: pd.DataFrame): 
        """ Takes a pandas dataframe with a timestamp column (int) and also adds date datetime, datetime_ms, datetime_μs. 
        """
        # Conversion functions
        conversion_to_datetime_μs = lambda x: datetime.datetime.fromtimestamp(x/1000000000).strftime('%Y-%m-%d %H:%M:%S.%f')
        conversion_to_datetime_ms = lambda x: datetime.datetime.fromtimestamp(x/1000000000).strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        conversion_to_datetime = lambda x: datetime.datetime.fromtimestamp(x/1000000000).strftime('%Y-%m-%d %H:%M:%S')

        # Apply conversions
        df["datetime_μs"] = df["timestamp"].apply(conversion_to_datetime_μs)
        df["datetime_ms"] = df["timestamp"].apply(conversion_to_datetime_ms)
        df["datetime"] = df["timestamp"].apply(conversion_to_datetime)

        return

In [48]:
class Recorder(object): 
    """ Base class for mapping a csv file to a pandas dataframe in real-time when it changes. 
    """
    
    def __init__(self, filepath: str): 
        # Settings
        self.filepath = filepath
        
        # Tracking
        self.read_data_lines = 0
        self.last_updated = 0
        self.data_last_updated = 0
        self.metadata_last_updated = 0
        
        # Dataframes 
        self._table_df = None     # Data x Metadata
        self._data_df = None      # Data
        self._metadata_df = None  # Metadata

    def get_table(self) -> pd.DataFrame: 
        self._update()
        self.last_updated = self._get_mod_time()
        return self._table_df
    
    def get_data(self) -> pd.DataFrame: 
        self._update_data()
        self.data_last_updated = self._get_mod_time()
        return self._data_df
    
    def get_metadata(self) -> pd.DataFrame: 
        self._update_metadata()
        self.metadata_last_updated = self._get_mod_time()
        self.metadata_columns = list(self._metadata_df.columns)
        return self._metadata_df
    
    def table_is_up_to_date(self) -> bool:
        return self.last_updated == self._get_mod_time()
    
    def data_is_up_to_date(self) -> bool: 
        return self.data_last_updated == self._get_mod_time()
    
    def metadata_is_up_to_date(self) -> bool: 
        return self.metadata_last_updated == self._get_mod_time()
    
    def _update(self): 
        if self.table_is_up_to_date(): 
            return 
        data_df = self.get_data()
        metadata_df = self.get_metadata()
        self._table_df = self._data_df.merge(self._metadata_df, how='cross') if self._metadata_df is not None else self._data_df
        self._harmonize_time()
        self._table_df = self._table_df.sort_values(by="timestamp")
        self.last_updated = self._get_mod_time()
        
    def _get_mod_time(self): 
        return time.ctime(os.path.getmtime(self.filepath))
        
    def _update_data(self):
        """ Updates self._data_df and self.data_last_updated incrementally. Makes use of _load_new_data(). 
        """
        
        # Case first loading 
        if self.read_data_lines == 0: 
            self._data_df = self._load_new_data()
            return 
        
        # Case reloading
        new_data_df = self._load_new_data()
        if len(new_data_df.index) == 0: 
            return

        # Concatenate old and new
        self._data_df = pd.concat(
            [self._data_df, new_data_df],
            axis=0,
            join="outer",
            ignore_index=True,
            copy=True
        )
        return
    
    @abstractmethod
    def _load_new_data(self) -> pd.DataFrame: 
        """ Returns the rows which have not been loaded so far. 
        """
    
        pass
    
    @abstractmethod
    def _update_metadata(self): 
        """ Updates self.metadata_df and self.metadata_last_updated. We do a full reload. 
        """
        pass
    
    @abstractmethod
    def _harmonize_time(self): 
        pass

### SSD2 Data
Data from the WE7000 DAQ for the SSD2. The PMT current from the MOT will be obtained like this in our next experiment.

In [45]:
class RecorderSSD(Recorder): 
    """ Class for data engineering of the SSD2 data. """

    def __init__(self, filepath: str):
        super(RecorderSSD, self).__init__(filepath)
        self.nr_meta_data_rows = 37

    def _load_new_data(self) -> pd.DataFrame: 
        new_data_df = pd.read_csv(filepath_or_buffer=self.filepath, 
                                 skiprows=self.nr_meta_data_rows + self.read_data_lines, 
                                 header=0, 
                                 names=["TraceName", "Time_x", "PulseHeight"])        
        self.read_data_lines += len(new_data_df.index)
        return new_data_df
        
    def _update_metadata(self): 
        with open(self.filepath, newline='') as f:
            reader = csv.reader(f)
            metadata = list(reader)[:(self.nr_meta_data_rows + 1)]
            metadata =  metadata[:3] +  metadata[4:]
            columns = [line[0] for line in metadata]
            row = [line[1] for line in metadata]
            self._metadata_df = pd.DataFrame(data=[row], columns=columns)
                
    def _harmonize_time(self): 
        """ Convert the relative time and start time to the real time. """
        
        def harmonize_table(df: pd.DataFrame) -> pd.DataFrame: 
            # Start time
            helper_df = pd.DataFrame()
            helper_df["start_datetime_str"] = df["//StartDate"].apply(lambda s: s.replace("/", "-")) + " " + df["//StartTime"]
            helper_df["start_datetime"] = pd.to_datetime(helper_df["start_datetime_str"]) 

            # Conversion parameter: Time_x * rel_time_to_ns = rel. time in ns
            time_resolution = df['//TimeResolution'][0]
            rel_time_to_ns = {
                '1.000000e-009': 1e-0,
                '1.000000e-006': 1e+3,
                '1.000000e-003': 1e+6
            }[time_resolution]

            # Real time
            helper_df["relative_time_ns"] = df["Time_x"] * rel_time_to_ns
            helper_df["start_ns"] = helper_df.start_datetime.values.astype(np.int64)
            helper_df["timestamp"] = helper_df["start_ns"] + helper_df["relative_time_ns"]

            # Add datetimes
            df["timestamp"] = helper_df["timestamp"]
            Helper.timestamp_to_datetimes(df)
            return df
        
        self._table_df = harmonize_table(self._table_df)
        self._data_df = self._table_df[self._data_df.columns]

In [46]:
recorderSDD = RecorderSSD(location+"-20220314-100806-Slot1-In2.csv")
ssd_df = recorderSDD.get_table()

In [None]:
recorderSDD.get_table()
recorderSDD.read_data_lines

### SSD Histogram representation
A very useful representation of the SSD data is when we group by time (primary key) and then count the number of pulses with a certain hight. The columns then correspond to voltage ranges in 12-bit (0, 1, 2, 3, ..., 4094, 4095) encoding. 

In [7]:
class SSDRepresentation(): 
    
    @classmethod
    def get_hist_rep(cls, ssd_df: pd.DataFrame) -> pd.DataFrame: 
        nr_of_channels = ssd_df["//Gain"][0]
        
        # Groupby timestamp and PulseHeight (channel)
        grouped = ssd_df.groupby(by=["datetime", "PulseHeight"])
        df = grouped.agg({"TraceName": 'count'}).rename({"TraceName": "PulseCount"}, axis=1)
        df = df.unstack(level='PulseHeight').fillna(0, downcast="infer")
        
        # Fix column names
        df.columns = df.columns.to_flat_index()
        df.columns = [str(col[1]) for col in df.columns.values]
            
        return df
    
    @classmethod
    def get_time_aggregated_hist_rep(cls, ssd_df: pd.DataFrame) -> pd.DataFrame: 
        df = cls.get_hist_rep(ssd_df) 
        df_sum = df.sum()
        df = pd.DataFrame(data=[df_sum.values], columns=df_sum.index.values)
        return df
    
    def get_channel_aggregated_hist_rep(cls, ssd_df: pd.DataFrame) -> pd.DataFrame: 
        """ TODO: Implement aggregation over all channels. 
        """

In [8]:
ssd_hist_df = SSDRepresentation.get_hist_rep(ssd_df)
ssd_hist_df

Unnamed: 0_level_0,500,501,502,503,504,505,506,507,508,509,...,3543,3556,3661,3676,3719,3842,3929,3977,3984,4000
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-03-14 19:07:54,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 19:07:56,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 19:07:57,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 19:07:58,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 19:07:59,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-14 21:06:45,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 21:06:47,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 21:06:48,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-03-14 21:06:49,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
ssd_full_agg_hist_df = SSDRepresentation.get_fully_aggregated_hist_rep(ssd_df)
ssd_full_agg_hist_df

AttributeError: type object 'SSDRepresentation' has no attribute 'get_fully_aggregated_hist_rep'

### PMT Data
In the table, we have the following columns. 
- No. : Frame number of CMOS camera
- Time: When the frame is obtained
- PMT Current: Current from photomultiplier at the time the frame obtained
- ROI Sum: The sum of signal values in ROI
- Coil (1:ON 0:ODD): The current of the coil at that time

Comment: 
- Will be depraced and replaced by the SSD. 

In [None]:
class RecorderPMT(): 
    
    def __init__(self, filepath: str):
        super(RecorderPMT, self).__init__(filepath)
    
    def _update_data(self, filepath: str=location+"all_data.csv"): 
        df = pd.read_csv(filepath_or_buffer=filepath)
        df.rename({"Unnamed: 5": "a"}, axis="columns", inplace=True)
        df = df.drop(["a"], axis=1)
        cls._harmonize_time(df)
        df = df.sort_values(by="timestamp")
        return df
    
    @classmethod
    def _harmonize_time(self): 
        
        self._table_df["datetime_μs"] = self._table_df["Time"].apply(lambda s: s+"000")
        self._table_df["datetime_ms"] = self._table_df["Time"]
        df["datetime"] = df["Time"].apply(lambda s: s[:-4])
        df["timestamp"] = df["Time"].apply(pd.Timestamp).values.astype(np.int64)
        return 

In [None]:
recorderPMT = RecorderPMT("all_data.csv")
pmt_df = recorderPMT.get_table()
pmt_df 

### Coil Log
The current of the MOT coil is controlled by a relay switch. This text file is the log of the relay switch.

In [None]:
class Coil(): 
    
    @classmethod
    def get_table(cls, filepath: str=location+"coil_log.txt"): 
        df = pd.read_csv(filepath_or_buffer=filepath, delimiter="	")
        cls._harmonize_time(df)
        df = df.sort_values(by="timestamp")
        return df
    
    @classmethod
    def _harmonize_time(cls, df: pd.DataFrame): 
        df["datetime_μs"] = df["Time"].apply(lambda s: s+"000")
        df["datetime_ms"] = df["Time"]
        df["datetime"] = df["Time"].apply(lambda s: s[:-4])
        df["timestamp"] = df["Time"].apply(pd.Timestamp).values.astype(np.int64)
        return 

coil_table = Coil.get_table()
coil_table 

### Heater Log
Log of the IR heater output percentage for target heating. 

In [None]:
class Heater(): 
    """ Class for data engineering of the heater data. """
    
    @classmethod
    def get_table(cls, filepath: str=location+"HeaterLog_20220314_100740_00001.csv", with_metadata: bool=False): 
        """ Load full table, consisting of data and metadata in a flat format. """
        data_df = cls._get_data(filepath)
        metadata_df = cls._get_metadata(filepath)
        df = data_df.merge(metadata_df, how='cross')
        cls._harmonize_time(df)
        if with_metadata: 
            return df
        return df[[col for col in df.columns if col not in metadata_df.columns]]    
    
    @classmethod
    def _get_data(cls, filepath: str=location+"HeaterLog_20220314_100740_00001.csv") -> pd.DataFrame: 
        df = pd.read_csv(filepath_or_buffer=filepath, 
                                 skiprows=6, 
                                 header=0, 
                                 names=["Date", "Time", "Unknown", "TargetPercentage", "MeasuredPercentage"],
                                 encoding="cp932")
        cls._harmonize_time(df)
        df = df.sort_values(by="timestamp")
        return df
    
    @classmethod
    def _get_metadata(cls, filepath: str=location+"HeaterLog_20220314_100740_00001.csv"): 
        with open(filepath, newline='', encoding="cp932") as f:
            reader = csv.reader(f)
            metadata_list = list(reader)[:6]
            columns = [m[0] for m in metadata_list]
            row = [metadata_list[i][1] for i in range(2)] +  [f"{metadata_list[i][3]},{metadata_list[i][4]}" for i in range(2, 6)]
            df = pd.DataFrame(data=[row], columns=columns)
            return df
    
    @classmethod
    def _harmonize_time(cls, df: pd.DataFrame) -> pd.DataFrame: 
        df["datetime"] = df["Date"].apply(lambda s: s.replace("/", "-")) + " " + df["Time"]
        df["datetime_μs"] = df["datetime"].apply(lambda s: s+".000000")
        df["datetime_ms"] = df["datetime"].apply(lambda s: s+".000")
        df["timestamp"] = df["datetime_ms"].apply(pd.Timestamp).values.astype(np.int64)
        return df

In [None]:
heater_df = Heater.get_table(with_metadata=False)
heater_df

### Ion Beam Control
Log of the Fr ion source. First day of experiment, right after the end of primary beam check, just starting the Fr ion extraction. The column "FC" is the current from either one of the faraday cups, or the sum of both.
The columns "Center" and "Surrounding" are the voltages applied to the mechanical relay switches that connects the faraday cups to the picoammeter. For example, if "Center" = 24 and "Surrounding" = 0, the value at "FC" is the current observed on FC Center in nA.

In [None]:
class IonBeamControl(): 
    
    @classmethod
    def get_table(cls, filepath: str=location+"IonBeamControl1.5_DESKTOP-8ICG2TJ_20220314_114132.csv"): 
        df = pd.read_csv(filepath_or_buffer=filepath)
        cls._harmonize_time(df)
        df = df.sort_values(by="timestamp")
        return df
    
    @classmethod
    def _harmonize_time(cls, df: pd.DataFrame) -> pd.DataFrame: 
        df["datetime"] = pd.to_datetime(df["Timestamp"])
        df["datetime_μs"] = df["Timestamp"].apply(lambda s: s+".000000")
        df["datetime_ms"] = df["Timestamp"].apply(lambda s: s+".000")
        df["timestamp"] = df["datetime"].values.astype(np.int64)
        
ion_beam_control_df = IonBeamControl.get_table()
ion_beam_control_df

### Gauge Monitor

In [None]:
class Gauge(): 
    
    @classmethod
    def get_table(cls, filepath: str=location+"TPG256GaugeMonitor_Single_DESKTOP-BEF5FI4_20220312_203214.csv"): 
        df = pd.read_csv(filepath_or_buffer=filepath)
        cls._harmonize_time(df)
        df = df.sort_values(by="timestamp")
        return df
    
    @classmethod
    def _harmonize_time(cls, df: pd.DataFrame): 
        df["datetime"] = pd.to_datetime(df["Timestamp"])
        df["datetime_μs"] = df["Timestamp"].apply(lambda s: s+".000000")
        df["datetime_ms"] = df["Timestamp"].apply(lambda s: s+".000")
        df["timestamp"] = df["datetime"].values.astype(np.int64)
        return df

gauge_df = Gauge.get_table()
gauge_df

### Laser data

In [None]:
class Laser(): 
    
    @classmethod
    def get_table(cls, filepath: str=location+"15.03.2022, 21.30, 384.22817013 THz.lta", with_metadata: bool=False): 
        """ Load full table, consisting of data and metadata in a flat format. """
        data_df = cls._get_data(filepath)
        metadata_df = cls._get_metadata(filepath)
        if not with_metadata: 
            metadata_df = metadata_df[["StartTime"]]
        df = data_df.merge(metadata_df, how='cross')
        cls._harmonize_time(df)
        df = df.sort_values(by="timestamp")
        if with_metadata: 
            return df
        else:
            return df[[col for col in df.columns if col not in metadata_df.columns]]     
    
    @classmethod
    def _get_data(cls, filepath: str=location+"15.03.2022, 21.30, 384.22817013 THz.lta") -> pd.DataFrame:
        """ Loads the data and makes sure that each row has all laser measurements by aggregating rows. """
        original_df = pd.read_csv(filepath_or_buffer=filepath, 
                                  skiprows=119,
                                  delimiter="	")
        
        df = cls._aggregate_laser_rows(original_df)
        return df
        
    @classmethod
    def _aggregate_laser_rows(cls, original_df: pd.DataFrame): 
        """ Originally, one measurement of the six laser wavelengths is distributed over six rows. We aggregate 
            these rows into one row. The only tradeoff is that we have to approximate the time with the time
            of the last measurement. 
        """
        
        n = len(original_df.index)
        
        time_column = 'Time  [ms]'
        laser_columns = original_df.columns[1:]
        n_laser_columns = len(laser_columns)
        
        laser_column_lookup = {col: i for i, col in enumerate(laser_columns)}
        data_lookup = collections.defaultdict(list)
        row_lookup = {}
        row_list = []
        
        for index, row in original_df.iterrows():
            column_index = pd.Series.first_valid_index(row[1:])
            row_lookup[column_index] = row[column_index]
            # Count if all 6 lasers have been measured
            if len(row_lookup) == 6:     
                item = [row[time_column]] + [row_lookup[col] for col in laser_columns]
                row_list.append(item)
                row_lookup = {}

        df = pd.DataFrame(data=row_list, columns=original_df.columns)
        return df
        
    @classmethod
    def _get_metadata(cls, filepath: str=location+"15.03.2022, 21.30, 384.22817013 THz.lta"): 
        with open(filepath, newline='', encoding="cp932") as f:
            reader = csv.reader(f, delimiter="	")
            metadata_list = list(reader)[:119]
            
            # Title
            title_column = ["Title"]
            title_row = [metadata_list[0][0]]
            
            # General info
            gi_columns = [m[0] for m in metadata_list[1:7]]
            gi_rows = [cls._combine(m[1:]) for m in metadata_list[1:7]]
            
            # General settings
            gs_columns = [m[0] for m in metadata_list[9:20]]
            gs_rows = [cls._combine(m[1:]) for m in metadata_list[9:20]]

            # Frames 1-6
            frame_columns = (
                [m[0] for m in metadata_list[22:36]]
                + [m[0] for m in metadata_list[38:52]]
                + [m[0] for m in metadata_list[54:68]]
                + [m[0] for m in metadata_list[70:84]]
                + [m[0] for m in metadata_list[86:100]]
                + [m[0] for m in metadata_list[102:116]]
            )
            
            frame_rows = (
                [cls._combine(m[1:]) for m in metadata_list[22:36]]
                + [cls._combine(m[1:]) for m in metadata_list[38:52]]
                + [cls._combine(m[1:]) for m in metadata_list[54:68]]
                + [cls._combine(m[1:]) for m in metadata_list[70:84]]
                + [cls._combine(m[1:]) for m in metadata_list[86:100]]
                + [cls._combine(m[1:]) for m in metadata_list[102:116]]
            )
            
            columns = title_column + gi_columns + gs_columns + frame_columns
            row = title_row + gi_rows + gs_rows + frame_rows
            df = pd.DataFrame(data=[row], columns=columns)
            return df                                    
    
    @classmethod
    def _combine(cls, entries: list): 
        """ If entries has length 1, then it returns the entry. 
            Otherwise, it converts the list to a comma separated string. 
        """
        if len(entries) == 0: 
            return None
        
        if len(entries) == 1: 
            return entries[0]
        
        return ",".join(entries)
    
    @classmethod
    def _harmonize_time(cls, df: pd.DataFrame):

        # Convert 15.03.2022, 08:46:39.387 to 15-03-2022 08:46:39.387
        helper_df = pd.DataFrame()
        helper_df["StartTime"] = df["StartTime"].apply(lambda s: s.replace(",", "").replace(".", "-", 2))
        
        # Calculate absolute time based on relative time
        helper_df["start_datetime"] = pd.to_datetime(helper_df["StartTime"])
        helper_df["start_timestamp"] = helper_df["start_datetime"].values.astype(np.int64)
        helper_df["timestamp"] = helper_df["start_timestamp"] + df["Time  [ms]"] * 1e3
        
        # Add datetimes
        df["timestamp"] = helper_df["timestamp"]
        Helper.timestamp_to_datetimes(df)
        
        return

In [None]:
laser_df = Laser.get_table(with_metadata=False)
laser_df

### Image data
We have a folder which contains the picture data from the CMOS camera. A part of the pixels (region of interest, ROI) selected in advance is extracted. Each picture data is a csv file and records signals from each pixel.

In [None]:
class Image(): 
    
    def get_array(filepath: str=location+"cmos_000039.csv"): 
        with open(filepath) as file_name:
            return np.loadtxt(file_name, delimiter=",")
        
    def get_metadata(filepath: str=location+"cmos_000039.csv") -> pd.DataFrame: 
        
        columns = ["size", "ctime"] # Size and creation time
        row = [os.path.getsize(filepath), os.path.getctime(filepath)]
        return pd.DataFrame(data=[row], columns=columns)
        
image_array = Image.get_array()
image_table = Image.get_metadata()
image_table

In [None]:
image_array

## Joins
For a refresher on joins, checkout this article: https://pandas.pydata.org/docs/user_guide/merging.html. Now our goal is to join the tables into a main table. We join on the timestamp and define the following rules: 
- We join on timestamp in seconds with an outer join.
- As the values do not exactly match, we first sort the dataframes, such that we then can join on nearly-matching values with some threshold (https://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.merge_asof.html). 

Problem: 
- As the sampling method varies a lot, we would lose a lot of data with this method. 

Solution A: 
- Treat some tables as parameters and settings, and some tables as measurements. Then we first sample the parameters and settings on level second, and then use a finer sampling for the measurements. 
- For this, we first find all unique times in seconds. Then we generate the rows for each unique second, taking the value if it exists or nan if not. 
- Last, we do an outer join on the time in seconds. 

In [None]:
class Main(): 

    def limit_tables_to_timespan(dfs, start, stop): 
        """ Takes a list of dfs, only keeps the rows between start and stop time and returns the modified list. 
        """
        pass
    
    def build_table(dfs: list, prefixes: list): 
        """ Concatenates the dataframes to a single dataframe, ignoring the indices. The names are used as prefixes of the
            columns, such that we can have similar column names but still know from which df it came. 
        """   
          
        # Add prefixes
        dfs = [df.rename(columns={col: prefix+"_"+col for col in df.columns}) for df, prefix in zip(dfs, prefixes)]
        
        # Outer join 
        main_df = pd.concat(dfs, axis=1, join="outer")
        
        return main_df

In [None]:
main_table = Main.build_table(
    dfs=[laser_df, gauge_df, ion_beam_control_df, heater_df, coil_table, pmt_df, ssd_df, ssd_hist_df],
    prefixes=["laser", "gauge", "ion_beam_control", "heater", "coil", "pmt", "ssd", "ssd_hist"]
)
main_table

### Analysis 
In the following we want to test some simple analysis as proof of concept.