In [1]:
import os.path

import h5py
import pandas as pd
import yaml

In [2]:
# load the run description (settings and image file names) from the yaml file
path = ["..","..","run_data","2023_11_17"]

# use a context manager so the file handle is closed once parsing is done
with open(os.path.join(*path, "data.yml")) as data_file:
    run_data = yaml.safe_load(data_file)

In [3]:
# turn the parsed yaml content into a table and display it
run_data = pd.DataFrame(data=run_data)
run_data

Unnamed: 0,ICT1,ICT2,dipole_name,quad_pv_name,save_filename,tdc_name
0,"[1.0427096863, 1.0504931731, 1.0825825874, 0.9...","[1.0814401273, 0.9198483482, 1.1175731477, 0.9...",0,0.0,../run_data/2023_11_17/13ARV1_1701445341.h5,0
1,"[1.0676237845, 0.9739818218, 0.9364733119, 0.9...","[1.080357061, 1.0241084938, 1.0472555984, 1.03...",0,2.5,../run_data/2023_11_17/13ARV1_1701445355.h5,0
2,"[1.0460149425, 1.0618973806, 1.0127526811, 1.0...","[1.2751877304, 0.929357092, 1.1565823723, 1.06...",0,5.0,../run_data/2023_11_17/13ARV1_1701445373.h5,0
3,"[1.090784506, 1.0681228329, 0.9262877618, 0.96...","[1.1095946306, 0.8587251235, 0.9177845897, 1.0...",0,7.5,../run_data/2023_11_17/13ARV1_1701445389.h5,0
4,"[1.0490607443, 0.9502193834, 0.9653901883, 1.0...","[1.1298700375, 1.1682520977, 0.9567565038, 0.9...",0,10.0,../run_data/2023_11_17/13ARV1_1701445408.h5,0
5,"[0.9110713952, 1.0075914083, 1.0202011958, 0.9...","[0.9549402666, 0.9914910579, 1.0659089712, 0.7...",0,0.0,../run_data/2023_11_17/13ARV1_1701445427.h5,1
6,"[0.9901213174, 1.0696060622, 0.9750447836, 1.0...","[0.9334081647, 1.1118080778, 0.9468162725, 1.0...",0,2.5,../run_data/2023_11_17/13ARV1_1701445441.h5,1
7,"[1.0177320169, 0.9980530839, 1.0033897109, 1.0...","[1.0415143617, 0.9508604815, 1.0718967041, 0.9...",0,5.0,../run_data/2023_11_17/13ARV1_1701445459.h5,1
8,"[0.9229568337, 0.9825185177, 1.0987707074, 0.9...","[0.8736014362, 0.8242793075, 0.9892559941, 0.8...",0,7.5,../run_data/2023_11_17/13ARV1_1701445472.h5,1
9,"[0.9580267246, 1.0433826054, 1.0311460607, 1.0...","[1.0112351844, 0.9328646501, 0.8445165334, 1.0...",0,10.0,../run_data/2023_11_17/13ARV1_1701445489.h5,1


In [4]:
# pivot the run table: one row per (tdc, dipole) setting pair, one column per
# quad setting, with the saved file name as the cell value
pivot_data = run_data.pivot(
    index=["tdc_name", "dipole_name"],
    columns="quad_pv_name",
    values="save_filename",
)

In [5]:
# get numpy values in the shape (tdc settings, dipole settings, quad settings)
num_rows, num_cols = pivot_data.shape

# derive the grid size from the pivot index instead of hard-coding it
num_tdc = pivot_data.index.get_level_values("tdc_name").nunique()
num_dipole = pivot_data.index.get_level_values("dipole_name").nunique()

# the reshape is only meaningful when every (tdc, dipole) combination is present
assert num_rows == num_tdc * num_dipole, (
    "pivot table does not cover the full (tdc, dipole) grid"
)

fnames = pivot_data.values.reshape(num_tdc, num_dipole, num_cols)

In [6]:
# create function to get data from h5 files
import numpy as np
def read_file(fname):
    """Read the scan settings and the images stored in one h5 file.

    Parameters
    ----------
    fname : str
        Path to an h5 file holding an "images" dataset whose attributes
        include "quad_pv_name", "tdc_name" and "dipole_name".

    Returns
    -------
    data_x : np.ndarray
        The three attribute values, ordered (quad, tdc, dipole).
    images : np.ndarray
        The full contents of the "images" dataset.
    """
    # open read-only explicitly: without a mode, older h5py versions may open
    # the file writable or create it when it does not exist
    with h5py.File(fname, "r") as f:
        image_dset = f["images"]
        data_x = np.array((
            image_dset.attrs["quad_pv_name"],
            image_dset.attrs["tdc_name"],
            image_dset.attrs["dipole_name"],
        ))
        # slice inside the `with` so the data is copied out before the file closes
        images = image_dset[:]

    return data_x, images

In [None]:
p0c = 62.0e6 # design momentum eV/c
C_LIGHT = 299792458
TDC_V = 3.7e6 # TODO: double check this value // verify calibration value
# 20 degrees converted to radians, divided by 0.3018; np.pi replaces the
# truncated 3.1416 so the conversion is exact to double precision
BEND_G = 20.0*np.pi/180.0/0.3018 # TODO: double check 0.3018 value
def quad_pv_to_k(quad_pv_value, p0c):
    """Convert a quadrupole PV value into a gradient normalized by beam rigidity.

    Parameters
    ----------
    quad_pv_value : float
        Quadrupole PV setting.
    p0c : float
        Beam momentum used to compute the rigidity as ``p0c / C_LIGHT``
        (presumably in eV/c, like the module-level design momentum; confirm).

    Returns
    -------
    float
        The scaled gradient divided by the beam rigidity.
    """
    rigidity = p0c / C_LIGHT
    # PV-to-gradient calibration factors; keep this evaluation order so the
    # floating-point result is unchanged
    scaled_gradient = quad_pv_value * 100 * 1.32e-2 / 1.29 * 1.04
    return scaled_gradient / rigidity

def tdc_pv_to_v(tdc_pv_value):
    """Map the TDC PV flag to a voltage.

    Returns 0 when the flag is 0 and ``TDC_V`` when it is 1.

    Raises
    ------
    ValueError
        If the flag is neither 0 nor 1.
    """
    if tdc_pv_value == 0:
        return 0
    if tdc_pv_value == 1:
        return TDC_V
    raise ValueError("TDC PV value must be 0 or 1")
    
def bend_pv_to_g(bend_pv_value):
    """Map the dipole (bend) PV flag to a bend strength.

    Returns a tiny non-zero value when the flag is 0 and ``BEND_G`` when it
    is 2.

    Raises
    ------
    ValueError
        If the flag is neither 0 nor 2.
    """
    if bend_pv_value == 0:
        return 2.22e-16 #machine epsilon for differentiability
    elif bend_pv_value == 2:
        return BEND_G
    else:
        # the accepted "on" flag is 2, so the message must say so
        raise ValueError("BEND PV value must be 0 or 2")

In [7]:
# Read every file on the (tdc, dipole, quad) grid of file names, collecting the
# raw PV settings, the settings converted to physical units, and the images.
train_x = []
train_x_physical_units = []
train_y = []
n_tdc, n_dipole, n_quad = fnames.shape
for tdc_idx in range(n_tdc):
    tx, txp, ty = [], [], []
    for dipole_idx in range(n_dipole):
        ttx, ttxp, tty = [], [], []
        for quad_idx in range(n_quad):
            # stored file names are resolved one directory above this notebook
            data_x, images = read_file(
                os.path.join("..", fnames[tdc_idx, dipole_idx, quad_idx])
            )
            print(data_x)

            # data_x is ordered (quad, tdc, dipole); quad_pv_to_k requires the
            # momentum as an explicit second argument
            quad_k = quad_pv_to_k(data_x[0], p0c)
            tdc_v = tdc_pv_to_v(data_x[1])
            bend_g = bend_pv_to_g(data_x[2])
            data_x_physical_units = [quad_k, tdc_v, bend_g]
            print(data_x_physical_units)
            print(images.shape)

            ttx.append(data_x)
            ttxp.append(data_x_physical_units)
            tty.append(images)

        tx.append(ttx)
        txp.append(ttxp)
        ty.append(tty)

    train_x.append(tx)
    train_x_physical_units.append(txp)
    train_y.append(ty)

# move the quad axis to the front: (tdc, dipole, quad, ...) -> (quad, tdc, dipole, ...)
train_x = np.array(train_x).transpose([2,0,1,3])
train_x_physical_units = np.array(train_x_physical_units).transpose([2,0,1,3])
train_y = np.array(train_y).transpose([2,0,1,3,4,5])

##
# params: [number of quad strengths, number of tdc voltages (2, off/on), number of dipole angles (2, off/on),
# number of scanning elements (3)]
# images: [number of quad strengths, number of tdc voltages (2, off/on), number of
# dipole angles (2, off/on), number of images per parameter configuration, width in pixels, height in pixels]

[0. 0. 0.]
(10, 2000, 2000)
[2.5 0.  0. ]
(10, 2000, 2000)
[5. 0. 0.]
(10, 2000, 2000)
[7.5 0.  0. ]
(10, 2000, 2000)
[10.  0.  0.]
(10, 2000, 2000)
[0. 0. 2.]
(10, 2000, 2000)
[2.5 0.  2. ]
(10, 2000, 2000)
[5. 0. 2.]
(10, 2000, 2000)
[7.5 0.  2. ]
(10, 2000, 2000)
[10.  0.  2.]
(10, 2000, 2000)
[0. 1. 0.]
(10, 2000, 2000)
[2.5 1.  0. ]
(10, 2000, 2000)
[5. 1. 0.]
(10, 2000, 2000)
[7.5 1.  0. ]
(10, 2000, 2000)
[10.  1.  0.]
(10, 2000, 2000)
[0. 1. 2.]
(10, 2000, 2000)
[2.5 1.  2. ]
(10, 2000, 2000)
[5. 1. 2.]
(10, 2000, 2000)
[7.5 1.  2. ]
(10, 2000, 2000)
[10.  1.  2.]
(10, 2000, 2000)


In [8]:
# sanity check: (quad strengths, tdc settings, dipole settings, scanned parameters)
train_x.shape

(5, 2, 2, 3)

In [None]:
# physical-unit parameters are built with the same nesting and transpose as train_x
train_x_physical_units.shape

In [9]:
# sanity check: the parameter grid axes followed by (images per configuration, pixels, pixels)
train_y.shape


(5, 2, 2, 10, 2000, 2000)

In [10]:
# raw (quad, tdc, dipole) PV values for the first quad setting, over all tdc/dipole combinations
train_x[0]

array([[[0., 0., 0.],
        [0., 0., 2.]],

       [[0., 1., 0.],
        [0., 1., 2.]]])