# Create Emittance DataFrames 
~ N. Karastathis (2017)

The idea behind this notebook is to create a (large) flat dataframe that contains all the emittances estimated by the `LuminosityFollowUp` analysis, for further visualization and analysis.

The script loops over your fill list, pulls the data out of the stored pickles and creates more informative features, such as the growth and the percentage difference of the emittance measured at different instants during the LHC cycle.

You can also add fill numbers to the `blacklist`; these fills are then excluded when the growth dataframe is built.

In [1]:
import numpy as np
import glob
import pandas as pd
import matplotlib.pyplot as plt
import os
import gzip
import pickle



___

# Main info for file making

In [2]:
# Folder that is globbed to build the fill list and from which the per-fill
# pickles are read. It is concatenated with "+" below, so keep the trailing '/'.
input_folder     = '/eos/project/l/lhc-lumimod/LuminosityFollowUp/2018/procdata/'
# Folder in which the two dataframes are stored (and from which they are
# re-read for incremental updates). Also concatenated with "+": keep the trailing '/'.
output_folder    = '/eos/user/n/nkarast/LHC/2018/FollowUp/pickles/'
# File name (inside output_folder) of the flat emittance dataframe
summary_filename = 'summaryEmittanceDataFrame_2018_periodB.pkl.gz'
# File name (inside output_folder) of the emittance growth dataframe
growth_filename  = 'summaryEmittanceGrowthDataFrame_2018_periodB.pkl.gz'

In [4]:
# Build the list of fill numbers from the `fill_<number>` entries of input_folder.
#  - glob returns its matches in arbitrary order, so the list is sorted to get a
#    reproducible processing order;
#  - entries whose suffix is not a plain number are ignored instead of aborting
#    the whole cell with a ValueError from int().
flist = sorted(
    int(entry.split('_')[-1])
    for entry in (os.path.basename(path) for path in glob.glob(input_folder + "fill_*"))
    if entry.split('_')[-1].isdigit()
)
print(flist)

[6570, 6573, 6574, 6579, 6583, 6584, 6592, 6594, 6595, 6611, 6612, 6613, 6614, 6615, 6616, 6617, 6618, 6620, 6621, 6624, 6628, 6629, 6633, 6636, 6638, 6639, 6640, 6641, 6642, 6643, 6645, 6646, 6648, 6650, 6654, 6659, 6662, 6663, 6666, 6672, 6674, 6675, 6677, 6681, 6683, 6688, 6690, 6693, 6694, 6696, 6699, 6700, 6702, 6706, 6709, 6710, 6711, 6712, 6714, 6719, 6724, 6729, 6731, 6733, 6737, 6738, 6741, 6744, 6747, 6749, 6751, 6752, 6755, 6757, 6759, 6761, 6762, 6763, 6768, 6770, 6772, 6773, 6774, 6776, 6778, 6819, 6843, 6847, 6850, 6854, 6858, 6860, 6864, 6868, 6874, 6877, 6879, 6881, 6882, 6884, 6885, 6890, 6891, 6892, 6901, 6904, 6909, 6911, 6912, 6913, 6919, 6921, 6923, 6924, 6925, 6927, 6929, 6931, 6939, 6940, 6942, 6944, 6946, 6953, 6956, 6957, 6960, 6961, 6966, 6998, 7003, 7005, 7006, 7008, 7013, 7017, 7018, 7020, 7024, 7026, 7031, 7033, 7035, 7036, 7037]


In [5]:
# Fill numbers to exclude: rows of these fills are filtered out of the
# measurement/model selections when the growth dataframe is built.
blacklist = []

___
# Summary Cycle DataFrame

In [6]:
# Full path of the summary pickle that this cell creates or extends
outfilename = output_folder + summary_filename

# If a summary dataframe was stored before, load it and keep in flist only
# the fills that it does not contain yet.
df_exists = os.path.exists(outfilename)
if df_exists:
    print('[WARN] Filename of output file given for dataframe exists. Checking if it is up-to-date...')
    with gzip.open(outfilename, 'rb') as fid:
        df_old = pickle.load(fid)

    # fills present both in the stored dataframe and in the current fill list
    fills_existing = np.unique(df_old['Fill'].values)
    overlap_fills = list(set(fills_existing.tolist()).intersection(flist))
    print('[WARN] The old dataframe has already the fills : [{}] dropping them.'.format(overlap_fills))

    newflist = [fill for fill in flist if fill not in overlap_fills]
    flist = newflist

    # nothing left to process: stop the cell here
    if not flist:
        raise KeyboardInterrupt('[INFO] No new fills.')

# Build df_loop: one flat dataframe with the emittances of all fills in flist.

# Translation tables from the keys of the stored cycle dictionaries to the
# labels that are written into the dataframe columns.
_beam_labels = {'beam_1': 'B1', 'beam_2': 'B2'}
_emit_keys   = {'Horizontal': 'emith', 'Vertical': 'emitv'}
_cycle_tags  = {
    ('Injection', 'at_start'):    'Injection',
    ('Injection', 'at_end'):      'Start Ramp',
    ('he_before_SB', 'at_start'): 'End Ramp',
    ('he_before_SB', 'at_end'):   'Start Stable',
}


def _load_gz_pickle(path):
    """Return the object stored in the gzip-compressed pickle at *path*."""
    with gzip.open(path, 'rb') as fid:
        return pickle.load(fid)


def _flatten(*arrays):
    """Concatenate the given array-likes and return the values as one flat list."""
    return np.concatenate([np.asarray(arr) for arr in arrays]).ravel().tolist()


def _emittance_frame(dcycle, dmodel, nbeam, cstep, tstep, lplane, filln, i_bunches):
    """Return the dataframe rows for one beam / cycle step / time step / plane.

    The measurement rows (taken from *dcycle*) come first, followed by the
    model rows (taken from *dmodel*). Intensity, bunch length and brightness
    are only read for the measurement; the model rows get NaN in these columns.

    nbeam, cstep, tstep and lplane are the keys used in the loops below;
    filln and i_bunches are copied into the 'Fill' and 'Bunches' columns.
    """
    emit_var = _emit_keys[lplane]
    meas     = dcycle[nbeam][cstep][tstep]
    model    = dmodel[nbeam][cstep][tstep]

    n_meas   = len(np.array(meas[emit_var]))
    n_model  = len(np.array(model[emit_var]))
    n_rows   = n_meas + n_model
    no_data  = np.full(n_model, np.nan)

    # Columns are assigned one by one as Series (as opposed to a dict of
    # arrays) so that the frame keeps the index of the 'Emittance' column.
    df_emit = pd.DataFrame()
    df_emit['Emittance']   = pd.Series(_flatten(meas[emit_var], model[emit_var]), dtype='float')
    df_emit['Intensity']   = pd.Series(_flatten(meas['intensity'], no_data), dtype='float')
    df_emit['BunchLength'] = pd.Series(_flatten(meas['blength'], no_data), dtype='float')
    df_emit['Brightness']  = pd.Series(_flatten(meas['brightness'], no_data), dtype='float')
    df_emit['Time']        = pd.Series(_flatten(meas['time_meas'], model['time_meas']), dtype='float')
    df_emit['Cycle']       = pd.Series([_cycle_tags[(cstep, tstep)]] * n_rows, dtype='category')
    df_emit['Plane']       = pd.Series([lplane] * n_rows, dtype='category')
    df_emit['Beam']        = pd.Series([_beam_labels[nbeam]] * n_rows, dtype='category')
    df_emit['Fill']        = pd.Series([filln] * n_rows, dtype='int')
    df_emit['Bunches']     = pd.Series([i_bunches] * n_rows, dtype='int')
    df_emit['Kind']        = pd.Series(['Measurement'] * n_meas + ['Model'] * n_model, dtype='category')
    # the filled slots are stored per cycle step, not per time step
    df_emit['Slot']        = pd.Series(_flatten(dcycle[nbeam][cstep]['filled_slots'],
                                                dmodel[nbeam][cstep]['filled_slots']), dtype='float')
    return df_emit


# The partial frames are collected in a list and concatenated once at the end:
# growing a dataframe inside the loop copies it on every iteration, and
# DataFrame.append no longer exists in pandas >= 2.0.
frames = []

# Loop over the flist
for filln in flist:
    print('Working on fill {}'.format(filln))

    fill_dir = os.path.join(input_folder, 'fill_{}'.format(filln))
    try:
        dcycle = _load_gz_pickle(os.path.join(fill_dir, 'fill_{}_cycle.pkl.gz'.format(filln)))
        dmodel = _load_gz_pickle(os.path.join(fill_dir, 'fill_{}_cycle_model.pkl.gz'.format(filln)))
        dsb    = _load_gz_pickle(os.path.join(fill_dir, 'fill_{}.pkl.gz'.format(filln)))
    except Exception as err:
        # Best effort: a fill whose files are missing or unreadable is skipped.
        # `Exception` (rather than a bare except) lets Ctrl-C still stop the
        # loop, and the reason is printed so that a corrupted file can be told
        # apart from a missing one.
        print('--> Files for fill {} not found or unreadable ({}: {}). Skipping it'.format(
            filln, type(err).__name__, err))
        continue

    # number of bunches of the fill: colliding + non-colliding filled slots (entry [1])
    i_bunches = len(dsb['slots_filled_coll'][1]) + len(dsb['slots_filled_noncoll'][1])

    for nbeam in ['beam_1', 'beam_2']:
        for cstep in ['Injection', 'he_before_SB']:
            for tstep in ['at_start', 'at_end']:
                for lplane in ['Horizontal', 'Vertical']:
                    frames.append(_emittance_frame(dcycle, dmodel, nbeam, cstep, tstep,
                                                   lplane, filln, i_bunches))

# pd.concat raises on an empty list, e.g. when every fill was skipped
df_loop = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# This is the total dataframe: the previously stored fills (if any) followed by
# the fills processed in this run. df_old is the dataframe that was loaded from
# outfilename by the up-to-date check at the top of this cell, so the file does
# not have to be read a second time. pd.concat replaces DataFrame.append, which
# no longer exists in pandas >= 2.0.
if df_exists:
    df_total = pd.concat([df_old, df_loop], ignore_index=True)
else:
    df_total = df_loop

################ WRITE THE TOTAL DF INTO A GZIPPED PICKLE FILE #####################
with gzip.open(outfilename, 'wb') as fid:
    pickle.dump(df_total, fid)
print('#makeCycleDataFrame : Writing file [{}]'.format(outfilename))

[WARN] Filename of output file given for dataframe exists. Checking if it is up-to-date...
[WARN] The old dataframe has already the fills : [[6659, 6662, 6663, 6666, 6672, 6744, 6674, 6675, 6677, 6681, 6683, 6688, 6690, 6693, 6694, 6757, 6696, 6570, 6699, 6700, 6573, 6574, 6749, 6731, 6706, 6579, 6709, 6710, 6583, 6584, 6714, 6702, 6719, 6592, 6752, 6594, 6595, 6724, 6729, 6711, 6733, 6712, 6738, 6611, 6612, 6613, 6614, 6615, 6616, 6617, 6618, 6747, 6620, 6621, 6751, 6624, 6755, 6628, 6629, 6737, 6633, 6759, 6636, 6638, 6639, 6640, 6641, 6642, 6643, 6645, 6646, 6648, 6650, 6654, 6741]] dropping them.
Working on fill 6761
Working on fill 6762
Working on fill 6763
Working on fill 6768
Working on fill 6770
Working on fill 6772
Working on fill 6773
Working on fill 6774
Working on fill 6776
Working on fill 6778
Working on fill 6819
--> Files for fill 6819 not found. Skipping it
Working on fill 6843
Working on fill 6847
Working on fill 6850
Working on fill 6854
Working on fill 6858
Working o

---

## Create a dictionary that contains the total number of bunches

This information is especially useful after the technical stops, when the accelerator slowly ramps up. It also gives you some information on how the observed effects depend on the total intensity.

In [7]:
# Map every fill number of df_total to its number of bunches, taken as the mean
# of the 'Bunches' column over all rows of that fill. The groupby is evaluated
# only once.
dict_bunches = df_total.groupby('Fill')['Bunches'].mean().to_dict()


def getBunches(x):
    """Return the number of bunches stored in dict_bunches for fill number x.

    Raises KeyError if x is not one of the fills that were in df_total when
    dict_bunches was built.
    """
    return dict_bunches[x]

---

_____

# Create a Growth DataFrame

To make your life easier, the growth dataframe offers flattened information on the relative emittance blow-up (or growth in µm/h) between different steps of the LHC cycle:

- INJ2FB : From INJECTION to start of RAMP (i.e. during Flat Bottom (FB))
- FB2FT  : From Start of RAMP to end of RAMP (i.e. during RAMP)
- FT2SB  : From the end of RAMP to the start of collisions (i.e. Flat Top (FT) before collisions)
- INJ2SB : The total duration from INJECTION to Start of Stable Beams

The dataframe includes both measured emittances as well as modelled emittances.


In [1]:
#!rm -rf /eos/user/n/nkarast/LHC/2018/FollowUp/pickles/summaryEmittanceGrowthDataFrame_2018_periodB.pkl.gz

In [11]:

# Load the flat emittance dataframe stored by the summary step
infilename = output_folder + summary_filename
with gzip.open(infilename, 'rb') as fid:
    df = pickle.load(fid)

# Full path of the growth pickle that this cell creates or extends
outfilename = output_folder + growth_filename

# If a growth dataframe was stored before, remove from df every fill that it
# already contains, so that only new fills are processed below.
growthDF_exists = os.path.exists(outfilename)
if growthDF_exists:
    print('[WARN] Filename of output file given for growth dataframe exists. Checking if it is up-to-date...')
    infilename = output_folder + growth_filename
    with gzip.open(infilename, 'rb') as fid:
        df_tmp_growth = pickle.load(fid)

    # fills present both in the stored growth dataframe and in the summary dataframe
    fills_existing = np.unique(df_tmp_growth['Fill'].values)
    fills_new = np.unique(df['Fill'].values)
    overlap_fills = list(set(fills_existing.tolist()).intersection(fills_new.tolist()))
    print('#[WARN] The old growth dataframe has already the fills : [{}] dropping them.'.format(overlap_fills))

    already_stored = df['Fill'].isin(overlap_fills)
    df = df[~already_stored]

    # nothing left to process: stop the cell here
    if df.shape[0] == 0:
        raise KeyboardInterrupt('[WARN] No new fills to update DF. Exiting...')

#########
# split dataframe into measurements and model, leaving out the blacklisted fills
_not_blacklisted = ~df['Fill'].isin(blacklist)
df_measure = df[(df['Kind'] == 'Measurement') & _not_blacklisted]
df_model   = df[(df['Kind'] == 'Model')       & _not_blacklisted]

cycle_steps = ['Injection', 'Start Ramp', 'End Ramp', 'Start Stable']

# (growth label, cycle step at the start, cycle step at the end):
# the three consecutive transitions, followed by the total INJ2SB one.
_transitions = list(zip(['INJ2FB', 'FB2FT', 'FT2SB'], cycle_steps[:-1], cycle_steps[1:]))
_transitions.append(('INJ2SB', cycle_steps[0], cycle_steps[-1]))


def _select(frame, beam, plane, cycle):
    """Return a copy of the rows of *frame* for one beam, plane and cycle step."""
    mask = (frame['Beam'] == beam) & (frame['Plane'] == plane) & (frame['Cycle'] == cycle)
    return frame[mask].copy()


def _deltas(first, second):
    """Return (dt, delta emittance, relative delta in %, growth) from *first* to *second*.

    The rows of the two selections are paired by position, so both must hold
    the same fills/slots in the same order (numpy raises if the lengths differ).
    The growth is 3600 * delta / dt, i.e. per hour if 'Time' is in seconds
    (assumption, not checked here).
    """
    dt        = second['Time'].values - first['Time'].values
    demit     = second['Emittance'].values - first['Emittance'].values
    demit_rel = (demit / first['Emittance'].values) * 100.
    growth    = 3600. * demit / dt
    return dt, demit, demit_rel, growth


# The partial frames are collected and concatenated once at the end
# (DataFrame.append no longer exists in pandas >= 2.0).
_growth_frames = []

for cycle_tag, step_from, step_to in _transitions:
    for i_beam in ['B1', 'B2']:
        for i_plane in ['Horizontal', 'Vertical']:

            # measured emittances at the two ends of the transition
            tmp_df_measure1 = _select(df_measure, i_beam, i_plane, step_from)
            tmp_df_measure2 = _select(df_measure, i_beam, i_plane, step_to)
            dt_meas, demit_meas, demit_rel_meas, growth_meas = _deltas(tmp_df_measure1, tmp_df_measure2)

            # modelled emittances at the two ends of the transition
            tmp_df_model1 = _select(df_model, i_beam, i_plane, step_from)
            tmp_df_model2 = _select(df_model, i_beam, i_plane, step_to)
            dt_model, demit_model, demit_rel_model, growth_model = _deltas(tmp_df_model1, tmp_df_model2)

            # The identifying columns are taken from the measurement at the start
            # of the transition; the computed arrays are attached by position.
            df_tmp = pd.DataFrame()
            df_tmp['Fill']                           = tmp_df_measure1['Fill']
            df_tmp['Beam']                           = tmp_df_measure1['Beam']
            df_tmp['Plane']                          = tmp_df_measure1['Plane']
            df_tmp['Cycle']                          = [cycle_tag]*len(df_tmp)
            df_tmp['Slot']                           = tmp_df_measure1['Slot']
            df_tmp['MeasuredDeltaEmittance']         = demit_meas
            df_tmp['MeasuredDt']                     = dt_meas
            df_tmp['MeasuredRelativeDeltaEmittance'] = demit_rel_meas
            df_tmp['MeasuredEmittanceGrowth']        = growth_meas
            df_tmp['ModelDeltaEmittance']            = demit_model
            df_tmp['ModelDt']                        = dt_model
            df_tmp['ModelRelativeDeltaEmittance']    = demit_rel_model
            df_tmp['ModelEmittanceGrowth']           = growth_model

            _growth_frames.append(df_tmp)

# Total growth dataframe
df_growth = pd.concat(_growth_frames, ignore_index=True)

# number of bunches of each fill, looked up once for the complete dataframe
df_growth['Bunches'] = df_growth['Fill'].apply(getBunches)

# Clean-up of the new rows:
#  - +/-inf values are turned into NaN;
#  - only rows with a measured time difference of at least min_measured_dt are
#    kept (5.0*60, presumably 5 minutes with 'Time' in seconds - to be confirmed);
#  - rows with a NaN in any column are dropped.
min_measured_dt = 5.0*60
df_growth = df_growth.replace([np.inf, -np.inf], np.nan)
df_growth = df_growth[df_growth['MeasuredDt'] >= min_measured_dt].dropna(axis=0, how='any')


# save
# If a growth dataframe was stored before, the new rows are added after it.
# df_tmp_growth is the stored dataframe that was loaded from outfilename by the
# up-to-date check at the top of this cell, so the file is not read a second
# time. pd.concat replaces DataFrame.append, which no longer exists in
# pandas >= 2.0.
if growthDF_exists:
    df_growth = pd.concat([df_tmp_growth, df_growth], ignore_index=True)

with gzip.open(outfilename, 'wb') as fid:
    pickle.dump(df_growth, fid)
print('done')

done
