# Clean DataFrames
~ N. Karastathis (2017)

Well, data are always messy... To reduce the tails of the emittance distributions, the dataframes created by `createDataFrames.ipynb` are cleaned here: for every cycle step, fill, beam and plane, only the emittance values strictly between the 2% and 98% quantiles are kept.

In [1]:
import numpy as np
import pandas as pd  # you need PyTables if you're working with HDF5 files
import gzip
import pickle

In [7]:
# Directory that holds the summary dataframes (trailing slash included, since
# the file names below are appended to it directly).
FILEDIR = '/eos/user/n/nkarast/LHC/2018/FollowUp/pickles/'

# Input: the raw summary dataframe; output: its cleaned counterpart.
# NOTE(review): the cleaning cell writes `outsummary` with `to_hdf`, so the
# file contains HDF5 data despite the `.pkl` suffix - confirm readers expect that.
insummary = ''.join((FILEDIR, 'summaryEmittanceDataFrame.pkl'))
outsummary = ''.join((FILEDIR, 'cleanSummaryEmittanceDataFrame.pkl'))

In [5]:
#########  LOAD THE PICKLE or H5 FILE 
# Read the summary dataframe from the path in `insummary`.
# If the input was stored as HDF5 instead, use: df = pd.read_hdf(insummary, 'df')
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
df = pd.read_pickle(insummary)

In [8]:
# Keep only the rows whose 'Fill' value is NOT listed in `blacklist`
# (`blacklist` is defined in another cell of the notebook).
df = df.loc[~df['Fill'].isin(blacklist)]

In [9]:
def _trim_emittance_tails(subset, lower_q, upper_q):
    """Return the rows of `subset` whose 'Emittance' lies strictly inside the quantile band.

    Parameters
    ----------
    subset : pandas.DataFrame
        Rows to clean; must contain an 'Emittance' column.
    lower_q, upper_q : float
        Quantile levels (0..1). Rows are kept when
        quantile(lower_q) < Emittance < quantile(upper_q).

    Returns
    -------
    pandas.DataFrame
        The filtered rows. An empty `subset` yields an empty result, because
        comparisons against the NaN quantiles are all False.
    """
    emittance = subset['Emittance']
    # Both thresholds are computed on the very same subset that is filtered,
    # so the boolean mask is always index-aligned with `subset`.
    lower_cut = emittance.quantile(lower_q)
    upper_cut = emittance.quantile(upper_q)
    return subset[(emittance > lower_cut) & (emittance < upper_cut)]


# Define the quantiles (I take here that the good data are between the 2% and 98% quantile.)
q1 = 0.02
q2 = 0.98

# Cleaned subsets are collected here and concatenated once at the end;
# growing a DataFrame with repeated `.append` copies it on every call and
# `DataFrame.append` no longer exists in pandas >= 2.0.
clean_pieces = []

# NOTE: single-argument `print(...)` behaves identically on Python 2 and 3.
for cycle_step in ['Injection', 'Start Ramp', 'End Ramp', 'Start Stable']:
    print('Working at {}'.format(cycle_step))

    #########  BASIC SELECTION
    df2_measurement = df[(df['Cycle'] == cycle_step) & (df['Kind'] == 'Measurement')]
    df2_model = df[(df['Cycle'] == cycle_step) & (df['Kind'] == 'Model')]

    # `flist` (the fills to process) is defined in another cell of the notebook.
    for filln in flist:
        print('-> Fill  {}'.format(filln))

        # Measurement first, then model, each as B1H, B1V, B2H, B2V - the same
        # row order the original copy-pasted selections produced.
        for label, kind_df in (('measured', df2_measurement), ('model', df2_model)):
            print('---> Cleaning {} stuff...'.format(label))
            fill_df = kind_df[kind_df['Fill'] == filln]

            for beam in ('B1', 'B2'):
                for plane in ('Horizontal', 'Vertical'):
                    subset = fill_df[(fill_df['Beam'] == beam) & (fill_df['Plane'] == plane)]
                    # Every subset is trimmed against its OWN quantiles. The
                    # previous B1-horizontal model selection compared against
                    # the measurement subset by mistake, producing a mask that
                    # was not aligned with the model rows.
                    clean_pieces.append(_trim_emittance_tails(subset, q1, q2))

# `pd.concat` rejects an empty list, so fall back to an empty frame then.
clean_df = pd.concat(clean_pieces) if clean_pieces else pd.DataFrame()

print('---> Done')


# Storing the clean df into an H5 file.
# NOTE(review): `outsummary` ends in '.pkl' although the content written here
# is HDF5 (table format) - confirm downstream readers use `pd.read_hdf`.
clean_df.to_hdf(outsummary, 'df', format='table')

Working at Injection
-> Fill  5837
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5838
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5839
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5842
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5845
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5848
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5849
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5856
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5862
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5864
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5865
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5868
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5873
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> F



-> Fill  5966
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5971
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5974
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5976
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5979
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  5984
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6020
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6021
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6024
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6026
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6030
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6031
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6035
---> Cleaning measured stuff...
---> Cleaning model stuff...
-> Fill  6041
---> Cleani