In [1]:
import numpy as np
import pandas as pd
import os
import re

# Per-phase directories holding the BLM fill files (paths are relative to the working directory).
# Each one is passed as `phasePath` to getMergedPhaseLosses further down.
startRampPath = 'data/BLM/start_ramp/'
startAdjustPath = 'data/BLM/start_adjust/'
startSqueezePath = 'data/BLM/start_squeeze/'
flatTopPath = 'data/BLM/flat_top/'

In [2]:
def getMaxLossRow(fill):
    """
    Return the row of the fill that contains the single highest BLM value.
    
    Parameters
        fill : DataFrame -- BLM readings of one fill (timestamp column already removed)
    Returns
        The row of `fill` (a Series when the row label is unique) holding the overall maximum value.
        When several rows share that maximum, the first of them is returned.
    """
    # Reduce every row to its own peak, then look up the label of the row with the highest peak.
    peakRowLabel = fill.max(axis=1).idxmax()
    
    return fill.loc[peakRowLabel]

In [3]:
def getFillFilenames(phasePath):
    """
    List the fill files found directly inside a phase directory.

    Parameters
        phasePath : string -- directory to scan
    Returns
        list of filenames (not full paths) ending in '.txt', in ascending lexicographic order
    """
    # Only text files are fill files - anything else (e.g. desktop.ini) is ignored.
    textFiles = [entry for entry in os.listdir(phasePath) if entry.endswith('.txt')]
    textFiles.sort()
    
    return textFiles

In [4]:
def getMergedPhaseLosses(phasePath, maxFills):
    """
        Collect the "max loss row" of each fill of a phase and merge symmetric B1/B2 BLMs under logical BLM names.

        In each fill file in phasePath (up to maxFills) the "max loss row" is found (i.e. searching for the highest 
        BLM value registered and taking the row within which it occurs).  These rows are gathered into a DataFrame 
        keyed by the fill number parsed from the filename.  The B1 BLM and B2 BLM columns of interest (symmetric BLMs) 
        are then selected and renamed to "logical" BLM names - each representing a pair of symmetric BLMs - and the 
        B1 rows and B2 rows are stacked on top of each other.
    Parameters
        phasePath : string - path containing fill files for a particular phase (a trailing separator is optional)
        maxFills : int - maximum number of fills to process.  -1 to process all fill files.
    Returns
        DataFrame hierarchically indexed by beam ('B1' or 'B2', outer level) and fill number (string, inner level); 
        columns are logical BLM names.  Each row is taken from the max loss row of the corresponding fill file. 
        eg row B1, 7207 and row B2, 7207 both originate from the same row in fill 7207 - the max loss row.  It was 
        split into two separate rows differing by beam index (B1 and B2) so that symmetric B1 BLMs and B2 BLMs could 
        be merged under the same logical BLM column names.
    Raises
        ValueError - if no fill file was processed (no '.txt' file in phasePath, or maxFills == 0)
        AssertionError - if the fill numbers are not strictly increasing in filename order, or if the beam 1, 
        beam 2 and logical label files differ in length
    """
    maxLossRows = [] # one Series per fill: the row with the highest loss in that fill
    prevFillNo = -1 # used to assert that filenames are in chronological order of fill number
    for i, filename in enumerate(getFillFilenames(phasePath)):
        if i == maxFills: # never true for maxFills == -1, so every file is processed
            break

        # load fill file into a DataFrame (os.path.join tolerates a phasePath without trailing separator)
        fill = pd.read_csv(os.path.join(phasePath, filename), sep=' ', header=None)
        # Delete first column (contains timestamps)
        fill = fill.drop(columns=0)
        # Extract the fill number from the filename (used eventually as the key for the row extracted from this fill)
        fillNo = re.search(r'\d+', filename).group()
        assert int(fillNo) > prevFillNo, "Fills not loaded in chronological order!  Fill " + fillNo + " came after " + str(prevFillNo)

        # extract from the fill the row with the maximum loss
        rowWithMaxLoss = getMaxLossRow(fill)
        rowWithMaxLoss.name = fillNo # the Series name becomes the row label in the assembled DataFrame
        maxLossRows.append(rowWithMaxLoss)
        prevFillNo = int(fillNo)

    if not maxLossRows:
        raise ValueError("No fill files were processed from " + str(phasePath))

    # Build the frame in one go from the collected rows (DataFrame.append no longer exists in pandas >= 2.0)
    phaseLosses = pd.DataFrame(maxLossRows)

    blmLabels = np.genfromtxt('data/blm_labels.txt', dtype='str')
    beam1BlmLabels = np.genfromtxt('data/beam1_blm_labels.txt', dtype='str')
    beam2BlmLabels = np.genfromtxt('data/beam2_blm_labels.txt', dtype='str')
    # NB: Each BLM in beam1BlmLabels corresponds row by row to its symmetric BLM in  beam2BlmLabels
    logicalBlmLabels = np.genfromtxt('data/logical_blm_labels.txt', dtype='str')
    assert len(beam1BlmLabels) == len(beam2BlmLabels) == len(logicalBlmLabels)

    phaseLosses.columns = blmLabels

    # Separate beam 1 losses and beam 2 losses
    beam1PhaseLosses = phaseLosses.filter(items=beam1BlmLabels)
    beam2PhaseLosses = phaseLosses.filter(items=beam2BlmLabels)

    # merge beam 1 losses and beam 2 losses under the same logical BLM labels
    beam1PhaseLosses.columns = logicalBlmLabels
    beam2PhaseLosses.columns = logicalBlmLabels
    mergedPhaseLosses = pd.concat([beam1PhaseLosses, beam2PhaseLosses], keys=['B1', 'B2'])
    
    return mergedPhaseLosses

In [5]:
# -1 means "no cap": every fill file of each phase is processed.
maxFills = -1
# One merged max-loss table per phase.
startRampLosses = getMergedPhaseLosses(phasePath=startRampPath, maxFills=maxFills)
startAdjustLosses = getMergedPhaseLosses(phasePath=startAdjustPath, maxFills=maxFills)
startSqueezeLosses = getMergedPhaseLosses(phasePath=startSqueezePath, maxFills=maxFills)
flatTopLosses = getMergedPhaseLosses(phasePath=flatTopPath, maxFills=maxFills)

In [6]:
# save the DataFrames

# to_pickle does not create missing parent directories, so make sure the output directory exists first
os.makedirs("data/pickles", exist_ok=True)

startRampLosses.to_pickle("data/pickles/startRampLosses.pkl")
startAdjustLosses.to_pickle("data/pickles/startAdjustLosses.pkl")
startSqueezeLosses.to_pickle("data/pickles/startSqueezeLosses.pkl")
flatTopLosses.to_pickle("data/pickles/flatTopLosses.pkl")