In [1]:
def zeros(shape):
    """Return a nested list of zeros with shape[0] rows and shape[1] columns.

    Every row is a distinct list object, so writing to one row never
    affects another.
    """
    # shape[1] is looked up once per row, so it is never touched when
    # shape[0] yields no rows.
    return [[0 for _ in range(shape[1])] for _ in range(shape[0])]

# Scoring scheme used when filling and tracing the alignment table.
match_award      = 20
mismatch_penalty = -1000000
gap_penalty      = -5  # applied both when opening and when extending a gap

def match_score(alpha, beta):
    """Score one pair of aligned symbols.

    Equal symbols earn ``match_award``; otherwise a pair in which either
    side is the gap symbol '-' costs ``gap_penalty``; any other pair costs
    ``mismatch_penalty``.
    """
    if alpha == beta:
        return match_award
    if alpha == '-' or beta == '-':
        return gap_penalty
    return mismatch_penalty

def align(pipeline, subhyperpipeline):
    """Globally align two pass sequences and merge their per-pass metadata.

    Parameters
    ----------
    pipeline : [seq1, meta1]
        ``seq1`` is a list of pass names and ``meta1`` the parallel list of
        metadata dicts. Only the first key of each ``meta1`` dict is carried
        into the result.
    subhyperpipeline : [seq2, meta2]
        Same layout. seq2 is meant to be the longer one, with multiple keys
        in each metadata dict.

    Returns
    -------
    list
        ``[join, joined_meta]``: the merged pass sequence and, for every
        merged position, a dict holding all keys of ``meta2`` plus the key
        of ``meta1``. Positions that exist in only one input get the
        ``getFPI("")`` placeholder for the keys of the other input.

    Notes
    -----
    The inputs are left untouched: the sequences are reversed into copies and
    every emitted metadata dict is a new (shallow) dict.
    """
    seq1, meta1 = pipeline
    seq2, meta2 = subhyperpipeline

    # The table is built over the reversed sequences so that the traceback,
    # which starts in the bottom-right cell, emits the merged sequence in
    # forward order. Reverse copies so the caller's lists are not changed.
    seq1 = list(reversed(seq1))
    seq2 = list(reversed(seq2))
    meta1 = list(reversed(meta1))
    meta2 = list(reversed(meta2))

    m, n = len(seq1), len(seq2)  # length of the two sequences

    # Keys are the same for all the dictionaries in meta2, so take them from
    # the 0th one; an empty meta2 simply contributes no keys.
    meta2_keys = list(meta2[0].keys()) if meta2 else []

    def _first_key(entry):
        return list(entry.keys())[0]

    def _seq1_only_row(entry):
        # Pass present only in seq1: a separate placeholder per meta2 key
        # (no shared dict object), plus the real properties from meta1.
        row = {key: getFPI("") for key in meta2_keys}
        key = _first_key(entry)
        row[key] = entry[key]
        return row

    def _seq2_only_row(entry, key_source):
        # Pass present only in seq2: copy its properties and add a
        # placeholder under the meta1 key when there is one.
        row = dict(entry)
        if key_source is not None:
            row[_first_key(key_source)] = getFPI("")
        return row

    # DP table; first row and first column hold pure gap runs.
    score = zeros((m + 1, n + 1))
    for i in range(0, m + 1):
        score[i][0] = gap_penalty * i
    for j in range(0, n + 1):
        score[0][j] = gap_penalty * j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            match = score[i - 1][j - 1] + match_score(seq1[i - 1], seq2[j - 1])
            delete = score[i - 1][j] + gap_penalty
            insert = score[i][j - 1] + gap_penalty
            score[i][j] = max(match, delete, insert)

    # Traceback and compute the alignment.
    join = []
    joined_meta = []

    i, j = m, n  # start from the bottom-right cell
    while i > 0 and j > 0:  # stop on touching the top or the left edge
        score_current = score[i][j]

        if score_current == score[i - 1][j - 1] + match_score(seq1[i - 1], seq2[j - 1]):
            # Both sequences contribute this pass: merge the two dicts.
            join.append(seq1[i - 1])
            row = dict(meta2[j - 1])
            key = _first_key(meta1[i - 1])
            row[key] = meta1[i - 1][key]
            joined_meta.append(row)
            i -= 1
            j -= 1
        elif score_current == score[i - 1][j] + gap_penalty:
            join.append(seq1[i - 1])
            joined_meta.append(_seq1_only_row(meta1[i - 1]))
            i -= 1
        elif score_current == score[i][j - 1] + gap_penalty:
            join.append(seq2[j - 1])
            joined_meta.append(_seq2_only_row(meta2[j - 1], meta1[i - 1]))
            j -= 1

    # Finish tracing up to the top-left cell.
    while i > 0:
        join.append(seq1[i - 1])
        joined_meta.append(_seq1_only_row(meta1[i - 1]))
        i -= 1

    # i is 0 from here on, so the meta1 key is read from the last entry of the
    # reversed list; with an empty meta1 there is no key to add.
    key_source = meta1[-1] if meta1 else None
    while j > 0:
        join.append(seq2[j - 1])
        joined_meta.append(_seq2_only_row(meta2[j - 1], key_source))
        j -= 1

    return [join, joined_meta]

In [2]:
def getFPI(fpi):
    """Parse a block of ``name: value`` lines into a dict of strings.

    Parameters
    ----------
    fpi : str
        One ``name: value`` pair per line. Surrounding whitespace is ignored
        and blank lines are skipped. Only the first ':' on a line separates
        the name from the value.

    Returns
    -------
    dict
        ``{name: value}`` with both sides stripped. For an empty or
        whitespace-only input, a dict mapping every entry of the
        module-level ``functionPropertiesNames`` to ``np.nan``.

    Raises
    ------
    ValueError
        If a non-blank line contains no ':' separator.
    """
    if not fpi.strip():
        return dict.fromkeys(functionPropertiesNames, np.nan)

    fpiDict = {}
    for line in fpi.strip().split('\n'):
        line = line.strip()
        if not line:
            continue
        # partition splits on the first ':' only, so a value may itself
        # contain colons.
        name, separator, value = line.partition(':')
        if not separator:
            raise ValueError("malformed function property line: %r" % line)
        fpiDict[name.strip()] = value.strip()

    return fpiDict

def alignHyperPipeline(buffer):
    """Fold the per-function pipelines in ``buffer`` into one hyper pipeline.

    buffer = {
                fnA : [ [passA1, passA2, ...., passAN],
                        [{A: fpiA1},  {A: fpiA2},  ...., {A: fpiAN} ] ],
                fnB : [ [passB1, passB2, ...., passBM],
                        [{B: fpiB1},  {B: fpiB2},  ...., {B: fpiBM} ] ]
             }

    subHyperPipeline = [
                            [pass1, pass2, ...., passN],
                            [   {fnA: fpiA1, fnB: fpiB1},
                                {fnA: fpiA2, fnB: fpiB2},
                                ....
                                {fnA: fpiAN, fnB: fpiBN}
                            ]
                       ]

    The function with the most passes is the starting point; every other
    function is merged into it with ``align``. ``buffer`` itself is not
    modified. An empty ``buffer`` yields ``[[], []]``.

    Exits through ``sys.exit`` if the merged pass list and the merged
    metadata list end up with different lengths.
    """
    if not buffer:
        return [[], []]

    # Rank by the number of passes (v[0]); len(v) is always 2 and would
    # select by key alone. Ties are broken by the key.
    _, maxKey = max((len(v[0]), k) for k, v in buffer.items())
    subHyperPipeline = buffer[maxKey]

    for key, pipeline in buffer.items():
        if key == maxKey:
            continue
        subHyperPipeline = align(pipeline, subHyperPipeline)

    if len(subHyperPipeline[0]) == len(subHyperPipeline[1]):
        return subHyperPipeline
    else:
        sys.exit("ERROR!!!! FPI MISSING. FIX IT")
        
def getHyperPassPipeline(rawDataList):
    """Group raw records per function and align them into one hyper pipeline.

    Parameters
    ----------
    rawDataList : list of list of str
        One record per entry; index 1 is read as the pass name, index 2 as
        the function name and index 3 as the function-properties text.

    Returns
    -------
    list
        Whatever ``alignHyperPipeline`` returns for the grouped records.
    """
    buffer = {}

    for record in rawDataList:
        passName = record[1]
        functionName = record[2]

        # Ignore llvm.lifetime.* functions. Checked before parsing so no
        # work is spent on (and no parse error raised by) discarded records.
        if functionName.startswith('llvm.lifetime.'):
            continue

        passes, properties = buffer.setdefault(functionName, [[], []])
        passes.append(passName)
        properties.append({functionName: getFPI(record[3])})

    return alignHyperPipeline(buffer)

In [3]:
def getDataFrame(hyperPassPipelineDict):
    """Build a wide table: one row per function, one column group per pass.

    Parameters
    ----------
    hyperPassPipelineDict : dict
        ``{passNo: {functionName: {propertyName: value}}}``.

    Returns
    -------
    pandas.DataFrame
        Two-level columns. The first column is
        ``('Pass Number', 'Function Name')``; it is followed by
        ``(passNo, propertyName)`` for every pass and every name in
        ``functionPropertiesNames``. Rows are in first-seen order of the
        function names. A property, or a whole function, missing from a pass
        is filled with NaN.
    """
    functionPropertiesNames = ['BasicBlockCount', 'BlocksReachedFromConditionalInstruction',
                               'Uses', 'DirectCallsToDefinedFunctions', 'LoadInstCount',
                               'StoreInstCount', 'MaxLoopDepth', 'TopLevelLoopCount']
    missing = float('nan')

    cols = [('Pass Number', 'Function Name')]
    for passNo in hyperPassPipelineDict:
        cols.extend((passNo, fp) for fp in functionPropertiesNames)

    # Collect every function name once, so a function that first shows up
    # after the first pass still gets a full-width row.
    fnNames = list(dict.fromkeys(
        fnName
        for fpiSet in hyperPassPipelineDict.values()
        for fnName in fpiSet
    ))

    data = []
    for fnName in fnNames:
        row = [fnName]
        for fpiSet in hyperPassPipelineDict.values():
            fpi = fpiSet.get(fnName) or {}
            # Look values up by property name so each one lands under its
            # own column header regardless of the dict's key order.
            row.extend(fpi.get(fp, missing) for fp in functionPropertiesNames)
        data.append(row)

    return pd.DataFrame(data, columns=pd.MultiIndex.from_tuples(cols))

In [4]:
import sys

import numpy as np
import pandas as pd
    
fileName = 'test.c.txt'
functionPropertiesNames = [
    'BasicBlockCount',
    'BlocksReachedFromConditionalInstruction',
    'Uses',
    'DirectCallsToDefinedFunctions',
    'LoadInstCount',
    'StoreInstCount',
    'MaxLoopDepth',
    'TopLevelLoopCount',
]
numFunctionProperties = len(functionPropertiesNames)

with open(fileName) as inFile:
    rawData = inFile.read()

# Records are separated by '***'; whatever precedes the first separator is
# dropped.
rawDataList = [chunk.strip() for chunk in rawData.strip().split('***')][1:]

# Break every record into its '#'-separated, whitespace-trimmed fields.
for i, record in enumerate(rawDataList):
    rawDataList[i] = [field.strip() for field in record.strip().split('#')]

In [5]:
# Group the raw records per function and align them into a single
# [passes, per-pass metadata] pair.
hyperPassPipeline = getHyperPassPipeline(rawDataList)

In [6]:
# Key each per-pass metadata dict by its position in the aligned pass list.
hyperPassPipelineDict = {
    passNo: fpiSet
    for passNo, fpiSet in zip(range(len(hyperPassPipeline[0])), hyperPassPipeline[1])
}

In [16]:
# Turn the {passNo: {function: properties}} mapping into a wide DataFrame.
passPipelineDF = getDataFrame(hyperPassPipelineDict)

In [17]:
# Bare last expression: the notebook renders the frame as the cell output.
passPipelineDF

Unnamed: 0_level_0,Pass Number,0,0,0,0,0,0,0,0,1,...,134,134,135,135,135,135,135,135,135,135
Unnamed: 0_level_1,Function Name,BasicBlockCount,BlocksReachedFromConditionalInstruction,Uses,DirectCallsToDefinedFunctions,LoadInstCount,StoreInstCount,MaxLoopDepth,TopLevelLoopCount,BasicBlockCount,...,MaxLoopDepth,TopLevelLoopCount,BasicBlockCount,BlocksReachedFromConditionalInstruction,Uses,DirectCallsToDefinedFunctions,LoadInstCount,StoreInstCount,MaxLoopDepth,TopLevelLoopCount
0,multiply,,,,,,,,,13,...,,,,,,,,,,
1,main,,,,,,,,,1,...,,,,,,,,,,


In [9]:
import pandas as pd

d = {"key1":[0.], "key2":{"a":[1.],"b":[2.],"c":[3.]}, "key3":[4.]}

# Flatten the mapping into parallel lists: one (outer, inner) column tuple
# and one value list per leaf. Non-dict values get 0 as their inner label.
cols, data = [], []
for k, v in d.items():
    if isinstance(v, dict):
        for k2, v2 in v.items():
            cols.append((k, k2))
            data.append(v2)
        continue
    cols.append((k, 0))
    data.append(v)

print(cols)

# zip(*data) transposes the per-column value lists into row tuples.
df = pd.DataFrame(
    list(zip(*data)),
    columns=pd.MultiIndex.from_tuples(cols),
)
df

[('key1', 0), ('key2', 'a'), ('key2', 'b'), ('key2', 'c'), ('key3', 0)]


Unnamed: 0_level_0,key1,key2,key2,key2,key3
Unnamed: 0_level_1,0,a,b,c,0
0,0.0,1.0,2.0,3.0,4.0


In [10]:
# Transpose the per-column value lists into row tuples, for inspection.
list(zip(*data))

[(0.0, 1.0, 2.0, 3.0, 4.0)]

In [11]:
# Two identical rows against the column tuples collected in `cols`.
dataTest = [(0, 1, 2, 3, 4)] * 2
df = pd.DataFrame(
    dataTest,
    columns=pd.MultiIndex.from_tuples(cols),
)
df

Unnamed: 0_level_0,key1,key2,key2,key2,key3
Unnamed: 0_level_1,0,a,b,c,0
0,0,1,2,3,4
1,0,1,2,3,4


In [12]:
functionPropertiesNames = [
    'BasicBlockCount',
    'BlocksReachedFromConditionalInstruction',
    'Uses',
    'DirectCallsToDefinedFunctions',
    'LoadInstCount',
    'StoreInstCount',
    'MaxLoopDepth',
    'TopLevelLoopCount',
]

# Column tuples for a single pass: the label column first, then one
# (passNo, property) pair per property name.
passNo = 0
cols = [('Pass Number','Function Name')]
for fpi in functionPropertiesNames:
    cols += [(passNo, fpi)]

print(cols)

[('Pass Number', 'Function Name'), (0, 'BasicBlockCount'), (0, 'BlocksReachedFromConditionalInstruction'), (0, 'Uses'), (0, 'DirectCallsToDefinedFunctions'), (0, 'LoadInstCount'), (0, 'StoreInstCount'), (0, 'MaxLoopDepth'), (0, 'TopLevelLoopCount')]


In [13]:
# One row per function name, each followed by the same eight sample values.
dataTest = [[fnLabel, 0, 1, 2, 3, 1, 2, 3, 4] for fnLabel in ("main", "mult")]
df = pd.DataFrame(
    dataTest,
    columns=pd.MultiIndex.from_tuples(cols),
)
df

Unnamed: 0_level_0,Pass Number,0,0,0,0,0,0,0,0
Unnamed: 0_level_1,Function Name,BasicBlockCount,BlocksReachedFromConditionalInstruction,Uses,DirectCallsToDefinedFunctions,LoadInstCount,StoreInstCount,MaxLoopDepth,TopLevelLoopCount
0,main,0,1,2,3,1,2,3,4
1,mult,0,1,2,3,1,2,3,4


In [14]:
functionPropertiesNames = ['BasicBlockCount', 'BlocksReachedFromConditionalInstruction',
                           'Uses', 'DirectCallsToDefinedFunctions', 'LoadInstCount', 
                           'StoreInstCount', 'MaxLoopDepth', 'TopLevelLoopCount']

# Column tuples: the label column, then (passNo, property) for every pass.
cols = [('Pass Number','Function Name')]
for passNo in hyperPassPipelineDict:
    cols.extend((passNo, fp) for fp in functionPropertiesNames)

# Row index of every function name, in first-seen order across all passes,
# so a function that first appears after pass 0 still gets a row.
fnNameIndexPointer = {}
for fpiSet in hyperPassPipelineDict.values():
    for fnName in fpiSet:
        fnNameIndexPointer.setdefault(fnName, len(fnNameIndexPointer))

# One row per function. Values are looked up by property name so they land
# under the matching column; anything missing becomes NaN.
data = [[fnName] for fnName in fnNameIndexPointer]
for fpiSet in hyperPassPipelineDict.values():
    for fnName, row in zip(fnNameIndexPointer, data):
        fpi = fpiSet.get(fnName) or {}
        row.extend(fpi.get(fp, float('nan')) for fp in functionPropertiesNames)

In [15]:
# Assemble the frame from the rows in `data` and the two-level column
# tuples in `cols`, then display it.
df = pd.DataFrame(data, columns=pd.MultiIndex.from_tuples(cols))
df

Unnamed: 0_level_0,Pass Number,0,0,0,0,0,0,0,0,1,...,134,134,135,135,135,135,135,135,135,135
Unnamed: 0_level_1,Function Name,BasicBlockCount,BlocksReachedFromConditionalInstruction,Uses,DirectCallsToDefinedFunctions,LoadInstCount,StoreInstCount,MaxLoopDepth,TopLevelLoopCount,BasicBlockCount,...,MaxLoopDepth,TopLevelLoopCount,BasicBlockCount,BlocksReachedFromConditionalInstruction,Uses,DirectCallsToDefinedFunctions,LoadInstCount,StoreInstCount,MaxLoopDepth,TopLevelLoopCount
0,multiply,,,,,,,,,13,...,,,,,,,,,,
1,main,,,,,,,,,1,...,,,,,,,,,,
