In [9]:
import json
import numpy as np
import pandas as pd
import regex

In [10]:
def get_tracing(path):
    """Load a tracing JSON file into a flat (Thread, Time, Event) table.

    The file must contain a JSON sequence of event objects.  Each event is
    read through its ``pid``, ``ts``, ``tid`` and (for program-wide events)
    ``name`` keys and becomes one row:

    * ``pid == "All"`` and ``name == "Initialize"`` -> Event ``"Start"``
    * ``pid == "All"`` and ``name == "Finalize"``   -> Event ``"End"``
    * ``pid == "All"`` and ``tid == "Parallel Region"`` -> Event is the tid
    * anything else -> Thread is the integer in the second space-separated
      field of ``pid`` and Event is the ``tid``.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON trace.

    Returns
    -------
    pandas.DataFrame
        Columns ``Thread``, ``Time``, ``Event``; one row per event, in file
        order.  An empty trace yields an empty frame with those columns.

    Raises
    ------
    KeyError
        If an event lacks a key that is needed to classify it.
    IndexError, ValueError
        If a per-thread ``pid`` does not have an integer as its second
        space-separated field (this includes unclassified ``"All"`` events).
    """
    columns = ["Thread", "Time", "Event"]

    with open(path, 'r') as f:
        events = json.load(f)

    def _to_record(event):
        """Translate one raw trace event into a row dictionary."""
        pid = event["pid"]
        is_global = pid == "All"
        if is_global and event["name"] == "Initialize":
            thread, label = pid, "Start"
        elif is_global and event["name"] == "Finalize":
            thread, label = pid, "End"
        elif is_global and event["tid"] == "Parallel Region":
            thread, label = pid, event["tid"]
        else:
            # Per-thread event: the numeric id is the second field of pid.
            thread, label = int(pid.split(" ")[1]), event["tid"]
        return {"Thread": thread, "Time": event["ts"], "Event": label}

    # Build every record first and create the frame once; appending rows
    # through .loc re-allocates the frame for each event.
    records = [_to_record(event) for event in events]
    return pd.DataFrame(records, columns=columns)

In [11]:
def thread_ratio(df):
    """Ratio of the busiest worker's "Thread Region" time to thread 0's.

    Only rows whose ``Event`` equals ``"Thread Region"`` are considered.
    For each thread the rows are ordered by ``Time`` and read as consecutive
    (begin, end) pairs; the pair durations are summed.  A "worker" is every
    thread id other than 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Thread``, ``Time`` and ``Event`` columns.

    Returns
    -------
    float
        ``max(worker totals) / thread-0 total``.  When the trace contains no
        worker threads the numerator is 0, so the result is 0.0.

    Raises
    ------
    AssertionError
        If a thread has an odd number of "Thread Region" rows, i.e. a begin
        without a matching end.

    Notes
    -----
    Thread 0's accumulated time is not checked for being non-zero before
    the division.
    """
    regions = df[df["Event"] == "Thread Region"].sort_values(by=["Thread", "Time"])
    thread_ids = np.sort(regions["Thread"].unique().astype(np.int64))

    def _busy_time(thread_id):
        """Sum the (end - begin) durations recorded for one thread."""
        times = regions.loc[regions["Thread"] == thread_id, "Time"].to_numpy()
        assert len(times) % 2 == 0, (
            "thread %s has an unmatched Thread Region event" % thread_id
        )
        # Rows are time-ordered, so even positions are begins, odd are ends.
        return sum(end - begin for begin, end in zip(times[0::2], times[1::2]))

    master_time = _busy_time(0)
    # Select workers by id rather than by position so the choice does not
    # depend on thread 0 being the first entry of the sorted id list.
    worker_times = [_busy_time(tid) for tid in thread_ids if tid != 0]
    return max(worker_times, default=0) / master_time

In [12]:
def sync_ratio(df):
    """Fraction of the run that the most-synchronizing thread spent in sync.

    The run time is the ``Time`` of the first ``"End"`` row minus the
    ``Time`` of the first ``"Start"`` row.  ``"Sync Region"`` rows are
    grouped per thread, ordered by time, and read as consecutive
    (begin, end) pairs whose durations are summed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Thread``, ``Time`` and ``Event`` columns.

    Returns
    -------
    float
        ``max(per-thread sync time) / run time``.  A trace without any
        "Sync Region" rows yields 0.0, matching how
        ``parallel_region_ratio`` treats a trace without parallel regions.

    Raises
    ------
    IndexError
        If the table has no ``"Start"`` or no ``"End"`` row.
    AssertionError
        If a thread has an odd number of "Sync Region" rows.
    """
    end_time = df.loc[df["Event"] == "End", "Time"].to_numpy()[0]
    start_time = df.loc[df["Event"] == "Start", "Time"].to_numpy()[0]
    run_time = end_time - start_time

    syncs = df[df["Event"] == "Sync Region"].sort_values(by=["Thread", "Time"])

    def _busy_time(thread_id):
        """Sum the (end - begin) durations recorded for one thread."""
        times = syncs.loc[syncs["Thread"] == thread_id, "Time"].to_numpy()
        assert len(times) % 2 == 0, (
            "thread %s has an unmatched Sync Region event" % thread_id
        )
        # Rows are time-ordered, so even positions are begins, odd are ends.
        return sum(end - begin for begin, end in zip(times[0::2], times[1::2]))

    per_thread = [_busy_time(tid) for tid in syncs["Thread"].unique()]
    # default=0 keeps a sync-free trace from failing inside max().
    return max(per_thread, default=0) / run_time

In [13]:
def parallel_region_ratio(df):
    """Share of the total run time spent inside parallel regions.

    The run time is the ``Time`` of the first ``"End"`` row minus that of
    the first ``"Start"`` row.  Rows with ``Thread == "All"`` and
    ``Event == "Parallel Region"`` are ordered by time and read as
    consecutive (begin, end) pairs; their durations are added up and
    divided by the run time.  An odd number of such rows trips the
    assertion.
    """
    event_col = df["Event"]
    finish_ts = df[event_col == "End"]["Time"].values[0]
    launch_ts = df[event_col == "Start"]["Time"].values[0]
    total_runtime = finish_ts - launch_ts

    is_region = (df["Thread"] == "All") & (event_col == "Parallel Region")
    boundaries = df[is_region].sort_values(by=["Time"])["Time"].to_numpy()
    assert len(boundaries) % 2 == 0

    # Even positions open a region, the following odd position closes it.
    in_parallel = 0
    for opened, closed in zip(boundaries[0::2], boundaries[1::2]):
        in_parallel += closed - opened
    return in_parallel / total_runtime

In [14]:
def get_valgrind(path):
    """Parse a Valgrind text report into a count table and a ratio table.

    The report is scanned for two column-header lines (matched as exact
    line prefixes, spacing included).  The row two lines below the first
    header becomes row 0 of both tables; rows starting three lines below
    the second ("file:function") header are appended until the first line
    with fewer than ten space/parenthesis-separated tokens.
    (The column names match Cachegrind event names, so this is presumably
    ``cg_annotate`` output -- confirm against the producing tool.)

    Each data row is read as nine counts, each optionally followed by a
    ``(xx.x%)`` percentage, and a trailing label.  Counts have their
    thousands separators removed; percentages are stored as fractions
    (missing ones as 0.0).  Only the last token of the label is kept, and a
    label starting with ``"."`` is stored as ``None``.

    Parameters
    ----------
    path : str or path-like
        Location of the text report.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``df_counts`` and ``df_ratios``, both with the columns ``Ir, I1mr,
        ILmr, Dr, D1mr, DLmr, Dw, D1mw, DLmw, Function``.

    Raises
    ------
    AssertionError
        If either header line cannot be found.
    IndexError
        If the report ends before the row below the first header.
    ValueError
        If a row with ten or more tokens holds a non-numeric count.
    """
    columns = ["Ir", "I1mr", "ILmr", "Dr", "D1mr", "DLmr", "Dw", "D1mw", "DLmw", "Function"]
    # Header lines introducing the summary row and the per-function rows.
    totals_header = "Ir                 I1mr           ILmr           Dr               D1mr           DLmr           Dw               D1mw           DLmw"
    functions_header = "Ir                 I1mr         ILmr         Dr               D1mr         DLmr         Dw              D1mw           DLmw          file:function"

    # Context manager so the handle is released even if reading fails.
    with open(path, "r") as logfile:
        line_list = logfile.readlines()
    line_len = len(line_list)

    def _find_header(header, start):
        """Index of the first line at or after ``start`` beginning with ``header``, else None."""
        for line_index in range(start, line_len):
            if line_list[line_index].startswith(header):
                return line_index
        return None

    def _read_valgrind_line(line):
        """Split one report row into (counts, ratios, label) or three Nones."""
        pieces = line.replace("(", " ").replace(")", " ").split(" ")
        tokens = [piece for piece in pieces if piece]
        if len(tokens) < 10:
            return None, None, None
        counts = []
        ratios = []
        k = 0
        for _ in range(9):
            counts.append(int(tokens[k].replace(",", "")))
            k += 1
            candidate = tokens[k]
            if candidate[-1] == "%":
                # The count came with a percentage: consume it as a fraction.
                ratios.append(float(candidate[:-1]) / 100)
                k += 1
            else:
                # No percentage printed; the token is the next count or the label.
                ratios.append(0.0)
        func = tokens[-1].replace("\n", "")
        if len(func) > 0 and func[0] == ".":
            func = None
        return counts, ratios, func

    # None (not 0) marks "not found", so a header on the first line is valid.
    head_index = _find_header(totals_header, 0)
    assert head_index is not None, "summary header line not found in report"

    counts, ratios, func = _read_valgrind_line(line_list[head_index + 2])
    if counts is None:
        # Keep an empty placeholder so function rows still start at row 1.
        counts = [None] * 9
        ratios = [None] * 9
    count_rows = [counts + [func]]
    ratio_rows = [ratios + [func]]

    begin_index = _find_header(functions_header, head_index + 2)
    assert begin_index is not None, "file:function header line not found in report"

    # NOTE(review): rows are read from three lines below the header, as the
    # original code did.  If the report has a single separator line between
    # the header and the first row, that first row is skipped -- confirm
    # against a real report.
    for line in line_list[begin_index + 3:]:
        counts, ratios, func = _read_valgrind_line(line)
        if counts is None:
            break
        count_rows.append(counts + [func])
        ratio_rows.append(ratios + [func])

    # Build each frame once with the label included in the row; assigning
    # through df.loc[k]["Function"] writes to a temporary copy of the row.
    df_counts = pd.DataFrame(count_rows, columns=columns)
    df_ratios = pd.DataFrame(ratio_rows, columns=columns)
    return df_counts, df_ratios


In [15]:
# Load the sample trace once, then print the three ratio metrics one per
# line: parallel-region ratio, synchronization ratio, thread ratio.
result = get_tracing("./omptracing_57.json")
for ratio_fn in (parallel_region_ratio, sync_ratio, thread_ratio):
    print(ratio_fn(result))

0.008937530155114611
0.01680021055586547
0.018196762571825554


In [16]:
# Parse the sample Valgrind report; get_valgrind returns the count table
# and the ratio table as a pair of DataFrames.
df_counts,df_ratios=get_valgrind("./valgrind_sample.txt")