## Data Reading (G5K DATA)

In [26]:
import pandas as pd
import os

# Directory (relative to the notebook) that holds the G5K counter CSV files.
csvs_dir = '../data/submission2/native/g5k'

# Column names handed to pd.read_csv when the CSV files are loaded.
col_names = [
    'version', 'app', 'class', 'cpu', 'threads', 'metric', 'value',
]

# Names of the hardware-counter metrics.
# NOTE(review): both 'L2-LINES-OUT-USELESS-HWPREF' and
# 'L2-LINES-OUT-USELESS-HWPF' are listed — confirm both spellings are intended.
lst_metrics = [
    # instruction / cycle counters
    'PAPI-TOT-INS',
    'PAPI-TOT-CYC',
    # L2 request counters
    'L2-RQSTS-DEMAND-DATA-RD-HIT',
    'L2-RQSTS-DEMAND-DATA-RD-MISS',
    'L2-RQSTS-ALL-PF',
    'L2-RQSTS-PF-HIT',
    'L2-RQSTS-PF-MISS',
    'L2-LINES-OUT-USELESS-HWPREF',
    'L2-LINES-OUT-USELESS-HWPF',
    # offcore response counters
    'OFFCORE-RESPONSE-0-PF-L2-DATA-RD-ANY-RESPONSE',
    'OFFCORE-RESPONSE-0-PF-L2-DATA-RD-L3-HIT-SNP-ANY',
    'OFFCORE-RESPONSE-0-PF-L2-DATA-RD-L3-HIT-SNP-HITM',
    'OFFCORE-RESPONSE-0-PF-L2-RFO-ANY-RESPONSE',
    'OFFCORE-RESPONSE-0-PF-L2-RFO-L3-HIT-SNP-ANY',
    'OFFCORE-RESPONSE-0-PF-L2-RFO-L3-HIT-SNP-HITM',
    # offcore request counters
    'OFFCORE-REQUESTS-DEMAND-DATA-RD',
    'OFFCORE-REQUESTS-ALL-DATA-RD',
    'OFFCORE-REQUESTS-L3-MISS-DEMAND-DATA-RD',
]

# Read every non-directory entry of csvs_dir into its own DataFrame,
# labelling the columns with col_names.
lst_csv_dfs=[]

for csv_filename in os.listdir(csvs_dir):
    csv_path=f'{csvs_dir}/{csv_filename}'
    if not os.path.isdir(csv_path):
        lst_csv_dfs.append(pd.read_csv(csv_path, names=col_names))

# Stack all files into one frame and order the rows by run configuration.
df_all_g5k=(
    pd.concat(lst_csv_dfs)
    .sort_values(['version','app','class','threads','cpu'])
)

df_all_g5k


Unnamed: 0,version,app,class,cpu,threads,metric,value
775,native-native-L1,bt,A,0,1,OFFCORE-REQUESTS-ALL-DATA-RD,17645340
870,native-native-L1,bt,A,0,1,OFFCORE-RESPONSE-0-PF-L2-DATA-RD-ANY-RESPONSE,0
1211,native-native-L1,bt,A,0,1,L2-LINES-OUT-USELESS-HWPF,0
3031,native-native-L1,bt,A,0,1,L2-RQSTS-DEMAND-DATA-RD-HIT,17459554
4961,native-native-L1,bt,A,0,1,PAPI-TOT-CYC,2610928444
...,...,...,...,...,...,...,...
11811,native-native-none,ua,W,11,12,OFFCORE-REQUESTS-ALL-DATA-RD,1593721
12073,native-native-none,ua,W,11,12,PAPI-TOT-INS,2699355750
13321,native-native-none,ua,W,11,12,OFFCORE-RESPONSE-0-PF-L2-DATA-RD-L3-HIT-SNP-ANY,0
13456,native-native-none,ua,W,11,12,OFFCORE-REQUESTS-DEMAND-DATA-RD,1559163


## Data Preprocessing (G5K DATA)

In [27]:
## removing the double native in the version value
import re
# Matches a version label carrying a doubled 'native-' prefix and captures
# everything after the first 'native-'.
pattern='native-(native-.*)'

def remove_native(row):
    """Strip the duplicated leading 'native-' from a version label.

    Parameters
    ----------
    row : str
        A version label, e.g. 'native-native-L1'.

    Returns
    -------
    str
        The captured part of `pattern` (e.g. 'native-L1') when the label
        matches; otherwise the label unchanged. Returning the input on a
        non-match keeps the function idempotent, so the cell that applies it
        in place can be re-run without raising AttributeError on None.
    """
    res=re.search(pattern,row)
    if res is None:
        return row
    return res.group(1)

# Normalise the version labels in place (drops the doubled 'native-' prefix).
df_all_g5k.loc[:,'version']=df_all_g5k['version'].apply(remove_native)

######## Computing IPC ###########################
# One row per (version, app, class, cpu, threads) configuration.
group_keys=['version','app','class','cpu','threads']

df_cycles=df_all_g5k.loc[df_all_g5k['metric']=='PAPI-TOT-CYC']
df_instructions=df_all_g5k.loc[df_all_g5k['metric']=='PAPI-TOT-INS']

# Average only the numeric 'value' column: the frames still carry the string
# 'metric' column, and taking the mean of a string column raises on recent
# pandas versions.
cycles_mean_per_threads=df_cycles.groupby(group_keys)[['value']].mean().reset_index()
instr_mean_per_threads=df_instructions.groupby(group_keys)[['value']].mean().reset_index()

# Join cycles and instructions on the group keys instead of relying on both
# frames having the same row order; a configuration missing its instruction
# counter gets NaN rather than a value belonging to another configuration.
df_processed_g5k=cycles_mean_per_threads.rename(columns={'value':'MEAN_CYC'}).merge(
    instr_mean_per_threads.rename(columns={'value':'MEAN_INS'}),
    on=group_keys,
    how='left',
)

df_processed_g5k['IPC']=df_processed_g5k['MEAN_INS']/df_processed_g5k['MEAN_CYC']
######## End Computing IPC ######################


### FILTERING CORE 1 TO TEST (DUE TO G5K DAEMON THREAD INSERTION)
### IT IS NOT GUARANTEED THAT CORE 1 IS ACTUALLY THE DAEMON THREAD OF ALL APPS
### THOUGH IT SEEMS TO BE THE CASE FOR CG
### WE CALL THIS DAEMON THREAD 'thread-G5K'

df_processed_g5k=df_processed_g5k.loc[df_processed_g5k['cpu']!=1]

### END FILTERING CORE 1 TO TEST (DUE TO G5K DAEMON THREAD INSERTION)


df_processed_g5k

Unnamed: 0,version,app,class,cpu,threads,MEAN_CYC,MEAN_INS,IPC
0,native-L1,bt,A,0,1,2.616812e+09,7.291827e+09,2.786531
1,native-L1,bt,A,0,2,2.632047e+09,7.526421e+09,2.859531
2,native-L1,bt,A,0,4,2.642294e+09,7.380510e+09,2.793220
3,native-L1,bt,A,0,8,2.643249e+09,7.211175e+09,2.728149
4,native-L1,bt,A,0,12,2.630648e+09,7.202715e+09,2.738001
...,...,...,...,...,...,...,...,...
2911,native-none,ua,W,7,12,2.243831e+09,2.699117e+09,1.202906
2912,native-none,ua,W,8,12,2.243840e+09,2.699123e+09,1.202903
2913,native-none,ua,W,9,12,2.243863e+09,2.699128e+09,1.202894
2914,native-none,ua,W,10,12,2.243871e+09,2.699134e+09,1.202891


## Standard Metrics (Mean Cycles, Mean Instructions, Instructions per Cycle)

In [28]:
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import Javascript,display

#hw_counters=['offcore-response-pf-l2-rfo-l3-hit-any-snoop']
# Choices offered by the three dropdown widgets of this cell.
metrics = [
    'MEAN_CYC',
    'MEAN_INS',
    'IPC',
]
nb_threads = ['all', '1', '2', '4', '8', '12']
classes = ['W', 'A', 'B']

#df_miss_non0=df_miss.loc[df_miss['cpu']==0]
def plot_metrics_graph(metric='IPC',nb_threads='all',_class='A'):
    """Plot one metric of df_processed_g5k as a 3x3 grid of bar charts and save it as a PDF.

    One panel is drawn per application, with one bar colour per version.

    Parameters
    ----------
    metric : str
        Column of df_processed_g5k to plot on the y axis.
    nb_threads : str
        'all' to plot the metric against the thread count, or a thread count
        given as a string (e.g. '4') to plot the metric per core for that
        thread count only.
    _class : str
        Value of the 'class' column to keep.

    Side effects
    ------------
    Sets the seaborn font scale and writes the figure to
    '../figures/submission2/<metric>_<_class>_<nb_threads>_subm2_g5k.pdf'.
    """
    apps=['bt','cg','ep','ft','is','lu','mg','sp','ua']
    hue_order=['native-none', 'native-L1', 'native-L2', 'native-L1-L2']
    # Legend texts shown instead of the raw version labels, in hue_order order.
    new_labels = ['Real-NoPref', 'Real-L1', 'Real-L2', 'Real-L1-L2']
    all_threads=(nb_threads=='all')

    df_metric=df_processed_g5k.loc[df_processed_g5k['class']==_class]
    if not all_threads:
        df_metric=df_metric.loc[df_metric['threads']==int(nb_threads)]

    sns.set(font_scale=1.5)
    fig, axs=plt.subplots(nrows=3,ncols=3,figsize=(15,15))
    plt.tight_layout(pad=3.5)

    for i,app in enumerate(apps):
        row,col=divmod(i,3)
        ax=axs[row][col]
        cycles_app=df_metric.loc[df_metric['app']==app]

        # x axis: thread count in the overview, core index for a single thread count.
        sns.barplot(x='threads' if all_threads else 'cpu',y=metric,hue='version',
                    data=cycles_app,ax=ax,
                    hue_order=hue_order,
                    palette='cubehelix')

        if all_threads:
            # legend title
            ax.legend(framealpha=0.4)
            ax.legend_.set_title('Execution')
            # replace labels
            for t, l in zip(ax.legend_.texts, new_labels):
                t.set_text(l)
        elif ax.legend_ is not None:
            # No legend exists when the panel has no data; only remove a real one.
            ax.legend_.remove()

        ax.set_title(app.upper())
        # Label the axis with the metric actually plotted (was always 'IPC').
        ax.set_ylabel(metric.replace('_',' '))
        if all_threads:
            ax.set_xlabel('Number of Threads')
        else:
            ax.set_xlabel('Core Index')

    # 'format' is the savefig keyword for the output type; 'type' is not accepted.
    plt.savefig('../figures/submission2/'+metric+'_'+_class+'_'+nb_threads+'_subm2_g5k.pdf',format='pdf',bbox_inches='tight')
    
# Keyword arguments common to the three dropdowns below.
_dropdown_shared=dict(
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
)

dropdown_metrics=widgets.Dropdown(options=metrics,value='IPC',description='metric',**_dropdown_shared)
dropdown_classes=widgets.Dropdown(options=classes,value='A',description='class',**_dropdown_shared)
dropdown_nb_threads=widgets.Dropdown(options=nb_threads,value='all',description='nb_threads',**_dropdown_shared)

# Bind each dropdown to the matching keyword argument of plot_metrics_graph.
interact_manual(
    plot_metrics_graph,
    metric=dropdown_metrics,
    nb_threads=dropdown_nb_threads,
    _class=dropdown_classes,
)

interactive(children=(Dropdown(description='metric', index=2, options=('MEAN_CYC', 'MEAN_INS', 'IPC'), value='…

<function __main__.plot_metrics_graph(metric='IPC', nb_threads='all', _class='A')>

# Calculating Serpa's IPC

sum -> adds up the value of every core

for each step (cei1.1, cei1.2, cei1.3, cei1.4)
..... for each prefetch, app, class
..........IPC = sum(PAPI-TOT-INS) / sum(PAPI-TOT-CYC)

this creates a new "metric"

################# 
then go to your existing code and take the mean of the IPC (there will be 1 IPC per step, prefetch, app, class)

In [21]:
import re
# Matches a version label carrying a doubled 'native-' prefix and captures
# everything after the first 'native-'.
pattern='native-(native-.*)'

def remove_native(row):
    """Strip the duplicated leading 'native-' from a version label.

    Parameters
    ----------
    row : str
        A version label, e.g. 'native-native-L1'.

    Returns
    -------
    str
        The captured part of `pattern` (e.g. 'native-L1') when the label
        matches; otherwise the label unchanged, so applying the function to
        an already-normalised label does not raise AttributeError on None.
    """
    res=re.search(pattern,row)
    if res is None:
        return row
    return res.group(1)

# lst_csv_dfs is in order of cei1.1 cei1.2 and so on
import pandas as pd
import os

lst_dfs=[]

# Columns identifying one aggregated row (all cores summed together).
key_cols=['version','app','class','threads']

# os.listdir returns entries in arbitrary order; sorting the names makes the
# order of the per-file frames deterministic.
for csv_filename in sorted(os.listdir(csvs_dir)):
    csv_path=os.path.join(csvs_dir,csv_filename)
    if os.path.isdir(csv_path):
        continue
    _df=pd.read_csv(csv_path, names=col_names)

    # remove repeated entries because of the execution script repeated counters
    # for 'PAPI-TOT-INS' and 'PAPI-TOT-CYC'
    _df=_df.groupby(['version','app','class','cpu','threads','metric']).max().reset_index()

    # sum the metric values over all cores for each [prefetch, app, class, threads]
    df_sum=_df.groupby(key_cols+['metric'])[['value']].sum().reset_index()

    # extract the instructions and cycles, keeping the keys needed for the join
    df_sum_ins=df_sum.loc[df_sum['metric']=='PAPI-TOT-INS',key_cols+['value']].rename(columns={'value':'SUM_INS'})
    df_sum_cyc=df_sum.loc[df_sum['metric']=='PAPI-TOT-CYC',key_cols+['value']].rename(columns={'value':'SUM_CYC'})

    # create a new dataframe with 'version','app','class','threads'
    # and data about the sum of instructions and cycles and its IPC.
    # The sums are joined on the key columns rather than by row position, so a
    # configuration missing one counter gets NaN instead of another row's value.
    df_new=df_sum[key_cols].drop_duplicates().reset_index(drop=True)
    df_new=df_new.merge(df_sum_ins,on=key_cols,how='left')
    df_new=df_new.merge(df_sum_cyc,on=key_cols,how='left')
    df_new['SERPA_IPC']=df_new['SUM_INS']/df_new['SUM_CYC']
    lst_dfs.append(df_new)

df_all_serpa=pd.concat(lst_dfs)
# Normalise the version labels (drops the doubled 'native-' prefix).
df_all_serpa.loc[:,'version']=df_all_serpa['version'].apply(remove_native)
df_all_serpa

Unnamed: 0,version,app,class,threads,SUM_INS,SUM_CYC,SERPA_IPC
0,native-L1,bt,A,1,7333697937,2636849732,2.781235
1,native-L1,bt,A,2,7571090266,2647390979,2.859831
2,native-L1,bt,A,4,7365191316,2648422248,2.780973
3,native-L1,bt,A,8,7201578837,2649583733,2.718004
4,native-L1,bt,A,12,7297280358,2643188729,2.760787
...,...,...,...,...,...,...,...
130,native-L2,ua,W,1,5833664734,2647261556,2.203660
131,native-L2,ua,W,2,5273184456,2607980010,2.021942
132,native-L2,ua,W,4,4708138405,2623146343,1.794844
133,native-L2,ua,W,8,3751696473,2615890945,1.434195


## Plotting SERPA_IPC

In [24]:
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import Javascript,display

#hw_counters=['offcore-response-pf-l2-rfo-l3-hit-any-snoop']
# Choices offered by the three dropdown widgets of this cell.
metrics = [
    'SUM_CYC',
    'SUM_INS',
    'SERPA_IPC',
]
nb_threads = ['all', '1', '2', '4', '8', '12']
classes = ['W', 'A', 'B']

#df_miss_non0=df_miss.loc[df_miss['cpu']==0]
def plot_metrics_graph(metric='SERPA_IPC',nb_threads='all',_class='A'):
    """Plot one metric of df_all_serpa as a 3x3 grid of bar charts and save it as a PDF.

    One panel is drawn per application, with one bar colour per version.

    Parameters
    ----------
    metric : str
        Column of df_all_serpa to plot on the y axis.
    nb_threads : str
        'all' to plot every thread count, or a thread count given as a string
        (e.g. '4') to plot only that thread count.
    _class : str
        Value of the 'class' column to keep.

    Side effects
    ------------
    Sets the seaborn font scale and writes the figure to
    '../figures/submission2/<metric>_<_class>_<nb_threads>_subm2_g5k.pdf'.
    """
    apps=['bt','cg','ep','ft','is','lu','mg','sp','ua']
    hue_order=['native-none', 'native-L1', 'native-L2', 'native-L1-L2']
    # Legend texts shown instead of the raw version labels, in hue_order order.
    new_labels = ['Real-NoPref', 'Real-L1', 'Real-L2', 'Real-L1-L2']
    all_threads=(nb_threads=='all')

    df_metric=df_all_serpa.loc[df_all_serpa['class']==_class]
    if not all_threads:
        df_metric=df_metric.loc[df_metric['threads']==int(nb_threads)]

    sns.set(font_scale=1.5)
    fig, axs=plt.subplots(nrows=3,ncols=3,figsize=(15,15))
    plt.tight_layout(pad=3.5)

    for i,app in enumerate(apps):
        row,col=divmod(i,3)
        ax=axs[row][col]
        cycles_app=df_metric.loc[df_metric['app']==app]

        # df_all_serpa is summed over cores and has no 'cpu' column, so the
        # thread count is the x axis in both modes (x='cpu' could not be resolved).
        sns.barplot(x='threads',y=metric,hue='version',data=cycles_app,ax=ax,
                    hue_order=hue_order,
                    palette='cubehelix')

        if all_threads:
            # legend title
            ax.legend(framealpha=0.4)
            ax.legend_.set_title('Execution')
            # replace labels
            for t, l in zip(ax.legend_.texts, new_labels):
                t.set_text(l)
        elif ax.legend_ is not None:
            # No legend exists when the panel has no data; only remove a real one.
            ax.legend_.remove()

        ax.set_title(app.upper())
        # Label the axis with the metric actually plotted (was always 'SERPA IPC').
        ax.set_ylabel(metric.replace('_',' '))
        ax.set_xlabel('Number of Threads')

    # 'format' is the savefig keyword for the output type; 'type' is not accepted.
    plt.savefig('../figures/submission2/'+metric+'_'+_class+'_'+nb_threads+'_subm2_g5k.pdf',format='pdf',bbox_inches='tight')
    
# Keyword arguments common to the three dropdowns below.
_dropdown_shared=dict(
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
)

dropdown_metrics=widgets.Dropdown(options=metrics,value='SERPA_IPC',description='metric',**_dropdown_shared)
dropdown_classes=widgets.Dropdown(options=classes,value='A',description='class',**_dropdown_shared)
dropdown_nb_threads=widgets.Dropdown(options=nb_threads,value='all',description='nb_threads',**_dropdown_shared)

# Bind each dropdown to the matching keyword argument of plot_metrics_graph.
interact_manual(
    plot_metrics_graph,
    metric=dropdown_metrics,
    nb_threads=dropdown_nb_threads,
    _class=dropdown_classes,
)

interactive(children=(Dropdown(description='metric', index=2, options=('SUM_CYC', 'SUM_INS', 'SERPA_IPC'), val…

<function __main__.plot_metrics_graph(metric='SERPA_IPC', nb_threads='all', _class='A')>