In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os.path import join
from matplotlib.backends.backend_pdf import PdfPages

In [2]:
#%matplotlib tk

# Prettier plots

In [3]:
# plt.style.use('seaborn-paper')
# plt.style.available

In [4]:
font_size = 14
#A small dictionary to set the font style
font = {'family': 'sans-serif',
        'weight': 'medium',
        'style': 'normal',
        'size': font_size}
plt.rc('font', **font)
plt.rc('text', usetex=True) #For the LaTeX rendering

plt.rc('xtick', labelsize=font_size)
plt.rc('ytick', labelsize=font_size)

In [5]:
# from matplotlib import rcParams
# rcParams.update({'figure.autolayout': True})
# rcParams['lines.linewidth'] = 2
# rcParams['lines.markeredgewidth'] = 1
# rcParams['lines.markersize'] = 7
# rcParams['text.usetex'] = True

# plt.autoscale(enable=True,axis='both',tight=True)

# font = {'family': 'sans-serif','sans-serif': 'Arial', 'weight': 'normal', 'size': 12}
# plt.rc('font', **font)

# Import the dataframes

Choose path

In [6]:
path = '.'

Read the pickle files

In [7]:
multiply_data = pd.read_pickle(join(path,'multiply_speed.p'))
inversion_data = pd.read_pickle(join(path,'inversion_speed.p'))
inversion_data_special = pd.read_pickle(join(path,'inversion_special.p'))
power_multiply_data = pd.read_pickle(join(path,'power_multiply.p'))
power_inversion_data = pd.read_pickle(join(path,'power_inversion.p'))

In [8]:
data_dic = {'multiply_speed':multiply_data,
            'inversion_speed':inversion_data,
            'inversion_special':inversion_data_special,
            'power_multiply':power_multiply_data,
            'power_inversion':power_inversion_data}

## Dataframe extra manipulation

Convert NaN's to zero

In [9]:
for key,data in data_dic.items():
    data.fillna(0, inplace=True)

### Bytes

Append a column with the total bytes. This is $Gen\_Size \cdot m$

In [10]:
for key,data in data_dic.items():
    if 'multiply' in key:
        data['Bytes'] = data['Gen_Size'].mul(data['m'], fill_value=0)

### Total time

Add a column with the total spent time

In [11]:
# Create column with total values
multiply_data["Total_time"]= multiply_data.Act_1.add(multiply_data.Act_2, fill_value=0)
inversion_data["Total_time"]= inversion_data.Act_1.add(inversion_data.Act_2, fill_value=0)

power_multiply_data["Total_time"]= power_multiply_data.Act_1.add(power_multiply_data.Act_2, fill_value=0)
power_inversion_data["Total_time"]= power_inversion_data.Act_1.add(power_inversion_data.Act_2, fill_value=0)


### Decoding time and inversion time

In [12]:
multiply_data['Time_decod'] = np.nan
multiply_data['Time_inv'] = np.nan
for gen_size in multiply_data['Gen_Size'].unique():
    for block_size in multiply_data['Block_Size'].unique():
        for threads in multiply_data['Threads'].unique():
            index_multiply = (multiply_data['Gen_Size'] == gen_size) & \
                (multiply_data['Block_Size'] == block_size) & (multiply_data['Threads']== threads)
            index_inversion = (inversion_data['Gen_Size'] == gen_size) & \
                (inversion_data['Block_Size'] == block_size) & (inversion_data['Threads']== threads)
            
            multiply_data.loc[index_multiply,'Time_inv'] = inversion_data[index_inversion]['Total_time'].mean()
            multiply_data.loc[index_multiply,'Inv_err'] = inversion_data[index_inversion]['Total_time'].sem()
            multiply_data.loc[index_multiply,'Time_decod'] = multiply_data[index_multiply]['Total_time'] + \
                multiply_data.loc[index_multiply,'Time_inv']

### Throughput

MiB/s

In [13]:
# # Encoding throughput
# multiply_data["Throughput"] = (multiply_data['Bytes'] / (1024**2)) / multiply_data["Total_time"]

# # Decoding throughput
# multiply_data["Throughput_decod"] = (multiply_data['Bytes'] / (1024**2)) / multiply_data["Time_decod"]

MB/s

In [14]:
# Encoding throughput
multiply_data["Throughput"] = (multiply_data['Bytes'] / (1000**2)) / multiply_data["Total_time"]

# Decoding throughput
multiply_data["Throughput_decod"] = (multiply_data['Bytes'] / (1000**2)) / multiply_data["Time_decod"]

Add a strategy called Decoding, so we can plot it together

In [15]:
a = multiply_data[multiply_data['Strat']=='sch_rec']
a.loc[:,'Throughput'] = a.Throughput_decod
a.loc[:,'Strat']='Decoding'
multiply_data = multiply_data.append(a,ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


### Rename strategies

In [16]:
multiply_data.loc[multiply_data['Strat']=='base','Strat'] = 'Base'
multiply_data.loc[multiply_data['Strat']=='basebl','Strat'] = 'BaseBl'
multiply_data.loc[multiply_data['Strat']=='sch_rec','Strat'] = 'Encoding'

# Plots

## Encoding / Decoding speed

### Choose the fastest block size

In [17]:
# Steps:
# * Take just the data that matters: 4 threads, encoding, throughput
# * Do the regular groupby and find the means
# * Reset the index
# * Sort by Throughput and gen_size. Group by Gen_size and take the head(1)

df = multiply_data[(multiply_data["Threads"]==4) & (multiply_data["Strat"]=="Encoding")][["Throughput","Gen_Size","Block_Size"]]

df = df.groupby(["Gen_Size","Block_Size"]).mean().reset_index()

best_blocks = df.sort(["Throughput","Gen_Size"], ascending=[0,1]).groupby('Gen_Size').head(1)[["Gen_Size","Block_Size"]]

best_blocks



Unnamed: 0,Gen_Size,Block_Size
0,16.0,16.0
2,32.0,32.0
5,64.0,64.0
9,128.0,128.0
13,256.0,128.0
19,512.0,256.0
24,1024.0,128.0


### Filter the data to use just the best blocks

In [18]:
df = multiply_data[["Threads", "Strat", "Throughput", "Throughput_decod","Gen_Size","Block_Size","m"]]
# df = df.groupby(["Threads", "Strat","Gen_Size","Block_Size"]).mean()
# df.drop(level=)

i = 0
for index, row in best_blocks.iterrows():
    if i == 0:
        ndf = df[(df["Gen_Size"]==row["Gen_Size"]) & (df["Block_Size"]==row["Block_Size"])]
        i+=1
    else:
        ndf = ndf.append(df[(df["Gen_Size"]==row["Gen_Size"]) & (df["Block_Size"]==row["Block_Size"])],ignore_index=True)


### Plot throughput

In [20]:
plt.ioff()
# i=0
with PdfPages('throughput_per_strat.pdf') as pdf:
    for name,groups in ndf.groupby(["Gen_Size",'m']):
        # The data are the mean values
        data = groups.groupby(["Strat","Threads"]).mean()
        data.rename(index={"Base":"Base enc.","BaseBl":"Base blocked enc.","Decoding":"Multithread dec.","Encoding":"Multithread enc.",},inplace=True)
        
#         if i == 3:
#             break
#         i+=1
        
        figure=data["Throughput"][["Base enc.","Base blocked enc.","Multithread enc.","Multithread dec."]].unstack("Threads").plot(kind="Bar",rot=0)

        # Plot information
        save_titulo= "GenSize_" + name[0].astype('str') + '_SymbolSize_' + name[1].astype('str')
#         titulo= "Gen Size = " + name[0].astype('str') + ' ; m = ' + name[1].astype('str')
#         plt.title(titulo)
#         plt.xlabel("Strategy")
        plt.xlabel(r"$Strategy$")
        plt.ylabel(r"$Goodput\ [MB/s]$")
        plt.grid(True)
        fig = figure.get_figure()
        fig.savefig(save_titulo+".pdf",format="pdf",dpi=300)
        pdf.savefig()
             

## Finding the ratio between Multiplication time and inversion time

In [None]:
df = multiply_data[["Threads", "Strat", "Throughput", "Throughput_decod","Gen_Size","Block_Size","m","Total_time","Time_inv","Act_2"]]
# df = df.groupby(["Threads", "Strat","Gen_Size","Block_Size"]).mean()
# df.drop(level=)

i = 0
for index, row in best_blocks.iterrows():
    if i == 0:
        ndf = df[(df["Gen_Size"]==row["Gen_Size"]) & (df["Block_Size"]==row["Block_Size"])]
        i+=1
    else:
        ndf = ndf.append(df[(df["Gen_Size"]==row["Gen_Size"]) & (df["Block_Size"]==row["Block_Size"])],ignore_index=True)

a = df[(df["Strat"]=='Decoding') & (df["Threads"]==1)].groupby('Gen_Size').mean()
a['Ratio'] = a["Act_2"] / a["Time_inv"]

In [None]:
a