In [16]:
import os, glob
import math
import pickle
import numpy as np
import xarray as xr
import matplotlib
matplotlib.use('Agg')
from matplotlib.transforms import Affine2D
from matplotlib.patches import Polygon
import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
base = "/p/project/deepacf/deeprain/video_prediction_shared_folder/models/"+ \
       "era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/"
fname_timing_train = "/timing_training_time.pkl"
fname_timing_total = "/timing_total_time.pkl"

fname_timing_iter = "timing_per_iteration_time.pkl"

In [3]:
# some auxiliary functions
def orderOfMagnitude(number):
    return np.floor(np.log(number, 10))

def total_times(infile):
    with open(infile,'rb') as tfile:
        #print("Opening pickle time: '{0}'".format(infile))
        total_time_sec = pickle.load(tfile)
    return np.asarray(total_time_sec/60)

def log_total_times(infile):
    total_time_min = total_times(infile)
    return np.log(total_time_min)


def get_time_dict(base, wildcardspec, tfilename, gpu_id_str="gpu", llog = False):
    time_dict = {}
    flist_hpc = sorted(glob.glob(base + wildcardspec))
    wrapper = total_times
    if llog: wrapper = log_total_times
    for tfile in flist_hpc: 
        ngpus = get_ngpus(tfile, gpu_id_str)
        time_dict["{0:d} GPU(s)".format(ngpus)] = wrapper(tfile + tfilename)
    return time_dict

def get_ngpus(fname, search_str, max_order=3):
    """
    Tries to get numbers in the vicinty of search_str which is supposed to be a substring in fname.
    First seaches for numbers right before the occurence of search_str, then afterwards.
    :param fname: file name from which number should be inferred
    :param search_str: seach string for which number identification is considered to be possible
    :param max_order: maximum order of retrieved number (default: 3 -> maximum number is 999 then)
    :return num_int: integer of number in the vicintity of search string. 
    """
    
    ind_gpu_info = fname.lower().find(search_str)
    if ind_gpu_info == -1:
        raise ValueError("Unable to find search string '{0}' in file name '{1}'".format(search_str, fname))
    
    # init loops
    fname_len = len(fname)
    success, flag = False, True
    indm = 1
    ind_sm, ind_sp = 0, 0

    # check occurence of numbers in front of search string
    while indm < max_order and flag:
        if ind_gpu_info - indm > 0:
            if fname[ind_gpu_info - indm].isnumeric():
                ind_sm += 1
                success = True
            else:
                flag = False
        else:
            flag = False
        indm += 1
  

    if not success: # check occurence of numbers after search string
        ind_gpu_info = ind_gpu_info + len(search_str)
        flag = True
        indm = 0
        while indm < max_order and flag: 
            if ind_gpu_info + indm < fname_len:
                if fname[ind_gpu_info + indm].isnumeric():
                    ind_sp += 1
                    success = True
                else:
                    flag = False
            else:
                flag = False
            indm += 1
            
        if success:
            return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))
        else:
            raise ValueError("Search string found in fname, but unable to infer number of GPUs.")

    else:
        return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))
        
        
    

In [4]:
# Juwels
wildcard_juwels = '20210115T135325_langguth1_test_venv_juwels_container*old'
total_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_total, "gpus")
training_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_train, "gpus")
for key in training_time_min_juwels.keys():
    print("Total computation with {0}: {1}".format(key, training_time_min_juwels[key]))

overhead_time_juwels = {}
for key in training_time_min_juwels.keys() & total_time_min_juwels.keys():
    overhead_time_juwels[key] = total_time_min_juwels[key] - training_time_min_juwels[key]
    
#print('Juwels total time in minutes', get_time_d)
#print('Juwels total training time in minutes', training_time_min_juwels)
#overhead_time_juwels = np.array(total_time_min_juwels) - np.array(training_time_min_juwels)
#print('Juwels overhead time in minutes', overhead_time_juwels)

Total computation with 16 GPU(s): 152.50984706878663
Total computation with 32 GPU(s): 81.80640578667322
Total computation with 4 GPU(s): 554.5182513117791
Total computation with 64 GPU(s): 45.01537701288859
Total computation with 8 GPU(s): 287.91878341039023


In [5]:
# Juwels booster
wildcard_booster = '2020*gong1_booster_gpu*'
total_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_total)
training_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_train)
for key in training_time_min_booster.keys():
    print("Total computation with {0}: {1}".format(key, training_time_min_booster[key]))

#print('Juwels Booster total time in minutes', list_times(base, wildcard_booster, filename_timing_total))
#print('Juwels Booster total training time in minutes', list_times(base, wildcard_booster, filename_timing_train))
overhead_time_booster = {}
for key in training_time_min_booster.keys() & total_time_min_booster.keys():
    overhead_time_booster[key] = total_time_min_booster[key] - training_time_min_booster[key]
#print('Juwels overhead time in minutes', overhead_time_booster)

Total computation with 1 GPU(s): 566.7376739541689
Total computation with 4 GPU(s): 159.4931242307027
Total computation with 8 GPU(s): 92.15467914342881
Total computation with 16 GPU(s): 46.11619712909063
Total computation with 32 GPU(s): 33.09077355464299
Total computation with 64 GPU(s): 23.24405464331309


In [6]:
def time_per_iteration_mean_std(infile):
    with open(infile, 'rb') as tfile:
        time_per_iteration_list = pickle.load(tfile) 
        
    time_per_iteration = np.array(time_per_iteration_list)
    return np.mean(time_per_iteration), np.std(time_per_iteration)

def iter_stat(base, wildcardspec, gpu_id_str="gpu"):
    stat_iter_dict = {}
    flist_hpc = sorted(glob.glob(base + wildcardspec))
    for tdir in flist_hpc: 
        ngpus = get_ngpus(tdir, gpu_id_str)
        ftname = os.path.join(tdir, fname_timing_iter)
        mean_loc, std_loc = time_per_iteration_mean_std(ftname)
        stat_iter_dict["{0:d} GPU(s)".format(ngpus)] = {"mean": mean_loc , "std": std_loc}
    return stat_iter_dict

def time_per_iteration_all(infile):
    with open(infile,'rb') as tfile:
        time_per_iteration_list = pickle.load(tfile)
    return np.asarray(time_per_iteration_list)

def all_iter(base, wildcardspec, gpu_id_str="gpu"):
    iter_dict = {}
    flist_hpc = sorted(glob.glob(base + wildcardspec))
    for tdir in flist_hpc: 
        ngpus = get_ngpus(tdir, gpu_id_str)
        ftname = os.path.join(tdir, fname_timing_iter)
        iter_dict["{0:d} GPU(s)".format(ngpus)] = time_per_iteration_all(ftname)
    return iter_dict    
    

In [30]:
# Juwels
print('JUWELS', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T140958_stadtler1_comparison_1node_1gpu/timing_per_iteration_time.pkl'))
# Booster
print('Booster', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T141910_gong1_booster_gpu1/timing_per_iteration_time.pkl'))

JUWELS (0.6151515198034729, 0.20104178037750603)
Booster (0.3521572324468615, 0.3656996619706779)


In [31]:
# Juwels
print('Juwels mean and standart deviation',iter_stat(base, wildcard_juwels))

Juwels mean and standart deviation {'16 GPU(s)': {'mean': 0.8209993402058342, 'std': 0.2627643291319852}, '32 GPU(s)': {'mean': 0.8590118098249986, 'std': 0.4078450977768068}, '4 GPU(s)': {'mean': 0.7445914211655112, 'std': 0.13789611351045}, '64 GPU(s)': {'mean': 0.9353915504630987, 'std': 0.6640973670265782}, '8 GPU(s)': {'mean': 0.7804724221628322, 'std': 0.21824334555299446}}


In [9]:
# Booster
print('Booster mean and standart deviation',iter_stat(base, wildcard_booster))

Booster mean and standart deviation {'1 GPU(s)': {'mean': 0.3521572324468615, 'std': 0.3656996619706779}, '4 GPU(s)': {'mean': 0.41844419631014446, 'std': 0.5273198599590724}, '8 GPU(s)': {'mean': 0.48867375665101026, 'std': 0.4378652997442439}, '16 GPU(s)': {'mean': 0.4786909431320202, 'std': 0.49638173862734053}, '32 GPU(s)': {'mean': 0.6439339113469129, 'std': 1.4395666886291258}, '64 GPU(s)': {'mean': 0.8176603168024377, 'std': 2.1044189535471185}}


In [34]:
# Plotting 
# Bar plot of total time and training time --> overhead time

# dictionaries with the total times
tot_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_total)
tot_time_booster_dict= get_time_dict(base, wildcard_booster, fname_timing_total)

# dictionaries with the training times
train_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_train)
train_time_booster_dict = get_time_dict(base, wildcard_booster, fname_timing_train)

# get sorted arrays
# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20
#       instead of 10 (as Bing and Scarlet did)
ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_juwels_dict.keys()])
nexps = len(ngpus_sort)
tot_time_juwels = np.array([tot_time_juwels_dict["{0:d} GPU(s)".format(key)] for key in ngpus_sort])/2.
tot_time_booster = np.array([tot_time_booster_dict["{0:d} GPU(s)".format(key)] for key in ngpus_sort])

train_time_juwels = np.array([train_time_juwels_dict["{0:d} GPU(s)".format(key)] for key in ngpus_sort])/2.
train_time_booster = np.array([train_time_booster_dict["{0:d} GPU(s)".format(key)] for key in ngpus_sort])

overhead_juwels = tot_time_juwels - train_time_juwels 
overhead_booster= tot_time_booster - train_time_booster

names = ["Juwels", "Juwels Booster"]

In [31]:
plot_computation_times(tot_time_juwels, tot_time_booster, labels, ["Juwels", "Juwels Booster"], \
                       "./total_computation_time", log_yvals=False)

plot_computation_times(overhead_juwels, overhead_booster, labels, ["Juwels", "Juwels Booster"], \
                       "./overhead_time")

400.0
278.0
100.0
2.0


In [12]:
#print(labels)
#raise ValueError("Stop!")
#x = np.arange(len(labels))  # the label locations
#width = 0.35  # the width of the bars

#fig, ax = plt.subplots()
#rects1 = ax.bar(x - width/2, np.round(tot_time_juwels, 2), width, label='Juwels')
#rects2 = ax.bar(x + width/2, np.round(tot_time_booster, 2), width, label='Booster')

def plot_computation_times(times1, times2, ngpus, names, plt_fname, log_yvals = False):
    
    nlabels = len(ngpus)
    x_pos = np.arange(nlabels)
    
    bar_width = 0.35
    ytitle = "Time"
    ymax = np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.
    print(ymax)    
    if log_yvals: 
        times1, times2 = np.log(times1), np.log(times2)
        ytitle = "LOG(Time) [min]"
        ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))
    
    # create plot object
    fig, ax = plt.subplots()
    # create data bars
    rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0])
    rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1])
    # customize plot appearance
    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel(ytitle)
    ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))
    ax.set_xticks(x_pos)
    ax.set_xticklabels(labels)
    ax.set_xlabel('# GPUs')
    print(np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5)))
    ax.set_ylim(0., ymax)
    ax.legend()
                
    # add labels
    autolabel(ax, rects1)
    autolabel(ax, rects2)
    plt.savefig(plt_fname+".png")
    plt.close()
    

def autolabel(ax, rects):
    """Attach a text label above each bar in *rects*, displaying its height."""
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')


In [13]:
# Plot mean + std 
# Juwels
dict_stat_juwels = iter_stat(base, wildcard_juwels, gpu_id_str="gpu")
#print(dict_stat_juwels)
iter_mean_juwels = np.array([dict_stat_juwels["{0:d} GPU(s)".format(key)]["mean"] for key in labels])
iter_std_juwels = np.array([dict_stat_juwels["{0:d} GPU(s)".format(key)]["std"] for key in labels])

dict_stat_booster = iter_stat(base, wildcard_booster, gpu_id_str="gpu")
iter_mean_booster = np.array([dict_stat_booster["{0:d} GPU(s)".format(key)]["mean"] for key in labels])
iter_std_booster = np.array([dict_stat_booster["{0:d} GPU(s)".format(key)]["std"] for key in labels])

In [29]:
iter_time_juwels = all_iter(base, wildcard_juwels)
iter_time_booster= all_iter(base, wildcard_booster)

max_iter_juwels = np.shape(iter_time_booster["{0:d} GPU(s)".format(labels[0])])[0]
max_iter_booster = np.shape(iter_time_booster["{0:d} GPU(s)".format(labels[0])])[0]

arr_iter_juwels = np.full((nexps, max_iter_juwels), np.nan)
arr_iter_booster= np.full((nexps, max_iter_booster), np.nan)




(21225,)


In [37]:
# box plot instead of errorbar plot
# Juwels
#data_juwels = list_time_per_iteration_all_runs(base, wildcard_juwels)
data_juwels = all_iter(base, wildcard_juwels, gpu_id_str="gpu")
# Booster
#data_booster = list_time_per_iteration_all_runs(base, wildcard_booster)
data_booster = all_iter(base, wildcard_booster, gpu_id_str="gpu")
def simple_boxplot(time_per_iteration_data, title):
    # Multiple box plots on one Axes
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.boxplot(time_per_iteration_data, showfliers=False) # Outliers for initialization are disturbing 
    plt.xticks([1, 2, 3, 4, 5 ,6], ['1', '4', '8', '16', '32', '64'])
    #plt.savefig('boxplot_'+title)
    #plt.close()

In [86]:
print(np.argmax(data_booster["64 GPU(s)"]))
print(np.max(data_booster["64 GPU(s)"]))
print(data_booster["64 GPU(s)"][0])
print(np.shape(data_booster["64 GPU(s)"]))
print("***********")

print(np.argmax(data_juwels["64 GPU(s)"][1::]))
print(np.max(data_juwels["64 GPU(s)"][1::]))
print(data_juwels["64 GPU(s)"][0])
print(np.shape(data_juwels["64 GPU(s)"]))
print("***********")

print(np.argmax(data_juwels["4 GPU(s)"][1::]))
print(np.max(data_juwels["4 GPU(s)"][1::]))
print(data_juwels["4 GPU(s)"][0])
print(np.shape(data_juwels["4 GPU(s)"]))
 
print("***********")
print(np.argmax(data_booster["4 GPU(s)"][1::]))
print(np.max(data_booster["4 GPU(s)"][1::]))
print(data_booster["4 GPU(s)"][0])
print(np.shape(data_booster["4 GPU(s)"]))

#simple_boxplot(data_juwels, 'Juwels')

886
64.08639097213745
31.232596397399902
(1326,)
***********
2100
4.405388832092285
29.095214366912842
(2653,)
***********
36981
7.751298189163208
26.409477949142456
(42450,)
***********
3843
66.00082683563232
29.385547637939453
(21225,)


In [None]:
simple_boxplot(data_booster, 'Booster')

In [81]:
# Try more fancy box plot 
def more_fancy_boxplot(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, title):
    nexps = len(ngpu_list)
    # Shuffle data: EXPECT JUWELS FIRST FOR THE LEGEND! NOT GENERIC!
    data = []
    for i in np.arange(nexps):
        data.append(time_per_iteration_data1["{0} GPU(s)".format(ngpu_list[i])])
        data.append(time_per_iteration_data2["{0} GPU(s)".format(ngpu_list[i])])
     
    # trick to get list with duplicated entries
    xlabels = [val for val in ngpu_list for _ in (0, 1)]

    # Multiple box plots on one Axes
    #fig, ax = plt.subplots()
    fig = plt.figure(figsize=(6,4))
    ax = plt.axes([0.1, 0.15, 0.75, 0.75])   
    
    ax.set_title(title)
    bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing
    plt.xticks(np.arange(1, nexps*2 +1), xlabels)
    ax.set_xlabel('# GPUs')
    ax.set_ylabel('Seconds')
    
    # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html 
    box_colors = ['darkkhaki', 'royalblue']
    num_boxes = len(data)
    medians = np.empty(num_boxes)
    for i in range(num_boxes):
        box = bp['boxes'][i]
        boxX = []
        boxY = []
        for j in range(5):
            boxX.append(box.get_xdata()[j])
            boxY.append(box.get_ydata()[j])
        box_coords = np.column_stack([boxX, boxY])
        # Alternate between Dark Khaki and Royal Blue
        ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))
        # Now draw the median lines back over what we just filled in
        med = bp['medians'][i]
        medianX = []
        medianY = []
        for j in range(2):
            medianX.append(med.get_xdata()[j])
            medianY.append(med.get_ydata()[j])
            ax.plot(medianX, medianY, 'k')
        medians[i] = medianY[0]
        # Finally, overplot the sample averages, with horizontal alignment
        # in the center of each box
        ax.plot(np.average(med.get_xdata()), np.average(data[i]),
                color='w', marker='*', markeredgecolor='k')
    
    # Finally, add a basic legend
    fig.text(0.9, 0.15, 'Juwels',
             backgroundcolor=box_colors[0], color='black', weight='roman',
             size='small')
    fig.text(0.9, 0.09, 'Booster',
             backgroundcolor=box_colors[1],
             color='white', weight='roman', size='small')
    #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',
    #         weight='roman', size='medium')
    fig.text(0.9, 0.03, '* Mean', color='white', backgroundcolor='silver',
             weight='roman', size='small')

    
    plt.savefig('fancy_boxplot_'+title.replace(' ', '_'))
    plt.close()

In [82]:
more_fancy_boxplot(data_juwels, data_booster, ngpus_sort, 'Time needed to iterate one step')

In [None]:
flist_hpc1 = sorted(glob.glob(base + wildcard_juwels))
flist_hpc2 = sorted(glob.glob(base + wildcard_booster))


        

print(get_ngpus(flist_hpc1[2], "gpu"))
print(get_ngpus(flist_hpc1[0], "gpu"))

print(get_ngpus(flist_hpc2[2], "gpu"))
