In [9]:
import matplotlib.pyplot as plt
# Turn Interactive Mode off. Only displays plot with plt.show()
plt.ioff()

import pandas as pd
import numpy as np
import json
import os

from collections import defaultdict

In [10]:
# Root directory of the result files. The plotting/statistics helpers read
# DATA_PATH/<batch_name>/<step_version>/<stage>.json below this directory.
DATA_PATH = "./../scripts/output"

In [157]:
lambda_version = "step-v3"

# Batch length used to build the step-function names below. It is defined
# here so that this cell also runs on a fresh kernel, where no earlier cell
# has assigned `batch_len` yet.
batch_len = 150

# All pipeline variants, i.e. the ways the six image operations
# (crop, scaledown, mirror, bw, rotate, watermark) are grouped into stages.
# Each entry is (name template, stages_id, stages_suffix); the template is
# formatted with the batch length and the two lists are index-aligned.
pipelineVariants = {
    # Mono : (crop,scaledown,mirror,bw,rotate,watermark)
    "Mono": (
        "step-agg-csmbrw-{}Sec-batch",
        ["app"],
        [""],
    ),
    # Manual : crop->scaledown->mirror->bw->rotate->watermark
    "Manual": (
        "step-agg-c-s-m-b-r-w-{}Sec-batch",
        ["crop", "scaledown", "mirror", "bw", "rotate", "watermark"],
        ["_cropped", "_resized", "_mirror", "_bw", "_rot", "_watermarked"],
    ),
    # AGG1 : crop->scaledown->(mirror,bw,rotate,watermark)
    "AGG1": (
        "step-agg-c-s-mbrw-{}Sec-batch",
        ["crop", "scaledown", "mirror_bw_rotate_watermark"],
        ["_cropped", "_resized", "_mirror_bw_rot_watermarked"],
    ),
    # AGG2 : crop->scaledown->(mirror,bw)->(rotate,watermark)
    "AGG2": (
        "step-agg-c-s-mb-rw-{}Sec-batch",
        ["crop", "scaledown", "mirror_bw", "rotate_watermark"],
        ["_cropped", "_resized", "_mirror_bw", "_rot_watermarked"],
    ),
    # AGG3 : crop->(scaledown,mirror,bw,rotate,watermark)
    "AGG3": (
        "step-agg-c-smbrw-{}Sec-batch",
        ["crop", "scaledown_mirror_bw_rotate_watermark"],
        ["_cropped", "_resized_mirror_bw_rot_watermarked"],
    ),
}

# Variant whose settings are exposed through the module-level variables.
# AGG3 is the one that was left active by the last assignment of this cell.
activeVariant = "AGG3"

versionTemplate, stages_id, stages_suffix = pipelineVariants[activeVariant]
step_version = versionTemplate.format(batch_len)

# Memory sizes (MB); identical for every variant.
powerValues = [1024, 2048, 4096, 8192]

# Stage Wise Plots

In [11]:
def plotStageInvocationTimeVsMemory(path, batch_name, step_version, stages_id, displayPlot=False):
    """Plot the average invocation time of every stage against memory size.

    For each stage in ``stages_id`` the file
    ``<path>/<batch_name>/<step_version>/<stage>.json`` is read and one line
    is drawn from its ``stats`` entries. The figure is saved as
    ``./plots/<batch_name>/stagewise/InvocationTimeMemoryPlot_<step_version>.png``.

    Args:
        path: Root directory of the result files.
        batch_name: Batch folder name; also names the output folder.
        step_version: Step-function folder name; also used as the plot title.
        stages_id: Stage names, one JSON file per stage.
        displayPlot: When True the figure is additionally shown via ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 5))

    # Stays empty when there are no stages, so the tick call below is still valid.
    power_values = []
    for stage in stages_id:
        stats_file = '{}/{}/{}/{}.json'.format(path, batch_name, step_version, stage)
        # The context manager closes the file even if the JSON is malformed.
        with open(stats_file, 'r') as f:
            data = json.load(f)
        power_values = [stat['value'] for stat in data['stats']]
        # Divided by 1000 to match the axis label in seconds
        # (presumably the stored durations are milliseconds).
        stage_stats = [stat['averageDuration'] / 1000 for stat in data['stats']]
        ax.plot(power_values, stage_stats, label=stage, marker='o')

    ax.legend()
    ax.set_title(step_version)
    ax.set_xlabel('Memory(MB)')
    ax.set_ylabel('Invocation Time(s)')

    # Tick exactly at the memory sizes of the last stage read.
    ax.set_xticks(power_values)

    # Save Plot; makedirs also creates missing parent folders and is a no-op
    # when the folder already exists.
    out_dir = "./plots/{}/stagewise".format(batch_name)
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig("{}/InvocationTimeMemoryPlot_{}.png".format(out_dir, step_version))
    if displayPlot:
        plt.show()
    # Release the figure: this function is called many times in loops.
    plt.close(fig)
    
def plotStageInvocationCostVsMemory(path, batch_name, step_version, stages_id, displayPlot=False):
    """Plot the average invocation cost of every stage against memory size.

    For each stage in ``stages_id`` the file
    ``<path>/<batch_name>/<step_version>/<stage>.json`` is read and one line
    is drawn from its ``stats`` entries. The figure is saved as
    ``./plots/<batch_name>/stagewise/InvocationCostMemoryPlot_<step_version>.png``.

    Args:
        path: Root directory of the result files.
        batch_name: Batch folder name; also names the output folder.
        step_version: Step-function folder name; also used as the plot title.
        stages_id: Stage names, one JSON file per stage.
        displayPlot: When True the figure is additionally shown via ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 5))

    # Stays empty when there are no stages, so the tick call below is still valid.
    power_values = []
    for stage in stages_id:
        stats_file = '{}/{}/{}/{}.json'.format(path, batch_name, step_version, stage)
        # The context manager closes the file even if the JSON is malformed.
        with open(stats_file, 'r') as f:
            data = json.load(f)
        power_values = [stat['value'] for stat in data['stats']]
        # averagePrice is plotted as stored: the axis is labelled USD and
        # getPipelineTimeCost also sums the value unscaled. Dividing by 1000
        # (the duration conversion) would shrink the costs a thousandfold.
        stage_stats = [stat['averagePrice'] for stat in data['stats']]
        ax.plot(power_values, stage_stats, label=stage, marker='o')

    ax.legend()
    ax.set_title(step_version)
    ax.set_xlabel('Memory(MB)')
    ax.set_ylabel('Invocation Cost(USD)')

    # Tick exactly at the memory sizes of the last stage read.
    ax.set_xticks(power_values)

    # Save Plot; makedirs also creates missing parent folders and is a no-op
    # when the folder already exists.
    out_dir = "./plots/{}/stagewise".format(batch_name)
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig("{}/InvocationCostMemoryPlot_{}.png".format(out_dir, step_version))
    if displayPlot:
        plt.show()
    # Release the figure: this function is called many times in loops.
    plt.close(fig)
    
    
def plotStageInvocationTimeCostVsMemory(path, batch_name, step_version, stages_id, displayPlot=False):
    """Plot invocation time and cost of every stage against memory size.

    Time is drawn on the left y-axis (solid lines, 'o' markers) and cost on a
    twin right y-axis (dashed lines, 'x' markers). Data comes from
    ``<path>/<batch_name>/<step_version>/<stage>.json``; the figure is saved as
    ``./plots/<batch_name>/stagewise/TimeCostMemoryPlot_<step_version>.png``.

    Args:
        path: Root directory of the result files.
        batch_name: Batch folder name; also names the output folder.
        step_version: Step-function folder name; also used as the plot title.
        stages_id: Stage names, one JSON file per stage.
        displayPlot: When True the figure is additionally shown via ``plt.show()``.
    """
    # Left axis carries the time, the twin right axis carries the cost.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax2 = ax.twinx()

    # Stays empty when there are no stages, so the tick call below is still valid.
    power_values = []
    for stage in stages_id:
        stats_file = '{}/{}/{}/{}.json'.format(path, batch_name, step_version, stage)
        # Each file is read once and feeds both axes.
        with open(stats_file, 'r') as f:
            data = json.load(f)
        power_values = [stat['value'] for stat in data['stats']]
        # Divided by 1000 to match the axis label in seconds
        # (presumably the stored durations are milliseconds).
        durations = [stat['averageDuration'] / 1000 for stat in data['stats']]
        # averagePrice is plotted as stored: the axis is labelled USD and
        # getPipelineTimeCost also sums the value unscaled.
        prices = [stat['averagePrice'] for stat in data['stats']]
        ax.plot(power_values, durations, label='{} (time)'.format(stage), marker='o')
        ax2.plot(power_values, prices, label='{} (cost)'.format(stage), marker='x', linestyle="--")

    ax.set_xlabel('Memory(MB)')
    ax.set_ylabel('Invocation Time(s)')
    ax2.set_ylabel('Invocation Cost(USD)')

    # Tick exactly at the memory sizes of the last stage read.
    ax.set_xticks(power_values)
    ax.set_title(step_version)

    # A legend only lists the lines of its own axes, so the entries of both
    # axes are merged into a single legend.
    time_handles, time_labels = ax.get_legend_handles_labels()
    cost_handles, cost_labels = ax2.get_legend_handles_labels()
    ax2.legend(time_handles + cost_handles, time_labels + cost_labels)

    # Save Plot; makedirs also creates missing parent folders and is a no-op
    # when the folder already exists.
    out_dir = "./plots/{}/stagewise".format(batch_name)
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig("{}/TimeCostMemoryPlot_{}.png".format(out_dir, step_version))
    if displayPlot:
        plt.show()
    # Release the figure: this function is called many times in loops.
    plt.close(fig)

In [152]:
# Draw the three stage-wise plots for every batch length and pipeline variant.
for batch_len in [120, 150, 300]:
    batch_name = "{}Sec".format(batch_len)
    for version_template, stages_id in [
        # Mono : (crop,scaledown,mirror,bw,rotate,watermark)
        ("step-agg-csmbrw-{}Sec-batch", ["app"]),
        # Manual : crop->scaledown->mirror->bw->rotate->watermark
        ("step-agg-c-s-m-b-r-w-{}Sec-batch", ["crop", "scaledown", "mirror", "bw", "rotate", "watermark"]),
        # AGG1 : crop->scaledown->(mirror,bw,rotate,watermark)
        ("step-agg-c-s-mbrw-{}Sec-batch", ["crop", "scaledown", "mirror_bw_rotate_watermark"]),
        # AGG2 : crop->scaledown->(mirror,bw)->(rotate,watermark)
        ("step-agg-c-s-mb-rw-{}Sec-batch", ["crop", "scaledown", "mirror_bw", "rotate_watermark"]),
        # AGG3 : crop->(scaledown,mirror,bw,rotate,watermark)
        ("step-agg-c-smbrw-{}Sec-batch", ["crop", "scaledown_mirror_bw_rotate_watermark"]),
    ]:
        step_version = version_template.format(batch_len)
        plotStageInvocationTimeVsMemory(DATA_PATH, batch_name, step_version, stages_id)
        plotStageInvocationCostVsMemory(DATA_PATH, batch_name, step_version, stages_id)
        plotStageInvocationTimeCostVsMemory(DATA_PATH, batch_name, step_version, stages_id)

# Batcher and Collector Plots

In [15]:
# Same three plots for the batcher and then the collector, each of which is a
# single-stage "pipeline" with one result folder per batch length.
for version_template, helper_stage in [
    ("step-agg-{}Sec-batcher", "batcher"),
    ("step-agg-{}Sec-collector", "collector"),
]:
    for batch_len in [120, 150, 300]:
        batch_name = "{}Sec".format(batch_len)
        step_version = version_template.format(batch_len)
        stages_id = [helper_stage]
        plotStageInvocationTimeVsMemory(DATA_PATH, batch_name, step_version, stages_id)
        plotStageInvocationCostVsMemory(DATA_PATH, batch_name, step_version, stages_id)
        plotStageInvocationTimeCostVsMemory(DATA_PATH, batch_name, step_version, stages_id)

# Cumulative Time Cost Plots for Pipeline Variants

In [32]:
def getPipelineTimeCost(path, batch_name, step_version, stages_id, displayPlot=False):
    """Sum the average time and cost of all stages for every memory size.

    Reads ``<path>/<batch_name>/<step_version>/<stage>.json`` for each stage
    in ``stages_id`` and accumulates, per ``stat["value"]`` (the memory size),
    the ``averagePrice`` and ``averageDuration / 1000`` of its ``stats`` entries.

    Args:
        path: Root directory of the result files.
        batch_name: Batch folder name.
        step_version: Step-function folder name.
        stages_id: Stage names, one JSON file per stage.
        displayPlot: Unused; kept so the signature matches the plot helpers.

    Returns:
        A tuple ``(memory_values, total_times, total_costs)`` of three lists
        that are index-aligned and ordered by first appearance of each memory
        size. All three are empty when ``stages_id`` is empty.
    """
    total_cost_dict = defaultdict(int)
    total_time_dict = defaultdict(int)
    for stage in stages_id:
        stats_file = '{}/{}/{}/{}.json'.format(path, batch_name, step_version, stage)
        # The context manager closes the file even if the JSON is malformed.
        with open(stats_file, 'r') as f:
            data = json.load(f)
        for stat in data['stats']:
            total_cost_dict[stat["value"]] += stat["averagePrice"]
            # Divided by 1000, presumably converting milliseconds to seconds.
            total_time_dict[stat["value"]] += stat['averageDuration'] / 1000
    # Both dicts receive their keys in the same order, so the lists line up.
    return list(total_cost_dict.keys()), list(total_time_dict.values()), list(total_cost_dict.values())

def plotPipelineTimeCost(path, batch_name, step_version, stages_id, displayPlot=False):
    """Plot the summed time and cost of a whole pipeline against memory size.

    The per-memory totals come from ``getPipelineTimeCost``. Time is drawn on
    the left y-axis (solid line) and cost on a twin right y-axis (dashed
    line). The figure is saved as
    ``./plots/<batch_name>/cumulative/CumulativeTimeCostMemoryPlot_<step_version>.png``.

    Args:
        path: Root directory of the result files.
        batch_name: Batch folder name; also names the output folder.
        step_version: Step-function folder name; also used as the plot title.
        stages_id: Stage names that make up the pipeline.
        displayPlot: When True the figure is additionally shown via ``plt.show()``.
    """
    power_values, total_time, total_cost = getPipelineTimeCost(path, batch_name, step_version, stages_id)

    # Plot Time
    fig, ax = plt.subplots(figsize=(10, 5))
    time_lines = ax.plot(power_values, total_time, label="Invocation Time")
    ax.set_xlabel('Memory(MB)')
    ax.set_ylabel('Invocation Time(s)')

    # Plot Cost
    ax2 = ax.twinx()
    cost_lines = ax2.plot(power_values, total_cost, linestyle="--", label="Invocation Cost")
    ax2.set_ylabel('Invocation Cost(USD)')

    # Label
    ax.set_xticks(power_values)
    ax.set_title(step_version)
    # One legend for both axes instead of one legend per axes.
    all_lines = time_lines + cost_lines
    ax2.legend(all_lines, [line.get_label() for line in all_lines])

    # Save Plot; makedirs creates "./plots/<batch_name>" only if it is
    # missing, so an already existing batch folder is not an error.
    out_dir = "./plots/{}/cumulative".format(batch_name)
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig("{}/CumulativeTimeCostMemoryPlot_{}.png".format(out_dir, step_version))
    if displayPlot:
        plt.show()
    # Release the figure: this function is called many times in loops.
    plt.close(fig)

### Plot cumulative time and cost for every pipeline variant

In [144]:
# Draw the cumulative time/cost plot for every batch length and pipeline variant.
for batch_len in [120, 150, 300]:
    batch_name = "{}Sec".format(batch_len)

    for version_template, stages_id in [
        # Mono : (crop,scaledown,mirror,bw,rotate,watermark)
        ("step-agg-csmbrw-{}Sec-batch", ["app"]),
        # Manual : crop->scaledown->mirror->bw->rotate->watermark
        ("step-agg-c-s-m-b-r-w-{}Sec-batch", ["crop", "scaledown", "mirror", "bw", "rotate", "watermark"]),
        # AGG1 : crop->scaledown->(mirror,bw,rotate,watermark)
        ("step-agg-c-s-mbrw-{}Sec-batch", ["crop", "scaledown", "mirror_bw_rotate_watermark"]),
        # AGG2 : crop->scaledown->(mirror,bw)->(rotate,watermark)
        ("step-agg-c-s-mb-rw-{}Sec-batch", ["crop", "scaledown", "mirror_bw", "rotate_watermark"]),
        # AGG3 : crop->(scaledown,mirror,bw,rotate,watermark)
        ("step-agg-c-smbrw-{}Sec-batch", ["crop", "scaledown_mirror_bw_rotate_watermark"]),
    ]:
        step_version = version_template.format(batch_len)
        plotPipelineTimeCost(DATA_PATH, batch_name, step_version, stages_id)

### Collect time and cost statistics into a table

In [56]:
# Stage lists of the pipeline variants.
stagesList = [
    ["app"],
    ["crop", "scaledown", "mirror", "bw", "rotate", "watermark"],
    ["crop", "scaledown", "mirror_bw_rotate_watermark"],
    ["crop", "scaledown", "mirror_bw", "rotate_watermark"],
    ["crop", "scaledown_mirror_bw_rotate_watermark"]
]

# Step-function name templates, index-aligned with stagesList
# ("{}" receives the batch length).
pipelineTemplates = [
    "step-agg-csmbrw-{}Sec-batch",
    "step-agg-c-s-m-b-r-w-{}Sec-batch",
    "step-agg-c-s-mbrw-{}Sec-batch",
    "step-agg-c-s-mb-rw-{}Sec-batch",
    "step-agg-c-smbrw-{}Sec-batch",
]

# Column-oriented accumulator; every list must end up with the same length.
allStatsDict = {"pipeline":[], "aggregation":[], "batchLen":[], "memory":[], "timePerBatch":[], "costPerBatch":[], "BatcherCost":[], "CollectorCost":[]}


def _aggregationName(step_pipeline, batch_name):
    """Return the aggregation id embedded in a pipeline name.

    Example: ("step-agg-c-s-mbrw-120Sec-batch", "120Sec") -> "c-s-mbrw".
    The exact prefix and suffix are cut off; str.lstrip/str.rstrip are not
    suitable because they remove any of the given *characters* and could eat
    into the id itself.
    """
    prefix = "step-agg-"
    suffix = "-{}-batch".format(batch_name)
    name = step_pipeline
    if name.startswith(prefix):
        name = name[len(prefix):]
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return name


def _alignCostsToMemory(power_values, helper_memory, helper_cost):
    """Return the helper cost for each entry of power_values, None where the
    helper has no measurement for that memory size."""
    cost_by_memory = dict(zip(helper_memory, helper_cost))
    return [cost_by_memory.get(memory) for memory in power_values]


def _minOrNone(costs):
    """Return the smallest non-None cost, or None when there is none."""
    present = [cost for cost in costs if cost is not None]
    return min(present) if present else None


for batch_len in [120,150,300]:
    batch_name = "{}Sec".format(batch_len)
    pipelines = [template.format(batch_len) for template in pipelineTemplates]
    batcher = "step-agg-{}Sec-batcher".format(batch_len)
    collector = "step-agg-{}Sec-collector".format(batch_len)

    # Batcher/collector stats depend only on the batch length, so they are
    # read once per batch length rather than once per pipeline.
    batcher_memory, batcher_time, batcher_cost_all = getPipelineTimeCost(DATA_PATH, batch_name, batcher, ["batcher"])
    collector_memory, collector_time, collector_cost_all = getPipelineTimeCost(DATA_PATH, batch_name, collector, ["collector"])

    for step_pipeline, stages_id in zip(pipelines, stagesList):
        agg_strat = _aggregationName(step_pipeline, batch_name)
        # Get Stats
        power_values, total_time, total_cost = getPipelineTimeCost(DATA_PATH, batch_name, step_pipeline, stages_id)

        # The pipeline and the batcher/collector are not always measured for
        # the same memory sizes, so the helper costs are matched to the
        # pipeline's memory sizes by value; sizes without a helper
        # measurement get None.
        batcher_cost = _alignCostsToMemory(power_values, batcher_memory, batcher_cost_all)
        collector_cost = _alignCostsToMemory(power_values, collector_memory, collector_cost_all)

        # n per-memory rows plus the "least_cost" and "least_time" rows.
        n = len(power_values)
        allStatsDict["pipeline"] += [step_pipeline]*(n+2)
        allStatsDict["aggregation"] += [agg_strat]*(n+2)
        allStatsDict["batchLen"] += [batch_len]*(n+2)

        # Add Stats for all Memory Values
        allStatsDict["memory"] += power_values
        allStatsDict["timePerBatch"] += total_time
        allStatsDict["costPerBatch"] += total_cost
        allStatsDict["BatcherCost"] += batcher_cost
        allStatsDict["CollectorCost"] += collector_cost

        # Add the best cost: the cheapest pipeline configuration, paired with
        # the cheapest batcher/collector cost among the aligned memory sizes
        # (each minimum is taken independently).
        cheapest = total_cost.index(min(total_cost))
        allStatsDict["memory"] += ["least_cost"]
        allStatsDict["timePerBatch"] += [total_time[cheapest]]
        allStatsDict["costPerBatch"] += [total_cost[cheapest]]
        allStatsDict["BatcherCost"] += [_minOrNone(batcher_cost)]
        allStatsDict["CollectorCost"] += [_minOrNone(collector_cost)]

        # Add the least time: the fastest pipeline configuration; no
        # batcher/collector cost is reported for this row.
        fastest = total_time.index(min(total_time))
        allStatsDict["memory"] += ["least_time"]
        allStatsDict["timePerBatch"] += [total_time[fastest]]
        allStatsDict["costPerBatch"] += [total_cost[fastest]]
        allStatsDict["BatcherCost"] += [None]
        allStatsDict["CollectorCost"] += [None]
        

In [57]:
# Turn the accumulated columns into one table: a row per pipeline and memory
# size, plus the "least_cost" and "least_time" summary rows of each pipeline.
totalStatDF = pd.DataFrame(allStatsDict)

In [59]:
# Show only the rows whose batchLen is 300.
totalStatDF[totalStatDF["batchLen"]==300]

Unnamed: 0,pipeline,aggregation,batchLen,memory,timePerBatch,costPerBatch,BatcherCost,CollectorCost
65,step-agg-csmbrw-300Sec-batch,csmbrw,300,2048,454.640761,0.015276,0.0003,0.002695
66,step-agg-csmbrw-300Sec-batch,csmbrw,300,4096,253.940963,0.017065,0.000535,0.00298
67,step-agg-csmbrw-300Sec-batch,csmbrw,300,8192,190.076352,0.025546,0.001282,0.004628
68,step-agg-csmbrw-300Sec-batch,csmbrw,300,least_cost,454.640761,0.015276,0.0003,0.002695
69,step-agg-csmbrw-300Sec-batch,csmbrw,300,least_time,190.076352,0.025546,,
70,step-agg-c-s-m-b-r-w-300Sec-batch,c-s-m-b-r-w,300,1024,937.744646,0.015754,0.000143,0.00275
71,step-agg-c-s-m-b-r-w-300Sec-batch,c-s-m-b-r-w,300,2048,460.504638,0.015473,0.0003,0.002695
72,step-agg-c-s-m-b-r-w-300Sec-batch,c-s-m-b-r-w,300,4096,257.247818,0.017287,0.000535,0.00298
73,step-agg-c-s-m-b-r-w-300Sec-batch,c-s-m-b-r-w,300,8192,195.335081,0.026253,0.001282,0.004628
74,step-agg-c-s-m-b-r-w-300Sec-batch,c-s-m-b-r-w,300,least_cost,460.504638,0.015473,0.000143,0.002695


In [47]:
# Length of the window that is split into batches: 10*60 seconds
# (presumably the duration of one experiment run; confirm).
TOTAL_RUN_SECONDS = 10*60

# Column arithmetic is used in place of row-wise apply(); the values are the
# same and missing batcher/collector costs still propagate as NaN.
# noOfBatch: how many batches of batchLen seconds fit into the window.
totalStatDF["noOfBatch"] = TOTAL_RUN_SECONDS / totalStatDF["batchLen"]
# timeTotal: per-batch time summed over all batches.
totalStatDF["timeTotal"] = totalStatDF["timePerBatch"] * totalStatDF["noOfBatch"]
# AllBat: per-batch pipeline cost summed over all batches.
totalStatDF["AllBat"] = totalStatDF["costPerBatch"] * totalStatDF["noOfBatch"]
# costTotal: cost of all batches plus the batcher and collector costs, which
# are each added once (not multiplied by the number of batches).
totalStatDF["costTotal"] = totalStatDF["AllBat"] + totalStatDF["BatcherCost"] + totalStatDF["CollectorCost"]

#totalStatDF.to_csv('./data/Pipeline-Memory-TimeCost.csv')

In [55]:
# Show only the rows whose batchLen is 120.
totalStatDF[totalStatDF["batchLen"]==120]

Unnamed: 0,pipeline,aggregation,batchLen,memory,timePerBatch,costPerBatch,BatcherCost,CollectorCost,noOfBatch,timeTotal,costTotal
0,step-agg-csmbrw-120Sec-batch,csmbrw,120,512,784.404546,0.006589,,,5.0,3922.022728,
1,step-agg-csmbrw-120Sec-batch,csmbrw,120,1024,386.522434,0.006494,0.000287,0.002798,5.0,1932.612172,0.035553
2,step-agg-csmbrw-120Sec-batch,csmbrw,120,2048,189.280761,0.00636,0.00036,0.002735,5.0,946.403806,0.034895
3,step-agg-csmbrw-120Sec-batch,csmbrw,120,4096,104.937922,0.007052,0.000837,0.003049,5.0,524.689608,0.039146
4,step-agg-csmbrw-120Sec-batch,csmbrw,120,8192,80.498194,0.010819,0.001586,0.004652,5.0,402.490969,0.060333
5,step-agg-csmbrw-120Sec-batch,csmbrw,120,least_cost,189.280761,0.00636,0.000287,0.002735,5.0,946.403806,0.034821
6,step-agg-csmbrw-120Sec-batch,csmbrw,120,least_time,80.498194,0.010819,,,5.0,402.490969,
7,step-agg-c-s-m-b-r-w-120Sec-batch,c-s-m-b-r-w,120,512,792.182834,0.006654,,,5.0,3960.914169,
8,step-agg-c-s-m-b-r-w-120Sec-batch,c-s-m-b-r-w,120,1024,390.970037,0.006568,0.000287,0.002798,5.0,1954.850186,0.035927
9,step-agg-c-s-m-b-r-w-120Sec-batch,c-s-m-b-r-w,120,2048,193.14484,0.00649,0.00036,0.002735,5.0,965.7242,0.035544
