In [2]:
# logfile = 'log_bert.csv'
# logfile = 'logs_resnet50.csv'
# # logfile = '../alexnet_log.csv'
# # logfile = 'logs_gpt2.csv'
# logfile = 'resnet_log.csv'

In [1]:
import pandas as pd
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import json
import math

In [2]:
def get_results(logfile):    
    
    # Read the file line by line and convert each line to a list using ast.literal_eval
    data = []
    with open(logfile, 'r') as file:
        for line in file:
            data.append(ast.literal_eval(line))

    # Create a DataFrame from the list of lists with custom column names
    df = pd.DataFrame(data, columns=['prediction', 'timestamp', 'prepro_time', 'predict_time', 'total_time'])

    # Drop the 'prediction' column
#     df.drop('prediction', axis=1, inplace=True)

    # Convert 'timestamp' column to datetime and extract the minute value
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['minute'] = df['timestamp'].dt.minute
    # Add a new column representing the row numbers
    df['Request'] = df.index + 1

    # Print the DataFrame
    df

    percentiles = [0.99, 0.9, 0.5]
    response_times = df['total_time'].quantile(percentiles)
    response_times

    avg_rt = df['total_time'].mean()
    avg_rt

    return response_times, avg_rt, df

In [4]:
def get_results_func(logfile):

    # Read the data from the file
    with open(logfile, 'r') as file:
        data = file.read()

    # Split the data into individual JSON objects
    json_objects = data.strip().split('}\n{')  # Assumes there are no spaces between objects

    # Add back missing curly braces to make each item a valid JSON object
    json_objects = ['{' + obj + '}' for obj in json_objects]

    # Parse each JSON object and store them in a list
    parsed_data = []
    for obj in json_objects:
        try:
            parsed_data.append(json.loads(obj))
        except json.JSONDecodeError:
            pass
#             print(f"Failed to parse JSON: {obj}")

    # Create a DataFrame from the parsed data
    df = pd.DataFrame(parsed_data)
    
    data = df['output'].to_list()
    
    # Remove 'nan' elements using list comprehension
    data = [x for x in data if x is not None and not (isinstance(x, float) and math.isnan(x))]

    # Flatten the inner lists
    flattened_data = []
    for item in data:
        row = [item[0]] + item[1]
        flattened_data.append(row)

    # Convert the flattened data into a DataFrame
    df = pd.DataFrame(flattened_data, columns=['prediction', 'timestamp', 'prepro_time', 'predict_time', 'total_time'])

    # Drop the 'prediction' column
    df.drop('prediction', axis=1, inplace=True)

    # Convert 'timestamp' column to datetime and extract the minute value
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['minute'] = df['timestamp'].dt.minute
    # Add a new column representing the row numbers
    df['Request'] = df.index + 1

    # Print the DataFrame
    df

    percentiles = [0.99, 0.9, 0.5]
    response_times = df['total_time'].quantile(percentiles)
    response_times

    avg_rt = df['total_time'].mean()
    avg_rt

    return response_times, avg_rt, df

# Nature of Deployments

## Alexnet

In [85]:
logfile = '../../experiments/00_container_exps/alexnet_log_1hr_container.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.031107
0.90    0.028936
0.50    0.025761
Name: total_time, dtype: float64
Average RT: 0.02592422779868631


In [86]:
logfile = '../../experiments/01_vm_exps/alexnet_log_1hr_vm.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.041114
0.90    0.037637
0.50    0.034439
Name: total_time, dtype: float64
Average RT: 0.03462948933060648


In [87]:
logfile = '../../experiments/02_func_exps/responses.txt'

response_times, avg_rt, df = get_results_func(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.024498
0.90    0.023814
0.50    0.022099
Name: total_time, dtype: float64
Average RT: 0.02224441675039438


## ResNet

In [88]:
logfile = '../../experiments/00_container_exps/resnet_log_1hr_container.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.118116
0.90    0.102984
0.50    0.081753
Name: total_time, dtype: float64
Average RT: 0.08499220186028598


In [89]:
logfile = '../../experiments/01_vm_exps/resnet_log_1hr_vm.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.182101
0.90    0.164112
0.50    0.148082
Name: total_time, dtype: float64
Average RT: 0.14991033251624555


## BERT

In [90]:
logfile = '../../experiments/00_container_exps/bert_log_1hr_container.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.094091
0.90    0.090199
0.50    0.076289
Name: total_time, dtype: float64
Average RT: 0.07738236080516468


In [92]:
logfile = '../../experiments/01_vm_exps/bert_log_1hr_vm.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    0.104564
0.90    0.092211
0.50    0.081577
Name: total_time, dtype: float64
Average RT: 0.08228314789858732


## GPT-2

In [6]:
logfile = '../../experiments/00_container_exps/gpt2_log_2.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    1.537016
0.90    1.474117
0.50    1.404700
Name: total_time, dtype: float64
Average RT: 1.374212430823933


In [7]:
logfile = '../../experiments/01_vm_exps/gpt2_log_vm_1hr_2.csv'

response_times, avg_rt, df = get_results(logfile)

print("Response Time Percentiles:\n",response_times)
print("Average RT:",avg_rt)

Response Time Percentiles:
 0.99    1.544103
0.90    1.459034
0.50    1.361770
Name: total_time, dtype: float64
Average RT: 1.3495006209070033


In [107]:
# print([x for x in df['total_time'].to_list() if x > 5])

# Containers vs Functions in Conjunction with VMs