In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import json,argparse
import numpy as np
import pandas as pd
# plot the data
import matplotlib.pyplot as plt
import natsort
import os

In [None]:


# Root directory that is searched (recursively, via os.walk) for GPU trace
# files and main/worker log files. The path is relative to the working
# directory the notebook is run from.
dir_ = "../final_analysis_cloudlab_twenty"

# Function to get GPU and main log files from the specified directory
def get_gpu_and_main_log_files(dir_path: str) -> tuple:
    """Collect GPU trace files and main/worker log files found under ``dir_path``.

    The tree is traversed with ``os.walk``. Any file whose name contains
    ``png`` is ignored. The remaining names are matched by substring:

    * ``gpu_idle``  -> added to the GPU idle list
    * ``gpu_util``  -> added to the GPU utilization list
    * ``main`` / ``worker`` -> added to the log list of the containing directory

    Args:
        dir_path: Directory at which the traversal starts.

    Returns:
        A 3-tuple ``(gpu_files, log_files, gpu_util_files)``. ``log_files`` is
        a dict with one entry per visited directory (including directories that
        hold no logs, whose value is an empty list); the other two are lists of
        full paths.
    """
    import os

    idle_paths = []   # paths of GPU idle traces
    util_paths = []   # paths of GPU utilization traces
    logs_by_dir = {}  # visited directory -> main/worker log paths inside it

    for folder, _subdirs, names in os.walk(dir_path):
        folder_logs = logs_by_dir[folder] = []
        for name in names:
            # Rendered plots live next to the raw data; never treat them as input
            if 'png' in name:
                continue
            full_path = os.path.join(folder, name)
            if "gpu_util" in name:
                util_paths.append(full_path)
            if "gpu_idle" in name:
                idle_paths.append(full_path)
            # A name matching both markers is recorded once per matching marker
            for marker in ("main", "worker"):
                if marker in name:
                    folder_logs.append(full_path)

    return idle_paths, logs_by_dir, util_paths

# Discover every GPU trace and main/worker log underneath the configured directory
gpu_files, log_files, gpu_util_files = get_gpu_and_main_log_files(dir_)

# Keep only the directories that actually contributed at least one log file
log_files = {folder: paths for folder, paths in log_files.items() if paths}

# Natural ordering (numeric parts compared as numbers) for the directory keys ...
log_files_keys = natsort.natsorted(log_files)

# ... and for both lists of GPU trace files
gpu_files, gpu_util_files = natsort.natsorted(gpu_files), natsort.natsorted(gpu_util_files)


In [None]:
# Per-configuration results: maps the last path component of each log
# directory to the DataFrame returned by get_everything for that directory.
df_dict_everything = {}

def get_batch_pin_memory_time_with_ts(log_file: str) -> pd.DataFrame:
    """Parse the per-batch pin-memory records of a log file.

    Only lines containing ``SBatchPinMemory`` are used. Each one is split on
    commas: the batch id is the integer that follows the first underscore of
    the first field, the last field divided by 1e9 becomes ``pin_memory_time``
    (presumably nanoseconds converted to seconds -- confirm against the
    logger), and the second-to-last field is stored unchanged as
    ``pin_memory_time_ts``. If a batch id occurs more than once, the values of
    its last occurrence win.

    Args:
        log_file: Path of the log file to read.

    Returns:
        DataFrame indexed by ``batch_id`` (ascending) with the columns
        ``pin_memory_time`` and ``pin_memory_time_ts``.
    """
    scale = 1000 * 1000 * 1000
    per_batch = {}  # batch id -> (pin_memory_time, pin_memory_time_ts)

    with open(log_file) as handle:
        for record in handle:
            if "SBatchPinMemory" not in record:
                continue
            fields = record.split(',')
            batch = int(fields[0].split('_')[1])
            per_batch[batch] = (float(fields[-1]) / scale, float(fields[-2]))

    frame = pd.DataFrame({
        'batch_id': list(per_batch),
        'pin_memory_time': [duration for duration, _ in per_batch.values()],
        'pin_memory_time_ts': [stamp for _, stamp in per_batch.values()],
    })
    return frame.set_index('batch_id').sort_index()

def get_batch_idle_times_with_ts(log_file: str) -> pd.DataFrame:
    """Parse the per-batch wait records of a log file.

    Only lines containing ``SBatchWait`` are used. Each one is split on
    commas: the batch id is the integer that follows the first underscore of
    the first field, the second field is stored unchanged as ``wait_time_ts``
    and the third field divided by 1e9 becomes ``wait_time`` (presumably
    nanoseconds converted to seconds -- confirm against the logger). If a
    batch id occurs more than once, the values of its last occurrence win.

    Args:
        log_file: Path of the log file to read.

    Returns:
        DataFrame indexed by ``batch_id`` (ascending) with the columns
        ``wait_time`` and ``wait_time_ts``.
    """
    scale = 1000 * 1000 * 1000
    per_batch = {}  # batch id -> (wait_time, wait_time_ts)

    with open(log_file) as handle:
        for record in handle:
            if "SBatchWait" not in record:
                continue
            fields = record.split(',')
            batch = int(fields[0].split('_')[1])
            per_batch[batch] = (float(fields[2]) / scale, float(fields[1]))

    frame = pd.DataFrame({
        'batch_id': list(per_batch),
        'wait_time': [duration for duration, _ in per_batch.values()],
        'wait_time_ts': [stamp for _, stamp in per_batch.values()],
    })
    return frame.set_index('batch_id').sort_index()

def get_batch_preprocessing_times_with_ts(log_file: str) -> pd.DataFrame:
    """Parse the per-batch preprocessing records of a log file.

    Only lines containing ``SBatchPreprocessed`` are used. Each one is split
    on commas: the batch id is the integer that follows the first underscore
    of the first field, the last field divided by 1e9 becomes
    ``preprocessing_time`` (presumably nanoseconds converted to seconds --
    confirm against the logger), and the second-to-last field is stored
    unchanged as ``preprocessing_time_ts``. If a batch id occurs more than
    once, the values of its last occurrence win.

    Args:
        log_file: Path of the log file to read.

    Returns:
        DataFrame indexed by ``batch_id`` (ascending) with the columns
        ``preprocessing_time`` and ``preprocessing_time_ts``.
    """
    scale = 1000 * 1000 * 1000
    per_batch = {}  # batch id -> (preprocessing_time, preprocessing_time_ts)

    with open(log_file) as handle:
        for record in handle:
            if "SBatchPreprocessed" not in record:
                continue
            fields = record.split(',')
            batch = int(fields[0].split('_')[1])
            per_batch[batch] = (float(fields[-1]) / scale, float(fields[-2]))

    frame = pd.DataFrame({
        'batch_id': list(per_batch),
        'preprocessing_time': [duration for duration, _ in per_batch.values()],
        'preprocessing_time_ts': [stamp for _, stamp in per_batch.values()],
    })
    return frame.set_index('batch_id').sort_index()

def get_batch_consumed_times_with_ts(log_file: str) -> pd.DataFrame:
    """Parse the per-batch consumed records of a log file.

    Only lines containing ``SBatchConsumed`` are used. Each one is split on
    commas: the batch id is the integer that follows the first underscore of
    the first field, the last field divided by 1e9 becomes ``consumed_time``
    (presumably nanoseconds converted to seconds -- confirm against the
    logger), and the second-to-last field is parsed as an integer and stored
    as ``consumed_time_ts``. If a batch id occurs more than once, the values
    of its last occurrence win.

    Args:
        log_file: Path of the log file to read.

    Returns:
        DataFrame indexed by ``batch_id`` (ascending) with the columns
        ``consumed_time`` and ``consumed_time_ts``.
    """
    with open(log_file) as handle:
        records = handle.readlines()

    scale = 1000 * 1000 * 1000
    per_batch = {}  # batch id -> (consumed_time, consumed_time_ts)
    for record in records:
        if "SBatchConsumed" not in record:
            continue
        fields = record.split(',')
        batch = int(fields[0].split('_')[1])
        per_batch[batch] = (float(fields[-1]) / scale, int(fields[-2]))

    frame = pd.DataFrame({
        'batch_id': list(per_batch),
        'consumed_time': [duration for duration, _ in per_batch.values()],
        'consumed_time_ts': [stamp for _, stamp in per_batch.values()],
    })
    return frame.set_index('batch_id').sort_index()

def get_gpu_util_time(gpu_util_file: str) -> pd.DataFrame:
    """Read a GPU utilization trace into a per-batch DataFrame.

    Every line containing ``ms`` contributes one sample: its first
    whitespace-separated token is parsed as a float and divided by 1000
    (presumably milliseconds converted to seconds -- confirm against the
    trace writer). Samples are numbered 1, 2, 3, ... in file order and that
    number is used as ``batch_id``.

    Args:
        gpu_util_file: Path of the trace file to read.

    Returns:
        DataFrame indexed by ``batch_id`` with a single ``util_time`` column.
    """
    with open(gpu_util_file) as handle:
        samples = [float(row.split()[0]) / 1000 for row in handle if "ms" in row]

    # Batch ids are simply the 1-based position of each matching line
    frame = pd.DataFrame(list(enumerate(samples, start=1)), columns=['batch_id', 'util_time'])
    return frame.set_index('batch_id').sort_index()

def get_gpu_wait_time(gpu_file: str) -> pd.DataFrame:
    """Read a GPU idle trace into a per-batch DataFrame.

    Every line containing ``ms`` contributes one sample: its first
    whitespace-separated token is parsed as a float and divided by 1000
    (presumably milliseconds converted to seconds -- confirm against the
    trace writer). Samples are numbered 1, 2, 3, ... in file order and that
    number is used as ``batch_id``.

    Args:
        gpu_file: Path of the trace file to read.

    Returns:
        DataFrame indexed by ``batch_id`` with a single ``idle_time`` column.
    """
    with open(gpu_file) as handle:
        samples = [float(row.split()[0]) / 1000 for row in handle if "ms" in row]

    # Batch ids are simply the 1-based position of each matching line
    frame = pd.DataFrame(list(enumerate(samples, start=1)), columns=['batch_id', 'idle_time'])
    return frame.set_index('batch_id').sort_index()

def get_everything(log_files, gpu_file, gpu_util_file):
    # get e2e time
    df_main = None
    for file in log_files:
        if "main" in file:
            # get the batch wait time by ID
            df_batch_wait_times = get_batch_idle_times_with_ts(file)
            # concat with df_main using batch_id
            if df_main is None:
                df_main = df_batch_wait_times
            else:
                df_main = df_main.combine_first(df_batch_wait_times)

            df_batch_consumed_times = get_batch_consumed_times_with_ts(file)
            # concat with df_main using batch_id
            if df_main is None:
                df_main = df_batch_consumed_times
            else:
                df_main = df_main.combine_first(df_batch_consumed_times)
            
            df_batch_pin_memory_times = get_batch_pin_memory_time_with_ts(file)
            # concat with df_main using batch_id
            if df_main is None:
                df_main = df_batch_pin_memory_times
            else:
                df_main = df_main.combine_first(df_batch_pin_memory_times)
            
        if "worker" in file:
            # get batch preprocessing time by ID
            df_batch_preprocessing_times = get_batch_preprocessing_times_with_ts(file)
            # concat with df_main using batch_id
            if df_main is None:
                df_main = df_batch_preprocessing_times
            else:
                df_main = df_main.combine_first(df_batch_preprocessing_times)
    # in df_main, calculate wait_time_ts - (preprocessing_time_ts + preprocessing_time) for each batch and store in a new column
    df_main['wait_time_preprocessing_time_ts_diff'] = df_main['wait_time_ts']/(1000 * 1000 * 1000) - (df_main['preprocessing_time_ts']/(1000 * 1000 * 1000) + df_main['preprocessing_time'])
    df_main['wait_time_preprocessing_time_ts_diff'] = df_main['wait_time_preprocessing_time_ts_diff']
    # in df_main, calculate (consumed_time_ts - (preprocessing_time_ts + preprocessing_time) prefor each batch and store in a new column
    df_main['consumed_time_preprocessing_time_ts_diff'] = df_main['consumed_time_ts']/(1000 * 1000 * 1000) - (df_main['preprocessing_time_ts']/(1000 * 1000 * 1000) + df_main['preprocessing_time'])
    df_main['consumed_time_preprocessing_time_ts_diff'] = df_main['consumed_time_preprocessing_time_ts_diff']
    # get the idle times
    df_gpu = get_gpu_wait_time(gpu_file)
    df_gpu_util = get_gpu_util_time(gpu_util_file)
    # concat with df_main using batch_id
    # df_main = pd.concat([df_main,df_gpu],axis=1)
    df_main = df_main.combine_first(df_gpu)
    df_main = df_main.combine_first(df_gpu_util)
    return df_main

# Pair every log directory with the GPU traces stored in that same directory.
# Pairing by list position (zip) silently mismatches configurations as soon as
# one directory lacks a log or a trace, because the three lists then shift
# relative to each other.
gpu_idle_by_dir = {os.path.normpath(os.path.dirname(path)): path for path in gpu_files}
gpu_util_by_dir = {os.path.normpath(os.path.dirname(path)): path for path in gpu_util_files}

for log_dir in log_files_keys:
    gpu_file = gpu_idle_by_dir.get(os.path.normpath(log_dir))
    gpu_util_file = gpu_util_by_dir.get(os.path.normpath(log_dir))
    if gpu_file is None or gpu_util_file is None:
        print(f"Skipping {log_dir}: gpu_idle or gpu_util file is missing.")
        continue
    log_files_list = log_files[log_dir]
    # keep only the text after the last '/'
    key = log_dir.split('/')[-1]
    print(key)
    df_dict_everything[key] = get_everything(log_files_list, gpu_file, gpu_util_file)


In [None]:
import matplotlib.pyplot as plt

# All wait-time figures are written below this directory
os.makedirs("figs/ooo/wait_times", exist_ok=True)

# One bar chart of per-batch wait time for every configuration
for key, df in df_dict_everything.items():
    print(f"Processing DataFrame for key: {key}")

    # Nothing to draw when the run has no wait_time column
    if 'wait_time' not in df.columns:
        print(f"Skipping {key} as 'wait_time' column is missing.")
        continue

    wait_times = df['wait_time']

    # Mean wait time, shown as a horizontal reference line
    avg_wait_time = wait_times.mean()
    print(f"Average wait time for {key}: {avg_wait_time:.2f} seconds")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(df.index, wait_times, color='skyblue', label='Batch Wait Time')
    ax.axhline(y=avg_wait_time, color='red', linestyle='--', label=f'Average Wait Time: {avg_wait_time:.2f}s')
    ax.set_title(f'Batch Wait Times for {key}')
    ax.set_xlabel('Batch ID')
    ax.set_ylabel('Wait Time (seconds)')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Write the figure to disk and release it instead of displaying it
    fig.savefig(f"figs/ooo/wait_times/{key}_wait_times.png")
    plt.close(fig)


In [31]:
# Number of leading rows treated as warm-up and left out of the statistics
WARMUP_BATCHES = 100

for key, df in df_dict_everything.items():
    print(f"Processing DataFrame for key: {key}")

    if 'wait_time' not in df.columns:
        print(f"Skipping {key} as 'wait_time' column is missing.")
        continue

    # Skip the first WARMUP_BATCHES rows. Runs that are not longer than the
    # warm-up window are kept whole (the same rule the std-dev summary cell
    # uses), otherwise the statistics would be NaN and the plot empty.
    steady = df.iloc[WARMUP_BATCHES:] if len(df) > WARMUP_BATCHES else df

    # Calculate variance and standard deviation
    wait_time_variance = steady['wait_time'].var()
    wait_time_std_dev = steady['wait_time'].std()
    print(f"Wait time variance for {key}: {wait_time_variance:.2f} seconds^2")
    print(f"Wait time standard deviation for {key}: {wait_time_std_dev:.2f} seconds")

    # Plot the retained batches with the standard deviation as a reference line
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(steady.index, steady['wait_time'], color='skyblue', label='Batch Wait Time')
    ax.axhline(y=wait_time_std_dev, color='green', linestyle='--', label=f'Standard Deviation: {wait_time_std_dev:.2f}s')
    ax.set_title(f'Batch Wait Time Variance and Standard Deviation for {key}')
    ax.set_xlabel('Batch ID')
    ax.set_ylabel('Wait Time (seconds)')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Save the plot as an image file and release the figure
    fig.savefig(f"figs/ooo/wait_times/{key}_wait_time_variance_std_dev.png")
    plt.close(fig)


Processing DataFrame for key: b128_gpu2_w4
Wait time variance for b128_gpu2_w4: 0.05 seconds^2
Wait time standard deviation for b128_gpu2_w4: 0.22 seconds
Processing DataFrame for key: b128_gpu2_w8
Wait time variance for b128_gpu2_w8: 0.01 seconds^2
Wait time standard deviation for b128_gpu2_w8: 0.10 seconds
Processing DataFrame for key: b128_gpu2_w12
Wait time variance for b128_gpu2_w12: 0.00 seconds^2
Wait time standard deviation for b128_gpu2_w12: 0.04 seconds
Processing DataFrame for key: b128_gpu2_w16
Wait time variance for b128_gpu2_w16: 0.00 seconds^2
Wait time standard deviation for b128_gpu2_w16: 0.01 seconds
Processing DataFrame for key: b128_gpu2_w20
Wait time variance for b128_gpu2_w20: 0.00 seconds^2
Wait time standard deviation for b128_gpu2_w20: 0.00 seconds
Processing DataFrame for key: b128_gpu2_w24
Wait time variance for b128_gpu2_w24: 0.00 seconds^2
Wait time standard deviation for b128_gpu2_w24: 0.00 seconds
Processing DataFrame for key: b128_gpu2_w28
Wait time vari

In [None]:
import os
import matplotlib.pyplot as plt

# Ensure the output directory exists
output_dir = "figs/ooo/wait_times"
os.makedirs(output_dir, exist_ok=True)

# Resolution used when writing the summary figure to disk. It is passed to
# savefig instead of being written to plt.rcParams['figure.dpi']: the rcParam
# only affects figures created afterwards, so it would not change this figure
# but would silently apply to every later figure in the kernel.
SUMMARY_FIGURE_DPI = 1000

# Standard deviation of the wait time for each configuration
std_dev_dict = {}

for key, df in df_dict_everything.items():
    print(f"Processing DataFrame for key: {key}")

    if 'wait_time' not in df.columns:
        print(f"Skipping {key} as 'wait_time' column is missing.")
        continue

    # Skip the first 100 rows; runs that are not longer than that are kept whole
    df = df.iloc[100:] if len(df) > 100 else df

    # Calculate standard deviation
    std_dev_dict[key] = df['wait_time'].std()

# Plot how the standard deviation changes across configurations
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(list(std_dev_dict.keys()), list(std_dev_dict.values()), marker='o', linestyle='-', color='blue', label='Standard Deviation')
ax.set_title('Changing Standard Deviation of Wait Times Across Configurations')
ax.set_xlabel('Configuration')
ax.set_ylabel('Standard Deviation (seconds)')
ax.grid(axis='y', linestyle='--', alpha=0.7)
# Configuration names are long; rotate them so they do not overlap
plt.setp(ax.get_xticklabels(), rotation=90, ha='right')
ax.legend()
fig.tight_layout()
fig.savefig(f"{output_dir}/std_dev_across_configurations.png", dpi=SUMMARY_FIGURE_DPI)
plt.show()


Processing DataFrame for key: b128_gpu2_w4
Processing DataFrame for key: b128_gpu2_w8
Processing DataFrame for key: b128_gpu2_w12
Processing DataFrame for key: b128_gpu2_w16
Processing DataFrame for key: b128_gpu2_w20
Processing DataFrame for key: b128_gpu2_w24
Processing DataFrame for key: b128_gpu2_w28
Processing DataFrame for key: b128_gpu3_w4
Processing DataFrame for key: b128_gpu3_w8
Processing DataFrame for key: b128_gpu3_w12
Processing DataFrame for key: b128_gpu3_w16
Processing DataFrame for key: b128_gpu3_w20
Processing DataFrame for key: b128_gpu3_w24
Processing DataFrame for key: b128_gpu3_w28
Processing DataFrame for key: b128_gpu4_w4
Processing DataFrame for key: b128_gpu4_w8
Processing DataFrame for key: b128_gpu4_w12
Processing DataFrame for key: b128_gpu4_w16
Processing DataFrame for key: b128_gpu4_w20
Processing DataFrame for key: b128_gpu4_w24
Processing DataFrame for key: b128_gpu4_w28
Processing DataFrame for key: b256_gpu2_w4
Processing DataFrame for key: b256_gpu2

In [None]:
# One bar chart per configuration with the median wait time as reference line
for key, df in df_dict_everything.items():
    print(f"Processing DataFrame for key: {key}")

    # Nothing to draw when the run has no wait_time column
    if 'wait_time' not in df.columns:
        print(f"Skipping {key} as 'wait_time' column is missing.")
        continue

    wait_times = df['wait_time']

    # Median of the per-batch wait times
    median_wait_time = wait_times.median()
    print(f"Median wait time for {key}: {median_wait_time:.2f} seconds")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(df.index, wait_times, color='skyblue', label='Batch Wait Time')
    ax.axhline(y=median_wait_time, color='purple', linestyle='--', label=f'Median Wait Time: {median_wait_time:.2f}s')
    ax.set_title(f'Batch Median Wait Time for {key}')
    ax.set_xlabel('Batch ID')
    ax.set_ylabel('Wait Time (seconds)')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Write the figure to disk and release it instead of displaying it
    fig.savefig(f"figs/ooo/wait_times/{key}_median_wait_times.png")
    plt.close(fig)


In [None]:
for key, df in df_dict_everything.items():
    print(f"Processing DataFrame for key: {key}")

    if 'wait_time' not in df.columns:
        print(f"Skipping {key} as 'wait_time' column is missing.")
        continue

    wait_times = df['wait_time']

    # Batches per second of accumulated wait time. Only rows that actually
    # have a wait time are counted: sum() ignores NaN, so counting every row
    # (len(df)) would inflate the numerator whenever the frame holds batch ids
    # that appear only in other logs or traces.
    total_batches = wait_times.count()
    total_time = wait_times.sum()
    throughput = total_batches / total_time if total_time > 0 else 0
    print(f"Throughput for {key}: {throughput:.2f} batches per second")

    # Plot
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(df.index, wait_times, color='skyblue', label='Batch Wait Time')
    # NOTE(review): this line is in batches/s while the axis is in seconds;
    # it is kept so existing figures stay comparable -- consider a second axis.
    ax.axhline(y=throughput, color='brown', linestyle='--', label=f'Throughput: {throughput:.2f} batches/s')
    ax.set_title(f'Batch Throughput for {key}')
    ax.set_xlabel('Batch ID')
    ax.set_ylabel('Wait Time (seconds)')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Save the plot as an image file and release the figure
    fig.savefig(f"figs/ooo/wait_times/{key}_throughput.png")
    plt.close(fig)

In [None]:
# Columns the cell has always required plus the ones the formula really reads
QUEUE_OCCUPANCY_COLUMNS = (
    'wait_time',
    'pin_memory_time',
    'pin_memory_time_ts',
    'preprocessing_time_ts',
    'preprocessing_time',
)

for key, df in df_dict_everything.items():
    print(f"Processing DataFrame for key: {key}")

    # Skip instead of failing with KeyError when an input column is absent
    if any(column not in df.columns for column in QUEUE_OCCUPANCY_COLUMNS):
        print(f"Skipping {key} as required columns are missing.")
        continue

    # Queue occupancy time: pin-memory timestamp minus the moment preprocessing
    # finished (preprocessing timestamp + preprocessing duration). Timestamps
    # are divided by 1e9 so every term shares the unit of preprocessing_time.
    # The result is stored on the shared frame as 'queue_occupancy_time'.
    df['queue_occupancy_time'] = df['pin_memory_time_ts'] / (1000 * 1000 * 1000) - df['preprocessing_time_ts'] / (1000 * 1000 * 1000) - df['preprocessing_time']

    # Plot
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(df.index, df['queue_occupancy_time'], color='skyblue', label='Queue Occupancy Time')
    ax.set_title(f'Queue Occupancy Time for {key}')
    ax.set_xlabel('Batch ID')
    ax.set_ylabel('Queue Occupancy Time (seconds)')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Save the plot as an image file and release the figure
    fig.savefig(f"figs/ooo/wait_times/{key}_queue_occupancy_time.png")
    plt.close(fig)

In [None]:
for key, df in df_dict_everything.items():
    if 'preprocessing_time_ts' not in df.columns or 'consumed_time_ts' not in df.columns:
        continue

    # Batch ids ordered by preprocessing timestamp and by consumed timestamp
    preprocessing_order = df['preprocessing_time_ts'].sort_values().index.tolist()
    consumed_order = df['consumed_time_ts'].sort_values().index.tolist()

    # Position of every batch id in each ordering (dict lookup instead of
    # list.index, which made the computation quadratic in the batch count)
    preprocessing_rank = {batch_id: pos for pos, batch_id in enumerate(preprocessing_order)}
    consumed_rank = {batch_id: pos for pos, batch_id in enumerate(consumed_order)}

    # Build the deviations in df.index order so each bar sits at the batch id
    # it belongs to; iterating in preprocessing order attributed the values to
    # the wrong batch ids whenever that order differed from the index order.
    deviations = [abs(preprocessing_rank[batch_id] - consumed_rank[batch_id])
                  for batch_id in df.index]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(df.index, deviations, color='skyblue', label='Order Deviation')
    ax.set_title(f'Processing Order Deviation for {key}')
    ax.set_xlabel('Batch ID')
    ax.set_ylabel('Deviation')
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Save the plot as an image file and release the figure
    fig.savefig(f"figs/ooo/wait_times/{key}_processing_order_deviation.png")
    plt.close(fig)

