In [15]:
import os
import pandas as pd
import numpy as np

def read_worker_logs(directory):
    """Load every ``worker*.log`` file in ``directory`` into one DataFrame.

    Each non-blank line must have the form ``<timestamp>,<worker_id>``; the
    timestamp is parsed with ``float`` and the worker id with ``int``.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            searched).

    Returns:
        pandas.DataFrame with the columns ``timestamp`` and ``worker_id``,
        one row per parsed line. Both columns exist even when no line was
        read, so callers can index them unconditionally.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            comma-separated fields, or a field cannot be converted.
    """
    records = []
    # os.listdir returns names in arbitrary order; sorting makes the row
    # order reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not (filename.startswith('worker') and filename.endswith('.log')):
            continue
        with open(os.path.join(directory, filename), 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line (e.g. an extra newline at the end of the
                    # file) carries no sample and must not abort the load.
                    continue
                timestamp, worker_id = line.split(',')
                records.append({'timestamp': float(timestamp), 'worker_id': int(worker_id)})
    return pd.DataFrame(records, columns=['timestamp', 'worker_id'])

def calculate_samples_per_second(df):
    """Return the overall sample rate of a log frame.

    The rate is the number of rows divided by the span between the smallest
    and largest value of the ``timestamp`` column.

    Args:
        df: pandas.DataFrame with a numeric ``timestamp`` column; every row
            counts as one sample.

    Returns:
        Rows per unit of ``timestamp`` as a float, or NaN when the span is
        not positive (empty frame, a single row, or identical timestamps),
        because no rate can be derived from a zero-length interval.
    """
    timestamps = df['timestamp']
    # max/min do not depend on row order, so no sort is needed.
    total_time = timestamps.max() - timestamps.min()
    # `not (x > 0)` is also true for NaN, which is what an empty column yields.
    if not total_time > 0:
        return float('nan')
    return len(df) / total_time

def create_latex_table(results, output_path):
    """Write a LaTeX table of throughput and speedup for each experiment.

    Args:
        results: Mapping of experiment name to a dict with the keys
            ``samples_per_sec`` and ``std_dev``. It must contain an entry
            named ``'baseline'``; every speedup is relative to that entry's
            ``samples_per_sec``. Rows are emitted in the mapping's iteration
            order.
        output_path: Path of the ``.tex`` file to write; an existing file is
            overwritten.

    Raises:
        KeyError: If ``'baseline'`` or one of the required keys is missing.
    """
    # Three columns are emitted per row, so the column spec is `lcc`.
    header = r"""
\begin{table}[h]
\centering
\begin{tabular}{lcc}
\hline
Experiment & Samples/sec & Speedup Factor\\
\hline
"""
    footer = r"""\hline
\end{tabular}
\caption{Samples Generated per Second, Speedup Factor}
\label{tab:samples_generated}
\end{table}
"""

    baseline_samples = results['baseline']['samples_per_sec']

    rows = []
    for name, value in results.items():
        speedup = value['samples_per_sec'] / baseline_samples
        # A bare underscore is an error in LaTeX text mode, and experiment
        # names such as `wirehead_local` contain one.
        label = str(name).replace('_', r'\_')
        rows.append(
            f"{label} & {value['samples_per_sec']:.2f} $\\pm$ {value['std_dev']:.2f} & {speedup:.2f}x \\\\\n"
        )

    with open(output_path, 'w') as f:
        f.write(header + ''.join(rows) + footer)

# Read baseline data from previous experiment
def read_baseline_data(csv_path):
    """Compute the mean and standard deviation of the sampling rate in a CSV.

    The file must provide a ``time`` column (parsed as seconds via
    ``pd.to_datetime(..., unit='s')``) and a ``samples_read`` column. Rows are
    ordered by ``time``; the rate of a row is the change in ``samples_read``
    since the previous row divided by the elapsed seconds.
    NOTE(review): this presumes ``samples_read`` is a running total - confirm
    against whatever writes the file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Tuple ``(mean, std)`` of the per-interval rates, using pandas'
        default ``Series.std``.
    """
    df = pd.read_csv(csv_path)
    df['time'] = pd.to_datetime(df['time'], unit='s')
    df = df.sort_values('time')
    elapsed = df['time'].diff().dt.total_seconds()
    rates = df['samples_read'].diff() / elapsed
    # Discard only unusable rates: the first row (no predecessor) and rows
    # with zero elapsed time (inf or NaN). Filtering the rate series rather
    # than the whole frame keeps rows whose *other* columns happen to be NaN.
    rates = rates.replace([np.inf, -np.inf], np.nan).dropna()
    return rates.mean(), rates.std()

def process_wirehead_local(csv_path):
    """Compute the mean and standard deviation of the sampling rate in a CSV.

    The file must provide a ``timestamp`` column (parsed as seconds via
    ``pd.to_datetime(..., unit='s')``) and a ``sample`` column. Rows are
    ordered by ``timestamp``; the rate of a row is the change in ``sample``
    since the previous row divided by the elapsed seconds.
    NOTE(review): this presumes ``sample`` is a running sample index/count -
    confirm against whatever writes the file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Tuple ``(mean, std)`` of the per-interval rates, using pandas'
        default ``Series.std``.
    """
    df = pd.read_csv(csv_path)
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    df = df.sort_values('timestamp')
    elapsed = df['timestamp'].diff().dt.total_seconds()
    rates = df['sample'].diff() / elapsed
    # Discard only unusable rates: the first row (no predecessor) and rows
    # with zero elapsed time (inf or NaN). Filtering the rate series rather
    # than the whole frame keeps rows whose *other* columns happen to be NaN.
    rates = rates.replace([np.inf, -np.inf], np.nan).dropna()
    return rates.mean(), rates.std()

# Main execution
# Input locations for the three experiments and the table to produce.
baseline_path = 'baseline-2024-06-26_15-14/metrics.csv'
multigpu_path = 'generator/1xgen/generator'
wirehead_local_path = 'wirehead_train-2024-06-25_20-07/generator.csv'
table_path = 'svg/samples_generated_table.tex'

baseline_avg, baseline_std = read_baseline_data(baseline_path)
wirehead_local_avg, wirehead_local_std = process_wirehead_local(wirehead_local_path)

df_multigpu = read_worker_logs(multigpu_path)
multigpu_avg = calculate_samples_per_second(df_multigpu)
# Spread of the per-worker rates. The timestamp column is selected explicitly
# so that apply() does not operate on the grouping column, which pandas warns
# about. With a single worker_id this standard deviation is NaN (sample std
# of one value).
multigpu_std = (
    df_multigpu.groupby('worker_id')[['timestamp']]
    .apply(calculate_samples_per_second)
    .std()
)

# One entry per table row; 'baseline' is the reference for the speedup column.
results = {
    'baseline': {
        'samples_per_sec': baseline_avg,
        'std_dev': baseline_std,
        'worker_count': 1
    },
    'wirehead_local': {
        'samples_per_sec': wirehead_local_avg,
        'std_dev': wirehead_local_std,
        'worker_count': 1
    },
    'wirehead_distributed': {
        'samples_per_sec': multigpu_avg,
        'std_dev': multigpu_std,
        'worker_count': df_multigpu['worker_id'].nunique()
    }
}

create_latex_table(results, table_path)

print("Latex table has been saved as 'samples_generated_table.tex'")

Latex table has been saved as 'samples_generated_table.tex'


  multigpu_std = df_multigpu.groupby('worker_id').apply(calculate_samples_per_second).std()


In [29]:
import os
import pandas as pd

def read_worker_logs(base_path, config):
    """Load the worker logs of one generator configuration into a DataFrame.

    Logs are read from ``<base_path>/<config>xgen/generator``; every file
    there whose name starts with ``worker`` and ends with ``.log`` is parsed.
    Each non-blank line must have the form ``<timestamp>,<worker_id>``; the
    timestamp is parsed with ``float`` and the worker id with ``int``.

    Args:
        base_path: Directory that contains the ``<config>xgen`` folders.
        config: Value interpolated in front of ``xgen`` to pick the folder.

    Returns:
        pandas.DataFrame with the columns ``timestamp`` and ``worker_id``,
        one row per parsed line. Both columns exist even when no line was
        read, so callers can index them unconditionally.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            comma-separated fields, or a field cannot be converted.
    """
    directory = os.path.join(base_path, f"{config}xgen", "generator")
    records = []
    # os.listdir returns names in arbitrary order; sorting makes the row
    # order reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not (filename.startswith('worker') and filename.endswith('.log')):
            continue
        with open(os.path.join(directory, filename), 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line (e.g. an extra newline at the end of the
                    # file) carries no sample and must not abort the load.
                    continue
                timestamp, worker_id = line.split(',')
                records.append({'timestamp': float(timestamp), 'worker_id': int(worker_id)})
    return pd.DataFrame(records, columns=['timestamp', 'worker_id'])

def calculate_samples_per_second(df):
    """Return (and print) the overall sample rate of a log frame.

    The rate is the number of rows divided by the span between the smallest
    and largest value of the ``timestamp`` column. The value is also printed,
    which is how the notebook displays the rate of each configuration.

    Args:
        df: pandas.DataFrame with a numeric ``timestamp`` column; every row
            counts as one sample.

    Returns:
        Rows per unit of ``timestamp`` as a float, or NaN when the span is
        not positive (empty frame, a single row, or identical timestamps),
        because no rate can be derived from a zero-length interval.
    """
    timestamps = df['timestamp']
    # max/min do not depend on row order, so no sort is needed.
    total_time = timestamps.max() - timestamps.min()
    # `not (x > 0)` is also true for NaN, which is what an empty column yields.
    if not total_time > 0:
        rate = float('nan')
    else:
        rate = len(df) / total_time
    print(rate)
    return rate

def process_configuration(base_path, config):
    """Measure one generator configuration.

    Reads the configuration's worker logs via ``read_worker_logs`` and
    derives the rate with ``calculate_samples_per_second``.

    Args:
        base_path: Forwarded to ``read_worker_logs``.
        config: Forwarded to ``read_worker_logs``.

    Returns:
        Tuple ``(samples_per_second, worker_count)`` where ``worker_count``
        is the number of distinct ``worker_id`` values in the logs.
    """
    worker_log = read_worker_logs(base_path, config)
    throughput = calculate_samples_per_second(worker_log)
    distinct_workers = worker_log['worker_id'].nunique()
    return throughput, distinct_workers

def create_latex_table(results, output_path):
    """Write a LaTeX table of worker count, throughput and scaling factor.

    Args:
        results: Mapping of experiment name to a dict with the keys
            ``worker_count`` and ``samples_per_sec``. It must contain the
            entry ``'1x wirehead generator'``, whose ``samples_per_sec`` is
            the denominator of every scaling factor. Rows follow the
            mapping's iteration order.
        output_path: Path of the ``.tex`` file to write; an existing file is
            overwritten.
    """
    reference_rate = results['1x wirehead generator']['samples_per_sec']

    preamble = r"""
\begin{table}[h]
\centering
\begin{tabular}{lccc}
\hline
Experiment & Workers & Samples/sec & Scaling Factor \\
\hline
"""
    closing = r"""\hline
\end{tabular}
\caption{Samples Generated per Second, Worker Count, and Scaling Factor for Each Experiment}
\label{tab:samples_generated}
\end{table}
"""

    # Build all rows before touching the file so a bad entry leaves no
    # partial output behind.
    row_lines = []
    for label, entry in results.items():
        ratio = entry['samples_per_sec'] / reference_rate
        row_lines.append(
            "{} & {} & {:.2f} & {:.2f} \\\\\n".format(
                label, entry['worker_count'], entry['samples_per_sec'], ratio
            )
        )

    document = preamble + ''.join(row_lines) + closing
    with open(output_path, 'w') as handle:
        handle.write(document)

# Main execution
base_path = 'generator'
# Generator counts to compare. 1 must be included: create_latex_table uses the
# '1x wirehead generator' entry as the reference for the scaling factor.
configurations = [1, 2, 4, 8, 16]
output_path = 'svg/scaling_samples_generated_table.tex'

results = {}

for config in configurations:
    samples_per_second, worker_count = process_configuration(base_path, config)
    results[f'{config}x wirehead generator'] = {
        'samples_per_sec': samples_per_second,
        'worker_count': worker_count
    }

create_latex_table(results, output_path)
# Report the file that was actually written rather than a hard-coded name.
print(f"Latex table has been saved as '{output_path}'")

0.2512957848743425
0.5000564480791648
0.9832177906530393
1.946361922288535
3.944512843935724
Latex table has been saved as 'samples_generated_table.tex'
