In [1]:
import pandas as pd
import io


In [2]:
# Load the raw benchmark measurements from disk and preview the first rows.
df = pd.read_csv(filepath_or_buffer='benchmark_data.csv')
df.head(n=5)

Unnamed: 0,task_name,vector_size,cpu_load_%,gpu_util_%,gpu_mem_%,cpu_time_ms,gpu_time_ms
0,vector_add,37218276,16.2,0,96.700202,23.901,77.9776
1,vector_add,25453730,28.8,0,96.700202,14.8487,53.9392
2,vector_add,48413886,25.3,42,96.700202,23.7896,102.7267
3,vector_add,1639423,20.4,0,96.700202,1.8199,1.0926
4,vector_add,13903155,29.6,0,96.700202,8.7203,29.4625


In [None]:
# Label every benchmark row with the device that finished first:
#   1 -> CPU (cpu_time_ms is strictly lower than gpu_time_ms)
#   0 -> GPU (every other case, ties included)
cpu_is_faster = df['cpu_time_ms'].lt(df['gpu_time_ms'])
df['scheduler_choice'] = cpu_is_faster.astype(int)

# Persist the labelled frame to a new CSV file, leaving the index out.
df.to_csv('scheduler_choice_dataset.csv', index=False)


In [5]:
import pandas as pd
import numpy as np

# Add an 'overhead' feature (ratio of CPU time to GPU time) to the labelled
# dataset and write the result to a new CSV file.

# Input and output filenames
input_filename = 'scheduler_choice_dataset.csv'
output_filename = 'dataset_with_overhead.csv'

# Read the dataset into a pandas DataFrame.
# A missing input file is raised as an error instead of calling exit():
# in a notebook, exit() stops the kernel rather than just failing this cell,
# and the original traceback is lost.
try:
    df = pd.read_csv(input_filename)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"Error: The file '{input_filename}' was not found. Please make sure it's in the correct directory."
    ) from err
print("Successfully loaded the dataset.")

# --- Feature Engineering: Create the 'overhead' column ---

# To prevent division by zero, exact zeros in 'gpu_time_ms' are replaced
# with a very small number (epsilon) before dividing. Rows that hit this
# path end up with a very large ratio (cpu_time_ms / 1e-9).
epsilon = 1e-9
gpu_time = df['gpu_time_ms'].replace(0, epsilon)

# overhead = cpu_time_ms / gpu_time_ms: values below 1 mean the CPU run was
# faster, values above 1 mean the GPU run was faster.
# NOTE(review): 'scheduler_choice' is defined as cpu_time_ms < gpu_time_ms, so
# this ratio fully determines that label - confirm this is intended before
# using both columns together in a model.
df['overhead'] = df['cpu_time_ms'] / gpu_time

print("\nSuccessfully added the 'overhead' feature.")

# --- Save and display the result ---

# Save the updated DataFrame to a new CSV file (index omitted)
df.to_csv(output_filename, index=False)
print(f"The new dataset has been saved to '{output_filename}'")

# Display the first few rows of the new DataFrame to verify the result
print("\nFirst 5 rows of the updated dataset:")
print(df.head())

Successfully loaded the dataset.

Successfully added the 'overhead' feature.
The new dataset has been saved to 'dataset_with_overhead.csv'

First 5 rows of the updated dataset:
    task_name  vector_size  cpu_load_%  gpu_util_%  gpu_mem_%  cpu_time_ms  \
0  vector_add     37218276        16.2           0  96.700202      23.9010   
1  vector_add     25453730        28.8           0  96.700202      14.8487   
2  vector_add     48413886        25.3          42  96.700202      23.7896   
3  vector_add      1639423        20.4           0  96.700202       1.8199   
4  vector_add     13903155        29.6           0  96.700202       8.7203   

   gpu_time_ms  scheduler_choice  overhead  
0      77.9776                 1  0.306511  
1      53.9392                 1  0.275286  
2     102.7267                 1  0.231581  
3       1.0926                 0  1.665660  
4      29.4625                 1  0.295980  
