In [1]:
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Discover the Nsight Compute CSV exports, one sub-directory per model.
# NOTE: glob returns paths in arbitrary (filesystem) order. The list is left
# unsorted on purpose: profile_results, memory_pct and compute_pct are built
# parallel to file_list, so the order must not change after this point.
file_list = glob("data/a100/*/*output_ncu.csv")

# Positions of the CSV columns that are kept. The code below reads 'ID',
# 'Kernel Name', 'Section Name', 'Metric Name' and 'Metric Value' from them.
# NOTE(review): presumably the sixth kept column is the metric unit - confirm.
USED_COLUMNS = [0, 6, 13, 14, 15, 16]

# Only metrics reported under this section are used.
SOL_SECTION = "GPU Speed Of Light Throughput"

# Metric name in the CSV -> column name in the per-kernel result table.
METRIC_TO_COLUMN = {
    'Memory Throughput': 'memory',
    'Duration': 'duration',
    'Compute (SM) Throughput': 'compute',
}

# Divisor applied to each parsed metric value.
# NOTE(review): assumes throughputs are percentages and Duration is reported
# in nanoseconds - confirm against the unit column of the CSV.
METRIC_DIVISOR = {'memory': 100, 'duration': 1e9, 'compute': 100}

# For these files, the kernel at the given position in the sorted list of
# unique kernel names is excluded from the analysis.
# NOTE(review): the reason for excluding these kernels is not recorded here.
KERNEL_RANK_TO_DROP = {
    'data/a100/diffusion/batchsize_1_output_ncu.csv': 66,
    'data/a100/diffusion-2/batchsize_1_output_ncu.csv': 48,
}


def load_sol_metrics(source):
    """Read one ncu CSV export into a table with one row per kernel launch.

    Parameters
    ----------
    source : str or file-like
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID``, ``Kernel Name``, ``memory``, ``duration`` and
        ``compute``. Values are parsed as floats and divided by the matching
        entry of ``METRIC_DIVISOR``.
    """
    raw = pd.read_csv(source)
    table = raw.iloc[:, USED_COLUMNS]
    wanted = (table['Metric Name'].isin(list(METRIC_TO_COLUMN))
              & table['Section Name'].isin([SOL_SECTION]))
    table = table[wanted]

    # Pick each metric by name rather than by row stride, so the result does
    # not depend on the order in which the three metric rows appear.
    keys = ['ID', 'Kernel Name']
    merged = None
    for metric, column in METRIC_TO_COLUMN.items():
        part = (table.loc[table['Metric Name'] == metric, keys + ['Metric Value']]
                .rename(columns={'Metric Value': column}))
        merged = part if merged is None else pd.merge(merged, part, on=keys)

    # Values may carry thousands separators ("1,500,000"). Cast to str first
    # so that a column already inferred as numeric is handled as well.
    for column, divisor in METRIC_DIVISOR.items():
        text = merged[column].astype(str).str.replace(',', '', regex=False)
        merged[column] = text.astype(float) / divisor
    return merged


def drop_kernel_by_rank(df, rank):
    """Return ``df`` without the rows of one kernel.

    The kernel is the one at position ``rank`` in the sorted list of unique
    ``Kernel Name`` values. Raises ``IndexError`` if there is no such position.
    """
    kernel_to_drop = sorted(df['Kernel Name'].unique())[rank]
    # A boolean mask avoids treating positional indices as index labels.
    return df[~df['Kernel Name'].isin([kernel_to_drop])]


profile_results = []   # per file: the kernel table grouped by 'Kernel Name'
memory_pct = []        # per file: duration-weighted mean of 'memory'
compute_pct = []       # per file: duration-weighted mean of 'compute'
for file in file_list:
    df = load_sol_metrics(file)
    if file in KERNEL_RANK_TO_DROP:
        df = drop_kernel_by_rank(df, KERNEL_RANK_TO_DROP[file])

    profile_results.append(df.groupby('Kernel Name'))
    # Weight every kernel launch by its share of the total measured duration.
    time_fraction = df['duration']/np.sum(df['duration'])
    memory_pct.append(np.dot(time_fraction, df['memory']))
    compute_pct.append(np.dot(time_fraction, df['compute']))

In [3]:
# Locate the two Stable Diffusion profiles by path instead of by a hard-coded
# position: glob does not guarantee any particular order for file_list.
# (Presumably 'diffusion' is the 1-step run and 'diffusion-2' the 2-step run,
# per the variable names below - confirm.)
idx_1step = file_list.index('data/a100/diffusion/batchsize_1_output_ncu.csv')
idx_2step = file_list.index('data/a100/diffusion-2/batchsize_1_output_ncu.csv')

# Total duration per kernel name in each run.
d_2step = profile_results[idx_2step]['duration'].sum()
d_1step = profile_results[idx_1step]['duration'].sum()

# Linear extrapolation to 50 steps: the 1-step total plus 49 times the
# per-kernel difference between the 2-step and 1-step runs.
# Kernels present in only one of the two runs become NaN in the subtraction.
d_50step = d_1step + 49*(d_2step-d_1step)

# Share of the extrapolated total duration attributed to each kernel name.
time_fraction = d_50step/np.sum(d_50step)
sum(d_50step)

7.037667872000001

In [4]:
# Show the discovered CSV paths in the order glob returned them; the loading
# loop filled profile_results, memory_pct and compute_pct in this same order.
file_list

['data/a100/bert/batchsize_1_output_ncu.csv',
 'data/a100/diffusion/batchsize_1_output_ncu.csv',
 'data/a100/whisper/batchsize_1_output_ncu.csv',
 'data/a100/diffusion-2/batchsize_1_output_ncu.csv',
 'data/a100/gpt/batchsize_1_output_ncu.csv']

In [5]:
# Locate the two Stable Diffusion profiles by path; glob order is arbitrary,
# so fixed positions in file_list / profile_results are not reliable.
idx_1step = file_list.index('data/a100/diffusion/batchsize_1_output_ncu.csv')
idx_2step = file_list.index('data/a100/diffusion-2/batchsize_1_output_ncu.csv')

# Mean memory throughput per kernel name in each run.
m_2step = profile_results[idx_2step]['memory'].mean()
m_1step = profile_results[idx_1step]['memory'].mean()

# 50-step estimate: average the two runs per kernel, then weight each kernel
# by time_fraction (its share of the extrapolated 50-step duration).
sd50_memory = np.dot(time_fraction, 0.5*(m_1step+m_2step))

# Replace the two raw diffusion entries with the single 50-step estimate,
# appended last. The length guard makes this cell safe to re-run: once the
# list has been collapsed it is no longer parallel to file_list and is left
# untouched.
if len(memory_pct) == len(file_list):
    memory_pct[:] = [
        value for position, value in enumerate(memory_pct)
        if position not in (idx_1step, idx_2step)
    ] + [sd50_memory]

In [6]:
# Locate the two Stable Diffusion profiles by path; glob order is arbitrary,
# so fixed positions in file_list / profile_results are not reliable.
idx_1step = file_list.index('data/a100/diffusion/batchsize_1_output_ncu.csv')
idx_2step = file_list.index('data/a100/diffusion-2/batchsize_1_output_ncu.csv')

# Mean compute (SM) throughput per kernel name in each run.
c_2step = profile_results[idx_2step]['compute'].mean()
c_1step = profile_results[idx_1step]['compute'].mean()

# 50-step estimate: average the two runs per kernel, then weight each kernel
# by time_fraction (its share of the extrapolated 50-step duration).
sd50_compute = np.dot(time_fraction, 0.5*(c_1step+c_2step))

# Replace the two raw diffusion entries with the single 50-step estimate,
# appended last. The length guard makes this cell safe to re-run: once the
# list has been collapsed it is no longer parallel to file_list and is left
# untouched.
if len(compute_pct) == len(file_list):
    compute_pct[:] = [
        value for position, value in enumerate(compute_pct)
        if position not in (idx_1step, idx_2step)
    ] + [sd50_compute]

In [7]:
# Build the labels from every file that is not one of the two raw diffusion
# runs, instead of hard-coded positions, so they stay correct whatever order
# glob returned. The combined diffusion estimate is the last list entry.
diffusion_runs = (
    'data/a100/diffusion/batchsize_1_output_ncu.csv',
    'data/a100/diffusion-2/batchsize_1_output_ncu.csv',
)
# Number of trailing characters stripped from each path (27).
suffix_len = len('/batchsize_1_output_ncu.csv')
labels = [path[:-suffix_len] for path in file_list if path not in diffusion_runs]
print(*labels, 'stable diffusion 50 steps')
memory_pct

data/a100/bert data/a100/whisper data/a100/gpt stable diffusion 50 steps


[0.171995496339209,
 0.20844794454463927,
 0.3518461771746365,
 0.4126555404307167]

In [8]:
# Build the labels from every file that is not one of the two raw diffusion
# runs, instead of hard-coded positions, so they stay correct whatever order
# glob returned. The combined diffusion estimate is the last list entry.
diffusion_runs = (
    'data/a100/diffusion/batchsize_1_output_ncu.csv',
    'data/a100/diffusion-2/batchsize_1_output_ncu.csv',
)
# Number of trailing characters stripped from each path (27).
suffix_len = len('/batchsize_1_output_ncu.csv')
labels = [path[:-suffix_len] for path in file_list if path not in diffusion_runs]
print(*labels, 'stable diffusion 50 steps')
compute_pct

data/a100/bert data/a100/whisper data/a100/gpt stable diffusion 50 steps


[0.1754384847841299,
 0.16334168914998157,
 0.18966544607598243,
 0.569321338586786]