In [2]:
# experiment tracker
sys.path.append('../../')
sys.path.append('../')
sys.path.append('../../experiment-impact-tracker/')

from experiment_impact_tracker.data_interface import DataInterface
from experiment_impact_tracker.data_utils import *

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


### Notes
### local vs CC seem to have a huge FLOP difference during setup

loading region bounding boxes for computing carbon emissions region, this may take a moment...
 454/454... rate=405.01 Hz, eta=0:00:00, total=0:00:01, wall=23:07 EST
Done!


In [3]:
# Root of the project, relative to this notebook.
project_dir = '../'

# Per-pipeline experiment folders. project_dir already ends in '/', so these
# paths contain a (harmless) double slash.
fastsurfer_exp_dir = project_dir + '/FastSurfer_experiments/'
preproc_exp_dir = project_dir + '/preproc_pipeline_experiments/'

# Tracker result folders for each pipeline; sub-folders of these are passed
# as log directories further down.
fastsurfer_results_dir = fastsurfer_exp_dir + 'results/exp_impact_tracker/'
preproc_results_dir = preproc_exp_dir + 'results/exp_impact_tracker/'

# CSV with the pilot subject ids (read below via its 'eid' column).
subject_lists = project_dir + 'subject_lists/ukb_pilot_subjects.csv'

In [4]:
def _epoch_power_draws(power_df, epoch_end_times):
    """Return the attributable power draw accumulated inside each epoch.

    For every boundary in ``epoch_end_times`` the running total of
    ``total_attributable_power_draw`` over samples with ``timestamp`` strictly
    before the boundary is computed; the draw of an epoch is the difference
    between its running total and the running total of the previous boundary.
    """
    draws = []
    previous_cumulative = 0
    for end_time in epoch_end_times:
        # Running total from t=0 up to (not including) this boundary.
        before_boundary = power_df['timestamp'] < end_time
        cumulative = power_df.loc[before_boundary, 'total_attributable_power_draw'].sum()
        # Subtract the previous *cumulative* total, not the previous interval's
        # draw, otherwise every epoch after the second is over-counted.
        draws.append(cumulative - previous_cumulative)
        previous_cumulative = cumulative
    return draws


def get_tracker_data(experiment_name, logdir, use_cuda, read_flops):
    """Load impact-tracker (and optionally PAPI flop) logs for one experiment.

    Parameters
    ----------
    experiment_name : label stored in the ``experiment_name`` column of every
        returned frame.
    logdir : tracker log directory; also expected to contain
        ``compute_costs_flop.csv`` when ``read_flops`` is true.
    use_cuda : if true, the nvidia power columns are read as well and added to
        the total attributable power draw.
    read_flops : if true, read the flop log and attach a per-epoch power draw.

    Returns
    -------
    (power_df, flops_df, tracker_summary_df)
        ``power_df`` -- power samples with ``timestamp`` shifted to start at 0
        and a ``total_attributable_power_draw`` column.
        ``flops_df`` -- flop log with ``start_time`` shifted to start at 0 and
        an ``epoch_power_draw`` column, or ``None`` when ``read_flops`` is false.
        ``tracker_summary_df`` -- single-row frame with ``experiment_name``,
        ``total_power``, ``total_carbon``, ``PUE`` and ``total_duration``.
    """
    import os  # local import so the function does not depend on another cell

    power_columns = ['timestamp', 'rapl_power_draw_absolute', 'rapl_estimated_attributable_power_draw']
    if use_cuda:
        power_columns += ['nvidia_draw_absolute', 'nvidia_estimated_attributable_power_draw']

    # impact tracker log
    tracker_df = load_data_into_frame(logdir)
    # .copy(): the columns added below must not be written into a slice of the
    # tracker frame (pandas SettingWithCopyWarning).
    power_df = tracker_df[0][power_columns].copy()

    total_draw = power_df['rapl_estimated_attributable_power_draw']
    if use_cuda:
        total_draw = total_draw + power_df['nvidia_estimated_attributable_power_draw']
    power_df['total_attributable_power_draw'] = total_draw

    # start time from 0 (positional access: does not rely on an index label 0)
    power_df['timestamp'] = power_df['timestamp'] - power_df['timestamp'].iloc[0]
    power_df['experiment_name'] = experiment_name

    # papi log
    flops_df = None
    if read_flops:
        flops_df = pd.read_csv(os.path.join(logdir, 'compute_costs_flop.csv'))
        flops_df['experiment_name'] = experiment_name
        # NOTE(review): the flop log and the tracker log are zeroed
        # independently; this assumes both started at the same moment -- confirm.
        flops_df['start_time'] = flops_df['start_time'] - flops_df['start_time'].iloc[0]

        # Aggregate power draw per epoch, one epoch per flop-log row (e.g.
        # setup, axial, aggr). An epoch ends where the next one starts; the
        # last epoch ends at the last power sample (which, because of the
        # strict '<' comparison, is itself not counted).
        epoch_end_times = list(flops_df['start_time'].values[1:]) + [power_df['timestamp'].values[-1]]
        flops_df['epoch_power_draw'] = _epoch_power_draws(power_df, epoch_end_times)

    data_interface = DataInterface([logdir])
    # Divides the timestamp span by 3600 -- presumably seconds to hours; confirm units.
    total_duration = (power_df['timestamp'].values[-1] - power_df['timestamp'].values[0]) / 3600

    tracker_summary_df = pd.DataFrame(columns=['experiment_name', 'total_power', 'total_carbon', 'PUE', 'total_duration'])
    tracker_summary_df.loc[0] = [
        experiment_name,
        data_interface.total_power,
        data_interface.kg_carbon,
        data_interface.PUE,
        total_duration,
    ]
    return power_df, flops_df, tracker_summary_df
 



### Custom set of experiments

In [None]:
# experiment label -> (tracker log dir, use_cuda)
experiment_dict = {
                   'FastSurfer_local_cpu':('{}/track_sub_000_benchmark/'.format(fastsurfer_results_dir),False), 
                   'FastSurfer_CC_cpu':('{}/CC_cpu_test/'.format(fastsurfer_results_dir),False),
                   'FastSurfer_CC_gpu':('{}/CC_gpu_test/'.format(fastsurfer_results_dir),True),
                   'autorecon1_local_test_1':('{}/ReconAll_test_local_cpu/'.format(preproc_results_dir),False),
                   'autorecon2-3_local_test_1':('{}/ReconAll_test_location_override/'.format(preproc_results_dir),False),
                   'autorecon1_CC_test_1':('{}/ReconAll_test_CC_cpu/'.format(preproc_results_dir),False),
                   'autorecon1_CC_ukb_1':('{}/ReconAll_ukb_CC_cpu/'.format(preproc_results_dir),False)
                   }

read_flops = True

# Collect the per-experiment frames and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole
# accumulated frame on every iteration.
power_frames = []
flops_frames = []
summary_frames = []

for k,v in experiment_dict.items():
    power_df, flops_df, tracker_summary_df = get_tracker_data(k, v[0], v[1], read_flops)
    power_frames.append(power_df)
    if flops_df is not None:  # get_tracker_data returns None when read_flops is False
        flops_frames.append(flops_df)
    summary_frames.append(tracker_summary_df)

power_df_concat = pd.concat(power_frames) if power_frames else pd.DataFrame()
flops_df_concat = pd.concat(flops_frames) if flops_frames else pd.DataFrame()
tracker_summary_df_concat = pd.concat(summary_frames) if summary_frames else pd.DataFrame()


### UKB pilot experiment sets
- recon-all 
- fastsurfer
    - pruning_{0,25,50}
    - gpu/cpu

In [5]:
def collate_tracker_data(tracker_log_dir, exp_list, use_cuda, read_flops=True):
    """Collect tracker data for many subjects into three concatenated frames.

    Parameters
    ----------
    tracker_log_dir : directory prefix; the log directory of a subject is
        ``tracker_log_dir + 'sub-<id>/'``.
    exp_list : iterable of subject ids (duplicates are processed once).
    use_cuda : forwarded to ``get_tracker_data`` for every subject.
    read_flops : forwarded to ``get_tracker_data``; when false no flop frames
        are produced and the returned flops frame is empty.

    Returns
    -------
    (tracker_summary_df_concat, flops_df_concat, power_df_concat)
        Row-wise concatenations over all subjects that loaded successfully
        (original row indices are kept). A frame is empty when nothing could
        be collected for it.

    Subjects whose logs cannot be loaded are skipped on a best-effort basis,
    but the reason is printed instead of being silently discarded.
    """
    # subject id -> (log dir, use_cuda)
    experiment_dict = {
        subject_id: (tracker_log_dir + 'sub-{}/'.format(subject_id), use_cuda)
        for subject_id in exp_list
    }

    power_frames = []
    flops_frames = []
    summary_frames = []
    for i, (k, v) in enumerate(experiment_dict.items()):
        print('{} subject_id: {}'.format(i, k))
        try:
            power_df, flops_df, tracker_summary_df = get_tracker_data(k, v[0], v[1], read_flops)
        except Exception as err:
            # Catch Exception only, so Ctrl-C / kernel interrupt still stops
            # the loop, and report why the subject was dropped.
            print('  skipping subject {}: {!r}'.format(k, err))
            continue

        power_frames.append(power_df)
        if flops_df is not None:  # None when read_flops is False
            flops_frames.append(flops_df)
        summary_frames.append(tracker_summary_df)

    def _concat(frames):
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.concat(frames) if frames else pd.DataFrame()

    return _concat(summary_frames), _concat(flops_frames), _concat(power_frames)

In [6]:
# Subject ids come from the 'eid' column; excluded ids are removed as a set
# difference, so the resulting order is arbitrary.
subject_df = pd.read_csv(subject_lists)
ignore_subjects = {1009760}
subject_ids = list(set(subject_df['eid']).difference(ignore_subjects))

print('Found {} subjects'.format(len(subject_ids)))

# experiment set -> (log_dir, use_cuda)
# NOTE(review): 'fastsurfer_gpu_prune_0' is collated with use_cuda=False, so the
# nvidia columns are not read for it -- confirm this is intended.
experiment_sets = {
                   'recon-all':(preproc_results_dir + 'ukb/', False),
                   'fastsurfer_gpu_prune_0':(fastsurfer_results_dir + 'ukb/gpu/', False)
                  }

for exp_name, exp_config in experiment_sets.items():
    tracker_log_dir, use_cuda = exp_config
    tracker_summary_df_concat, flops_df_concat, power_df_concat = collate_tracker_data(tracker_log_dir, subject_ids, use_cuda)

    # Tag each collated frame with its experiment set and store it next to the logs.
    collated_frames = {
        'tracker_summary_df_concat.csv': tracker_summary_df_concat,
        'flops_df_concat.csv': flops_df_concat,
        'power_df_concat.csv': power_df_concat,
    }
    for csv_name, frame in collated_frames.items():
        frame['experiment_set'] = exp_name
    for csv_name, frame in collated_frames.items():
        frame.to_csv('{}{}'.format(tracker_log_dir, csv_name))
    

Found 72 subjects
0 subject_id: 1047553
1 subject_id: 3553027
2 subject_id: 1406980
3 subject_id: 1013261
4 subject_id: 1037070
5 subject_id: 1554964
6 subject_id: 1027606
7 subject_id: 4451351
8 subject_id: 4656152
9 subject_id: 1190941
10 subject_id: 2810142
11 subject_id: 1792032
12 subject_id: 1072930
13 subject_id: 1282100
14 subject_id: 1042997
15 subject_id: 1518902
16 subject_id: 1086009
17 subject_id: 3522877
18 subject_id: 1100101
19 subject_id: 3614538
20 subject_id: 1000011
21 subject_id: 3672652
22 subject_id: 3171661
23 subject_id: 5142605
24 subject_id: 1088341
25 subject_id: 1000537
26 subject_id: 1032539
27 subject_id: 1245278
28 subject_id: 1011809
29 subject_id: 1081696
30 subject_id: 1530211
31 subject_id: 1035366
32 subject_id: 1112423
33 subject_id: 2316147
34 subject_id: 1061245
35 subject_id: 1085566
36 subject_id: 1232770
37 subject_id: 4778114
38 subject_id: 4364423
39 subject_id: 1021576
40 subject_id: 4245899
41 subject_id: 1000083
42 subject_id: 1004436
43 

### Collate saved tracker perf

In [None]:
# Reload the per-experiment-set CSVs written by the collation cell and stack
# them into one frame per kind.
experiment_sets = {
                   'recon-all':(preproc_results_dir + 'ukb/', False), # log_dir, use_cuda
                   'fastsurfer_gpu_prune_0':(fastsurfer_results_dir + 'ukb/gpu/', False) # log_dir, use_cuda
                  }

summary_frames = []
flops_frames = []
power_frames = []

for exp_name, exp_config in experiment_sets.items(): 
    tracker_log_dir = exp_config[0]
    # The CSVs were saved with their index as the first column; index_col=0
    # restores it instead of creating a stray 'Unnamed: 0' column.
    tracker_summary_df_concat = pd.read_csv('{}{}'.format(tracker_log_dir,'tracker_summary_df_concat.csv'), index_col=0)
    flops_df_concat = pd.read_csv('{}{}'.format(tracker_log_dir,'flops_df_concat.csv'), index_col=0)
    power_df_concat = pd.read_csv('{}{}'.format(tracker_log_dir,'power_df_concat.csv'), index_col=0)

    summary_frames.append(tracker_summary_df_concat)
    flops_frames.append(flops_df_concat)
    power_frames.append(power_df_concat)

# Single concat per kind: DataFrame.append was removed in pandas 2.0 and
# re-copies the accumulated frame on every call.
tracker_summary_df_concat_exp_set = pd.concat(summary_frames) if summary_frames else pd.DataFrame()
flops_df_concat_exp_set = pd.concat(flops_frames) if flops_frames else pd.DataFrame()
power_df_concat_exp_set = pd.concat(power_frames) if power_frames else pd.DataFrame()


## Plots
### Summary power

In [None]:
# Strip plot of the attributable power samples, one strip per experiment.
plot_df = power_df_concat.copy()

sns.set(font_scale=1.5)

with sns.axes_style("whitegrid"):
    g = sns.catplot(
        data=plot_df,
        x='experiment_name',
        y='total_attributable_power_draw',
        kind='strip',
        aspect=3,
    )


### Flops vs Power

In [None]:
# Per-epoch power draw against the 'DP' flop count, coloured by task, on log-log axes.
plot_df = flops_df_concat.copy()

sns.set(font_scale=1.5)

with sns.axes_style("whitegrid"):
    fig, axes = plt.subplots(figsize=(15, 10), sharex=False, sharey=False)
    g = sns.scatterplot(
        data=plot_df,
        x='DP',
        y='epoch_power_draw',
        hue='task',
        s=100,
        ax=axes,
    )
    g.set(xscale='log', yscale='log')


### Power trace

In [None]:
# Cumulative power trace of a single experiment, compared against the
# tracker's total power estimate.
experiment_name = 'autorecon1_CC_ukb_1'

use_cuda = False
plot_epochs = False

plot_df = power_df_concat[power_df_concat['experiment_name']==experiment_name].copy()

# Look the summary row up once; raises IndexError if the experiment is absent.
summary_row = tracker_summary_df_concat[tracker_summary_df_concat['experiment_name']==experiment_name].iloc[0]
total_power = summary_row['total_power']
PUE = summary_row['PUE']

# (column, legend label) pairs to draw as cumulative sums
traces = [
    ('rapl_power_draw_absolute', 'cpu absolute_power'),
    ('rapl_estimated_attributable_power_draw', 'cpu attributed_power'),
]
if use_cuda:
    traces += [
        ('nvidia_draw_absolute', 'gpu total_power'),
        ('nvidia_estimated_attributable_power_draw', 'gpu attributed_power'),
    ]

sns.set(font_scale = 1.5)
with sns.axes_style("whitegrid"):
    fig, axes = plt.subplots(figsize=(15,5),sharex=False,sharey=False)

    for column, label in traces:
        # x= / y= keywords: seaborn >= 0.12 no longer accepts the data
        # vectors as positional arguments.
        g = sns.lineplot(x=plot_df['timestamp'], y=plot_df[column].cumsum(), label=label, ax=axes)

    if plot_epochs: 
        # NOTE(review): `epochs` was not defined anywhere in the visible cells.
        # The zeroed flop-log start times of this experiment are used here as
        # epoch boundaries -- confirm that this is what was intended.
        epochs = flops_df_concat.loc[flops_df_concat['experiment_name']==experiment_name, 'start_time']
        for e in epochs:
            g = plt.axvline(e, linestyle='-',c='purple')

    # NOTE(review): the 1e6 factor and the division by PUE presumably convert
    # the tracker total to the units of the cumulative traces -- confirm units.
    g = plt.axhline(1000000*total_power/PUE, linestyle='--',c='black',label='total power estimate')
    
    plt.legend()


In [None]:
# experiment tracker
sys.path.append('../../')
sys.path.append('../')
sys.path.append('../../experiment-impact-tracker/')

from experiment_impact_tracker.compute_tracker import ImpactTracker
import tempfile
import time

def func1():
    """Toy workload: return the sum of the integers 0..99."""
    return sum(range(100))

def func2():
    """Toy workload: return the sum of the squares of the integers 0..99."""
    return sum(i**2 for i in range(100))

In [None]:
# Run two ImpactTracker instances one after the other, each with its own
# temporary log directory, and print `.launched` before and inside each
# `with` block.
# NOTE: the directories created by mkdtemp() are not removed by this cell.
experiment1 = tempfile.mkdtemp()
experiment2 = tempfile.mkdtemp()

print('Tracker 1')
tracker1  = ImpactTracker(experiment1)
print(tracker1.launched)  # state before entering the context
with tracker1:
    func1()
    print(tracker1.launched)  # state inside the context, after the workload

# pause between the two trackers
time.sleep(1)
print('Tracker 2')
tracker2  = ImpactTracker(experiment2)
print(tracker2.launched)  # state before entering the context
with tracker2:
    print('Starting context')
    print(tracker2.launched)  # state right after entering the context
    print('Starting func2')
    func2()
    print(tracker2.launched)  # state after the workload

In [None]:
from experiment_impact_tracker.emissions.get_region_metrics import \
    get_current_region_info_cached, get_current_location

In [None]:
# Fetch the region info for the current location and display it as the cell output.
region_info = get_current_region_info_cached()
region_info

In [None]:
# Display the 'geometry' entry of the first element of region_info.
region_info[0]['geometry']

In [None]:
# Display the result of get_current_location() as the cell output.
get_current_location()