In [1]:
from fynesse import access, assess, address

from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
# Model names iterated over by the processing cells of this notebook.
models = ['resnet50', 'retinanet', 'unet', 'vitb16']

# Event names -- 15 entries in total. The order here is the order in which
# the processing cells walk through them.
events = [
    'instructions',
    # cache-* events
    'cache-misses',
    'cache-references',
    # L1-dcache-* events
    'L1-dcache-load-misses',
    'L1-dcache-loads',
    'L1-dcache-stores',
    # LLC-* events
    'LLC-load-misses',
    'LLC-loads',
    'LLC-store-misses',
    'LLC-stores',
    # fp_arith_inst_retired.* events
    'fp_arith_inst_retired.128b_packed_single',
    'fp_arith_inst_retired.256b_packed_single',
    'fp_arith_inst_retired.512b_packed_single',
    'fp_arith_inst_retired.scalar_double',
    'fp_arith_inst_retired.scalar_single',
]

# Seed values that appear in the `_s{seed}` part of the result file names.
seeds = [0, 42, 137]

## Data Clean-Up

In [3]:
# Trial runs (t1..t3): a single model/seed pair, every event.
# Each raw .txt result path is handed to access.process together with the
# matching .csv path under ./data/csv (presumably a txt -> csv conversion;
# see fynesse.access for the actual behaviour).
model = 'resnet50'
seed = 42

for trial in (1, 2, 3):
    for event in events:
        # File names carry the hashed event name, not the raw one.
        event_hash = address.hash(event)
        raw_path = f'../experiments/results/t{trial}/{model}_{event_hash}_s{seed}.txt'
        csv_path = f'./data/csv/t{trial}/{model}_{event_hash}_s{seed}.csv'
        access.process(raw_path, csv_path)


In [4]:
# "sp" runs: one model and one event, swept over every seed and cycle value.
model = 'resnet50'
event = 'instructions'
event_hash = address.hash(event)

# Cycle values are written in scientific notation for readability and cast
# to int so they format as plain integers in the `_c{cycle}` file-name part.
cycles = [
    int(value)
    for value in (1e4, 2.5e4, 5e4, 7.5e4, 1e5, 2.5e5, 5e5, 7.5e5, 1e6, 2.5e6, 5e6, 7.5e6, 1e7)
]

for seed in seeds:
    for cycle in cycles:
        raw_path = f'../experiments/results/sp/{model}_{event_hash}_s{seed}_c{cycle}.txt'
        csv_path = f'./data/csv/sp/{model}_{event_hash}_s{seed}_c{cycle}.csv'
        access.process(raw_path, csv_path)

In [5]:
# "core" runs: every (model, event, seed) combination.
for model in models:
    for event in events:
        for seed in seeds:
            # File names carry the hashed event name, not the raw one.
            event_hash = address.hash(event)
            raw_path = f'../experiments/results/core/{model}_{event_hash}_s{seed}.txt'
            csv_path = f'./data/csv/core/{model}_{event_hash}_s{seed}.csv'
            access.process(raw_path, csv_path)

## Data Preprocessing

Each timeseries is parsed into np arrays. 100 such timeseries are stored together as a list.
Each such list is accessed through a two-level dictionary: the first-level key is the model name and the second-level key is the event name.
One such dictionary is constructed for each seed value and pickled.

In [6]:
# Build one {model: {event: [timeseries, ...]}} dictionary per seed and hand
# it to assess.make_pickle together with that seed's pickle path.
for seed in seeds:
    master_dict = {}

    for model in models:
        # curr_dict and master_dict[model] refer to the same inner dictionary.
        curr_dict = master_dict[model] = {}

        for event in events:
            event_hash = address.hash(event)
            csv_path = f'./data/csv/core/{model}_{event_hash}_s{seed}.csv'

            df = access.load_df(csv_path)
            inferences = assess.extract_inferences(df)

            # One timeseries per extracted inference, tagged with the event name.
            curr_dict[event] = [
                address.make_timeseries(inference, event=event)
                for inference in inferences
            ]

    # Executed once per seed, after all models and events have been collected.
    pickle_path = f'./data/pickle/s{seed}.pickle'
    assess.make_pickle(pickle_path, master_dict)