In [27]:
import polars as pl
from datetime import datetime
import numpy as np
import time
from scipy.integrate import simps
import config



In [28]:
def remove_percentage(x): return int(x.split('%')[0])
def remove_MiB(x): return int(x.split('MiB')[0])
def remove_W(x): return float(x.split('W')[0])


def convert_to_timestamp(x): return int(datetime.strptime(
    x + " +0800", '%Y/%m/%d %H:%M:%S.%f %z').timestamp() * 1000)


In [44]:
def process(name: str):
    index_df = pl.read_csv(
        "index_nvidia_{}.txt".format(name))
    metrics_df = pl.read_csv("nvidiadata_{}.csv".format(name))
    metrics_df = metrics_df.with_columns([
        pl.col('utilizationgpu').apply(remove_percentage),
        pl.col('utilizationmemory').apply(remove_percentage),
        pl.col('memoryfree').apply(remove_MiB),
        pl.col('memoryused').apply(remove_MiB),
        pl.col('powerdraw').apply(remove_W),
        pl.col('timestamp').apply(convert_to_timestamp)
    ])
    metrics_df = metrics_df.drop('temperaturememory').drop(
        'pcielinkgencurrent').drop('index')
    data = []
    for i in range(index_df.shape[0]):

        model_name = index_df[i]['model'][0]
        batch_size = index_df[i]['batch_size'][0]
        start_time = index_df[i]['start_time'][0]
        end_time = index_df[i]['end_time'][0]
        duration = index_df[i]['duration'][0]

        filted_data = metrics_df.filter(
            pl.col('timestamp').is_between(
                start_time / 1000000, end_time / 1000000)
        ).to_numpy()[:, 1:]
        
        
        if filted_data.shape[0] >= 1024:
            length = 1024
            filted_data = filted_data[:1024]

        else:
            length = filted_data.shape[0]
            zero_padding = np.zeros((1024 - length, filted_data.shape[1]), dtype=np.float32)
            filted_data = np.concatenate((filted_data, zero_padding), axis=0)

        data.append(
            {
                'model': model_name,
                'batch_size': batch_size,
                'duration': duration,
                'data': filted_data,
                'length': length
            }
        )
        # data: 
        #       0 utilizationgpu,
        #       1 utilizationmemory,
        #       2 memoryfree,
        #       3 memoryused,
        #       4 temperaturegpu,
        #       5 powerdraw
    return data


In [45]:
data_transfer = []

all_data = {}

for x in ['A40', 'GTX_1080', 'RTX_2080', 'TITANX', 'TITANXp', 'V100']:
    all_data[x] = process(x)
    

In [46]:
data_label = {}

for x in ['A40', 'GTX_1080', 'RTX_2080', 'TITANX', 'TITANXp', 'V100']:
    data_label[x] = []
    for d in all_data[x]:
        i = 0
        while d['model'] + '_' + str(i) in data_label[x]:
            i += 1
        data_label[x].append(
            d['model'] + '_' + str(i)
        )

In [32]:
for x in ['A40', 'GTX_1080', 'RTX_2080', 'TITANX', 'TITANXp', 'V100']:
    for y in ['A40', 'GTX_1080', 'RTX_2080', 'TITANX', 'TITANXp', 'V100']:
        for data_label_x in data_label[x]:
            for data_label_y in data_label[y]:
                if data_label_x.split('_')[0] == data_label_y.split('_')[0]:
                    data_transfer.append(
                        {
                            'from': x,
                            'to': y,
                            'data_label': data_label_x.split('_')[0],
                            'from_batch_size': all_data[x][data_label[x].index(data_label_x)]['batch_size'],
                            'to_batch_size': all_data[y][data_label[y].index(data_label_y)]['batch_size'],
                            'from_duration': all_data[x][data_label[x].index(data_label_x)]['duration'],
                            'to_duration': all_data[y][data_label[y].index(data_label_y)]['duration'],
                            'from_metrics': all_data[x][data_label[x].index(data_label_x)]['data'],
                            'from_length': all_data[x][data_label[x].index(data_label_x)]['length']
                        }
                    )
                
                

In [34]:
len(data_transfer)

3021027

In [35]:
data_transfer[0]

{'from': 'A40',
 'to': 'A40',
 'data_label': 'hf',
 'from_batch_size': 1,
 'to_batch_size': 1,
 'from_duration': 11576.40234375,
 'to_duration': 11576.40234375,
 'from_metrics': array([[1.0000e+02, 9.0000e+01, 4.0077e+04, 5.5570e+03, 3.2000e+01,
         1.5479e+02],
        [9.9000e+01, 9.1000e+01, 4.0077e+04, 5.5570e+03, 3.3000e+01,
         1.6948e+02],
        [9.9000e+01, 9.1000e+01, 4.0077e+04, 5.5570e+03, 3.3000e+01,
         1.8035e+02],
        ...,
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00]]),
 'from_length': 110}

In [33]:
np.save("transfer_data.npy", data_transfer)

In [49]:
data = np.load('sequence_data.npy', allow_pickle=True)

In [50]:
t = set()
for d in data:
    if d['data'].shape[0] != 1024:
        print(d['data'].shape[0], d['length'])

In [30]:
modelnamelist = sorted(['timm_nfnet', 'resnet50_quantized_qat', 'shufflenet_v2_x1_0', 'densenet121', 'detectron2_maskrcnn', 'vgg16', 'hf_Bart', 'yolov3', 'hf_DistilBert', 'resnet18', 'hf_GPT2', 'hf_Albert', 'mobilenet_v3_large', 'maml_omniglot', 'timm_vision_transformer', 'maml', 'vision_maskrcnn', 'alexnet', 'nvidia_deeprecommender', 'LearningToPaint', 'resnext50_32x4d', 'hf_Bert', 'drq', 'timm_vovnet', 'pytorch_unet', 'hf_T5', 'mobilenet_v2', 'timm_regnet', 'squeezenet1_1', 'pytorch_CycleGAN_and_pix2pix', 'hf_Reformer', 'dcgan', 'Super_SloMo', 'mobilenet_v2_quantized_qat', 'hf_Longformer', 'resnet50', 'timm_efficientnet', 'attention_is_all_you_need_pytorch', 'BERT_pytorch', 'mnasnet1_0', 'fastNLP_Bert', 'hf_BigBird'])
model_name_to_num = {modelnamelist.index(model): model for model in modelnamelist}
model_num_to_name = {model: modelnamelist.index(model) for model in modelnamelist}

In [32]:
data[0].keys()

dict_keys(['model', 'batch_size', 'duration', 'data', 'length'])

In [1]:
import ShanghaitechData

ds = ShanghaitechData.ShanghaitechClusterDataset('/home/murez/CS225/project/throughput_estimator/Shanghaitech_SIST_datacenter/transfer_data.npy')

import torch
from torch.utils.data import DataLoader

In [2]:

dl = DataLoader(ds, batch_size=64, shuffle=True)


In [25]:
for d in dl:
    (device_feature,
    from_metrics, 
    from_length,
    duration_rate) = d
    break

In [29]:
from_metrics.shape

torch.Size([64, 1024, 6])

In [26]:
from model import DashEstimator

In [27]:
m = DashEstimator()



In [28]:
m(device_feature, from_metrics, from_length)

tensor([[-55.9812],
        [-77.7703],
        [-78.9193],
        [-76.9743],
        [-60.4700],
        [-79.2917],
        [-54.5944],
        [-76.1798],
        [-79.6758],
        [-77.6575],
        [-56.8798],
        [-71.5048],
        [-63.9357],
        [-76.7603],
        [-81.4734],
        [-61.4035],
        [-52.9177],
        [-54.5732],
        [-80.5189],
        [-78.7876],
        [-71.6160],
        [-79.4317],
        [-65.1680],
        [-80.1811],
        [-78.8246],
        [-79.2175],
        [-79.4171],
        [-78.8692],
        [-77.1077],
        [-64.1454],
        [-52.4029],
        [-64.2431],
        [-77.8095],
        [-55.4689],
        [-81.0892],
        [-71.7560],
        [-52.6316],
        [-71.7816],
        [-78.3162],
        [-52.3811],
        [-81.7865],
        [-60.4838],
        [-55.1627],
        [-78.4862],
        [-78.8800],
        [-77.5968],
        [-71.2326],
        [-66.3137],
        [-76.8101],
        [-77.0593],


In [30]:
from config import *
device_from = 'A40'
device_to = ['A40', 'GTX_1080', 'RTX_2080', 'TITANX', 'TITANXp', 'V100']
from_batch_size = 64
to_batch_size = [16, 32, 256, 64, 64, 64]

transfer_feature = []
for i, to in enumerate(device_to):
    from_device_config = GPU_config_list[device_from]
    to_device_config = GPU_config_list[to]
    feature = np.concatenate([from_device_config, to_device_config, from_batch_size, to_batch_size[i]], axis=None)
    transfer_feature.append(feature)
transfer_feature = torch.from_numpy(np.array(transfer_feature)).float()

In [38]:
torch.cat([from_metrics[0].unsqueeze(0) for x in range(10)], dim=0).shape

torch.Size([10, 1024, 6])

In [32]:
transfer_feature.shape

torch.Size([6, 16])

In [42]:
int(len(ds) * 0.1)

302102

In [40]:
torch.utils.data.random_split(range(10), [3, 7], generator=torch.Generator().manual_seed(42))

[<torch.utils.data.dataset.Subset at 0x7fe0d8630040>,
 <torch.utils.data.dataset.Subset at 0x7fe0d86300a0>]

In [2]:
import numpy as np
np.inf > 3

True

In [1]:
import predictor
predict = predictor.DashPredictor()
predict.train(8192)



epoch: 0


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 332/332 [05:42<00:00,  1.03s/it]


Epoch:  0 | train loss: 46.7809
epoch: 1


100%|██████████| 332/332 [06:09<00:00,  1.11s/it]


Epoch:  1 | train loss: 45.3443
epoch: 2


100%|██████████| 332/332 [05:39<00:00,  1.02s/it]


Epoch:  2 | train loss: 30.3647
epoch: 3


100%|██████████| 332/332 [05:23<00:00,  1.03it/s]


Epoch:  3 | train loss: 58.1206
epoch: 4


100%|██████████| 332/332 [05:32<00:00,  1.00s/it]


Epoch:  4 | train loss: 28.6978
epoch: 5


 92%|█████████▏| 305/332 [04:35<00:27,  1.01s/it]