In [1]:
import tqdm
import pandas as pd
import os
import sys

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Make the parent of the current working directory importable so that the
# `predict` package imported below can be resolved.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Guarded so that re-running this cell does not keep appending the same entry.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from predict.prediction_models import (
    DensePredictor,
    ConvPredictor,
)

In [3]:
# Predictor pair created with with_features=True; further down they are fed the
# frames produced with include_additional_features=True.
dense_predictor, conv_predictor = (
    DensePredictor(with_features=True),
    ConvPredictor(with_features=True),
)

In [4]:
# Predictor pair created with with_features=False; further down they are fed the
# frames produced with include_additional_features=False.
dense_wo_features_predictor, conv_wo_features_predictor = (
    DensePredictor(with_features=False),
    ConvPredictor(with_features=False),
)

In [5]:
# GPU whose specification row is selected from the GPU table (matched against
# the table's 'GPU' column further down).
gpu_name = "V100"

In [None]:
# Run configuration shared by the layer records below and by the training cell.
batch_size = 64
dataset_size = 1048

# Feature records of the VGG16 layers, keyed by layer name.
vgg16_conv_layers = {}
vgg16_dense_layers = {}


def add_conv_layer_vgg(layer_name, matrix_size, kernel_size, channels_in, channels_out, strides, padding, activation, optimizer):
    """Store the feature record of one convolutional layer in ``vgg16_conv_layers``.

    The record is keyed by ``layer_name``; an existing entry with the same name
    is replaced. ``matrix_size`` is stored under ``'matsize'``, ``activation``
    under ``'activation_fct'``, and the module-level ``batch_size`` is read at
    call time and stored under ``'batchsize'``. Returns ``None``.
    """
    record = dict(
        batchsize=batch_size,
        matsize=matrix_size,
        kernelsize=kernel_size,
        channels_in=channels_in,
        channels_out=channels_out,
        strides=strides,
        padding=padding,
        activation_fct=activation,
        optimizer=optimizer,
    )
    vgg16_conv_layers[layer_name] = record


def add_dense_layer_vgg(layer_name, input_size, output_size, activation, optimizer):
    """Store the feature record of one dense layer in ``vgg16_dense_layers``.

    The record is keyed by ``layer_name``; ``input_size`` / ``output_size`` are
    stored under ``'dim_input'`` / ``'dim_output'`` and the module-level
    ``batch_size`` (read at call time) under ``'batchsize'``. Returns ``None``.
    """
    record = dict(
        batchsize=batch_size,
        dim_input=input_size,
        dim_output=output_size,
        activation_fct=activation,
        optimizer=optimizer,
    )
    vgg16_dense_layers[layer_name] = record


# (layer name, feature-map size, channels in, channels out).
# A 2x2 max pooling with stride 2 follows every block and halves the size:
# 224 -> 112 -> 56 -> 28 -> 14 (-> 7 after block 5).
conv_layer_specs = [
    # Block 1: 2 conv layers
    ("block1_conv1", 224, 3, 64),
    ("block1_conv2", 224, 64, 64),
    # Block 2: 2 conv layers
    ("block2_conv1", 112, 64, 128),
    ("block2_conv2", 112, 128, 128),
    # Block 3: 3 conv layers
    ("block3_conv1", 56, 128, 256),
    ("block3_conv2", 56, 256, 256),
    ("block3_conv3", 56, 256, 256),
    # Block 4: 3 conv layers
    ("block4_conv1", 28, 256, 512),
    ("block4_conv2", 28, 512, 512),
    ("block4_conv3", 28, 512, 512),
    # Block 5: 3 conv layers
    ("block5_conv1", 14, 512, 512),
    ("block5_conv2", 14, 512, 512),
    ("block5_conv3", 14, 512, 512),
]

# Every entry uses kernel size 3, stride 1, padding 1, ReLU and Adam.
for conv_name, fmap_size, n_in, n_out in conv_layer_specs:
    add_conv_layer_vgg(conv_name, fmap_size, 3, n_in, n_out, 1, 1, "ReLU", "Adam")

# Print out the dictionary to verify
for layer_name, features in vgg16_conv_layers.items():
    print(f"{layer_name}: {features}")

# Dense layers: (layer name, input size, output size, activation), all with Adam.
dense_layer_specs = [
    ("fc1", 25088, 4096, "ReLU"),
    ("fc2", 4096, 4096, "ReLU"),
    ("fc3", 4096, 10, "Softmax"),
]
for fc_name, n_inputs, n_outputs, fc_activation in dense_layer_specs:
    add_dense_layer_vgg(fc_name, n_inputs, n_outputs, fc_activation, "Adam")

block1_conv1: {'batchsize': 64, 'matsize': 224, 'kernelsize': 3, 'channels_in': 3, 'channels_out': 64, 'strides': 1, 'padding': 1, 'activation_fct': 'ReLU', 'optimizer': 'Adam'}
block1_conv2: {'batchsize': 64, 'matsize': 224, 'kernelsize': 3, 'channels_in': 64, 'channels_out': 64, 'strides': 1, 'padding': 1, 'activation_fct': 'ReLU', 'optimizer': 'Adam'}
block2_conv1: {'batchsize': 64, 'matsize': 112, 'kernelsize': 3, 'channels_in': 64, 'channels_out': 128, 'strides': 1, 'padding': 1, 'activation_fct': 'ReLU', 'optimizer': 'Adam'}
block2_conv2: {'batchsize': 64, 'matsize': 112, 'kernelsize': 3, 'channels_in': 128, 'channels_out': 128, 'strides': 1, 'padding': 1, 'activation_fct': 'ReLU', 'optimizer': 'Adam'}
block3_conv1: {'batchsize': 64, 'matsize': 56, 'kernelsize': 3, 'channels_in': 128, 'channels_out': 256, 'strides': 1, 'padding': 1, 'activation_fct': 'ReLU', 'optimizer': 'Adam'}
block3_conv2: {'batchsize': 64, 'matsize': 56, 'kernelsize': 3, 'channels_in': 256, 'channels_out': 25

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import time

# Check if GPU is available and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformations for the training data
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert images to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Create a random dataset with `dataset_size` samples. FakeData generates random
# images and labels, so only the timing of this run is meaningful, not the loss.
# FakeData's default of 10 classes matches the 10-way classifier head set below.
train_dataset = datasets.FakeData(size=dataset_size, transform=transform, image_size=(3, 224, 224))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Load VGG16 with randomly initialised weights. `weights=None` is the current
# spelling of the deprecated `pretrained=False`; pass a `VGG16_Weights` member
# instead to start from ImageNet weights.
model = models.vgg16(weights=None)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 10)  # Change the final layer to 10 classes
model.to(device)


# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
# CUDA work is queued asynchronously, so drain the queue before reading the
# clock on either side of the loop; the measurement then no longer relies on
# `loss.item()` acting as an implicit synchronisation point.
# perf_counter() is monotonic, whereas time.time() can jump when the system
# clock is adjusted.
# NOTE(review): the timed interval also contains the CPU-side generation and
# transformation of the FakeData samples done by the DataLoader.
if device.type == "cuda":
    torch.cuda.synchronize()
start_time = time.perf_counter()
for epoch in tqdm.tqdm(range(epochs)):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses of this epoch
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')
if device.type == "cuda":
    torch.cuda.synchronize()
end_time = time.perf_counter()
print("Training complete!")


  1%|          | 1/100 [00:08<14:19,  8.68s/it]

Epoch [1/100], Loss: 15.0384


  2%|▏         | 2/100 [00:18<15:07,  9.26s/it]

Epoch [2/100], Loss: 2.3020


  3%|▎         | 3/100 [00:27<15:08,  9.37s/it]

Epoch [3/100], Loss: 2.2997


  4%|▍         | 4/100 [00:36<14:25,  9.02s/it]

Epoch [4/100], Loss: 2.2998


  5%|▌         | 5/100 [00:45<14:06,  8.91s/it]

Epoch [5/100], Loss: 2.2973


  6%|▌         | 6/100 [00:54<14:05,  8.99s/it]

Epoch [6/100], Loss: 2.2968


  7%|▋         | 7/100 [01:03<14:14,  9.19s/it]

Epoch [7/100], Loss: 2.2985


  8%|▊         | 8/100 [01:12<13:53,  9.06s/it]

Epoch [8/100], Loss: 2.2977


  9%|▉         | 9/100 [01:20<13:21,  8.81s/it]

Epoch [9/100], Loss: 2.2978


 10%|█         | 10/100 [01:29<13:03,  8.71s/it]

Epoch [10/100], Loss: 2.2974


 11%|█         | 11/100 [01:37<12:49,  8.65s/it]

Epoch [11/100], Loss: 2.2972


 12%|█▏        | 12/100 [01:47<13:13,  9.01s/it]

Epoch [12/100], Loss: 2.2961


 13%|█▎        | 13/100 [01:56<13:00,  8.97s/it]

Epoch [13/100], Loss: 2.2967


 14%|█▍        | 14/100 [02:05<12:53,  9.00s/it]

Epoch [14/100], Loss: 2.2983


 15%|█▌        | 15/100 [02:13<12:24,  8.75s/it]

Epoch [15/100], Loss: 2.2944


 16%|█▌        | 16/100 [02:22<12:04,  8.63s/it]

Epoch [16/100], Loss: 2.2971


 17%|█▋        | 17/100 [02:30<11:52,  8.58s/it]

Epoch [17/100], Loss: 2.2971


 18%|█▊        | 18/100 [02:39<11:53,  8.70s/it]

Epoch [18/100], Loss: 2.2990


 19%|█▉        | 19/100 [02:48<11:46,  8.72s/it]

Epoch [19/100], Loss: 2.2961


 20%|██        | 20/100 [02:57<11:42,  8.78s/it]

Epoch [20/100], Loss: 2.2966


 21%|██        | 21/100 [03:05<11:28,  8.71s/it]

Epoch [21/100], Loss: 2.2942


 22%|██▏       | 22/100 [03:14<11:22,  8.75s/it]

Epoch [22/100], Loss: 2.2969


 23%|██▎       | 23/100 [03:23<11:11,  8.72s/it]

Epoch [23/100], Loss: 2.2968


 24%|██▍       | 24/100 [03:31<10:54,  8.61s/it]

Epoch [24/100], Loss: 2.2974


 25%|██▌       | 25/100 [03:40<10:43,  8.57s/it]

Epoch [25/100], Loss: 2.2957


 26%|██▌       | 26/100 [03:49<10:41,  8.67s/it]

Epoch [26/100], Loss: 2.2976


 27%|██▋       | 27/100 [03:58<10:43,  8.81s/it]

Epoch [27/100], Loss: 2.2978


 28%|██▊       | 28/100 [04:07<10:34,  8.82s/it]

Epoch [28/100], Loss: 2.2969


 29%|██▉       | 29/100 [04:15<10:11,  8.61s/it]

Epoch [29/100], Loss: 2.2969


 30%|███       | 30/100 [04:23<09:51,  8.45s/it]

Epoch [30/100], Loss: 2.2963


 31%|███       | 31/100 [04:31<09:42,  8.45s/it]

Epoch [31/100], Loss: 2.2960


 32%|███▏      | 32/100 [04:39<09:24,  8.31s/it]

Epoch [32/100], Loss: 2.2976


 33%|███▎      | 33/100 [04:48<09:22,  8.39s/it]

Epoch [33/100], Loss: 2.2971


 34%|███▍      | 34/100 [04:57<09:24,  8.56s/it]

Epoch [34/100], Loss: 2.2962


 35%|███▌      | 35/100 [05:05<09:14,  8.54s/it]

Epoch [35/100], Loss: 2.2966


 36%|███▌      | 36/100 [05:13<08:59,  8.42s/it]

Epoch [36/100], Loss: 2.2984


 37%|███▋      | 37/100 [05:22<08:48,  8.39s/it]

Epoch [37/100], Loss: 2.2973


 38%|███▊      | 38/100 [05:30<08:36,  8.32s/it]

Epoch [38/100], Loss: 2.2953


 39%|███▉      | 39/100 [05:38<08:28,  8.33s/it]

Epoch [39/100], Loss: 2.2990


 40%|████      | 40/100 [05:46<08:16,  8.27s/it]

Epoch [40/100], Loss: 2.2963


 41%|████      | 41/100 [05:55<08:10,  8.31s/it]

Epoch [41/100], Loss: 2.2975


 42%|████▏     | 42/100 [06:03<08:03,  8.34s/it]

Epoch [42/100], Loss: 2.2979


 43%|████▎     | 43/100 [06:11<07:51,  8.28s/it]

Epoch [43/100], Loss: 2.2956


 44%|████▍     | 44/100 [06:20<07:45,  8.32s/it]

Epoch [44/100], Loss: 2.2964


 45%|████▌     | 45/100 [06:28<07:32,  8.23s/it]

Epoch [45/100], Loss: 2.2967


 46%|████▌     | 46/100 [06:36<07:21,  8.18s/it]

Epoch [46/100], Loss: 2.2962


 47%|████▋     | 47/100 [06:44<07:17,  8.25s/it]

Epoch [47/100], Loss: 2.2975


 48%|████▊     | 48/100 [06:52<07:05,  8.17s/it]

Epoch [48/100], Loss: 2.2982


 49%|████▉     | 49/100 [07:00<06:56,  8.18s/it]

Epoch [49/100], Loss: 2.2960


 50%|█████     | 50/100 [07:09<06:51,  8.23s/it]

Epoch [50/100], Loss: 2.2963


 51%|█████     | 51/100 [07:17<06:46,  8.31s/it]

Epoch [51/100], Loss: 2.2967


 52%|█████▏    | 52/100 [07:25<06:36,  8.27s/it]

Epoch [52/100], Loss: 2.2949


 53%|█████▎    | 53/100 [07:34<06:29,  8.28s/it]

Epoch [53/100], Loss: 2.2978


 54%|█████▍    | 54/100 [07:42<06:19,  8.24s/it]

Epoch [54/100], Loss: 2.2959


 55%|█████▌    | 55/100 [07:50<06:12,  8.27s/it]

Epoch [55/100], Loss: 2.2933


 56%|█████▌    | 56/100 [07:58<06:00,  8.20s/it]

Epoch [56/100], Loss: 2.2984


 57%|█████▋    | 57/100 [08:06<05:51,  8.17s/it]

Epoch [57/100], Loss: 2.2954


 58%|█████▊    | 58/100 [08:15<05:49,  8.32s/it]

Epoch [58/100], Loss: 2.2942


 59%|█████▉    | 59/100 [08:24<05:54,  8.65s/it]

Epoch [59/100], Loss: 2.2963


 60%|██████    | 60/100 [08:34<05:57,  8.94s/it]

Epoch [60/100], Loss: 2.2996


 61%|██████    | 61/100 [08:43<05:46,  8.89s/it]

Epoch [61/100], Loss: 2.2954


 62%|██████▏   | 62/100 [08:52<05:38,  8.90s/it]

Epoch [62/100], Loss: 2.2944


 63%|██████▎   | 63/100 [09:00<05:21,  8.68s/it]

Epoch [63/100], Loss: 2.2970


 64%|██████▍   | 64/100 [09:08<05:08,  8.57s/it]

Epoch [64/100], Loss: 2.2937


 65%|██████▌   | 65/100 [09:17<04:58,  8.54s/it]

Epoch [65/100], Loss: 2.2972


 66%|██████▌   | 66/100 [09:25<04:50,  8.53s/it]

Epoch [66/100], Loss: 2.2975


 67%|██████▋   | 67/100 [09:34<04:44,  8.62s/it]

Epoch [67/100], Loss: 2.2962


 68%|██████▊   | 68/100 [09:43<04:40,  8.76s/it]

Epoch [68/100], Loss: 2.2979


 69%|██████▉   | 69/100 [09:51<04:25,  8.56s/it]

Epoch [69/100], Loss: 2.2976


 70%|███████   | 70/100 [10:01<04:25,  8.86s/it]

Epoch [70/100], Loss: 2.2948


 71%|███████   | 71/100 [10:10<04:17,  8.90s/it]

Epoch [71/100], Loss: 2.2978


 72%|███████▏  | 72/100 [10:20<04:16,  9.18s/it]

Epoch [72/100], Loss: 2.2966


 73%|███████▎  | 73/100 [10:28<04:04,  9.05s/it]

Epoch [73/100], Loss: 2.2962


 74%|███████▍  | 74/100 [10:37<03:49,  8.84s/it]

Epoch [74/100], Loss: 2.2962


 75%|███████▌  | 75/100 [10:45<03:35,  8.62s/it]

Epoch [75/100], Loss: 2.2961


 76%|███████▌  | 76/100 [10:53<03:25,  8.56s/it]

Epoch [76/100], Loss: 2.2975


 77%|███████▋  | 77/100 [11:01<03:15,  8.49s/it]

Epoch [77/100], Loss: 2.2966


 78%|███████▊  | 78/100 [11:10<03:04,  8.39s/it]

Epoch [78/100], Loss: 2.2954


 79%|███████▉  | 79/100 [11:18<02:58,  8.52s/it]

Epoch [79/100], Loss: 2.2947


 80%|████████  | 80/100 [11:27<02:48,  8.42s/it]

Epoch [80/100], Loss: 2.2956


 81%|████████  | 81/100 [11:36<02:43,  8.59s/it]

Epoch [81/100], Loss: 2.2963


 82%|████████▏ | 82/100 [11:44<02:34,  8.57s/it]

Epoch [82/100], Loss: 2.2951


 83%|████████▎ | 83/100 [11:52<02:23,  8.43s/it]

Epoch [83/100], Loss: 2.2977


 84%|████████▍ | 84/100 [12:01<02:16,  8.51s/it]

Epoch [84/100], Loss: 2.2957


 85%|████████▌ | 85/100 [12:10<02:08,  8.55s/it]

Epoch [85/100], Loss: 2.2977


 86%|████████▌ | 86/100 [12:19<02:01,  8.70s/it]

Epoch [86/100], Loss: 2.2945


 87%|████████▋ | 87/100 [12:28<01:56,  8.93s/it]

Epoch [87/100], Loss: 2.2956


 88%|████████▊ | 88/100 [12:37<01:48,  9.07s/it]

Epoch [88/100], Loss: 2.2988


 89%|████████▉ | 89/100 [12:47<01:41,  9.21s/it]

Epoch [89/100], Loss: 2.2971


 90%|█████████ | 90/100 [12:56<01:31,  9.13s/it]

Epoch [90/100], Loss: 2.2986


 91%|█████████ | 91/100 [13:05<01:20,  8.99s/it]

Epoch [91/100], Loss: 2.2963


 92%|█████████▏| 92/100 [13:14<01:11,  8.99s/it]

Epoch [92/100], Loss: 2.2960


 93%|█████████▎| 93/100 [13:22<01:01,  8.73s/it]

Epoch [93/100], Loss: 2.2967


 94%|█████████▍| 94/100 [13:31<00:52,  8.82s/it]

Epoch [94/100], Loss: 2.2976


 95%|█████████▌| 95/100 [13:40<00:44,  8.88s/it]

Epoch [95/100], Loss: 2.2965


 96%|█████████▌| 96/100 [13:49<00:35,  8.87s/it]

Epoch [96/100], Loss: 2.2968


 97%|█████████▋| 97/100 [13:58<00:26,  8.92s/it]

Epoch [97/100], Loss: 2.2962


 98%|█████████▊| 98/100 [14:07<00:18,  9.08s/it]

Epoch [98/100], Loss: 2.2963


 99%|█████████▉| 99/100 [14:16<00:08,  8.90s/it]

Epoch [99/100], Loss: 2.2977


100%|██████████| 100/100 [14:25<00:00,  8.65s/it]

Epoch [100/100], Loss: 2.2959
Training complete!





In [8]:
# Seconds between the two timestamps taken immediately before and after the
# training loop.
time_elapsed = end_time - start_time
print(
    f"Total time taken for training VGG16: {time_elapsed:.2f} seconds"
)

Total time taken for training VGG16: 865.44 seconds


In [9]:
# Mean duration of one epoch: total training time divided by the number of epochs.
time_for_one_epoch = time_elapsed / epochs
print(
    f"Average time taken for one epoch of VGG16: {time_for_one_epoch:.2f} seconds"
)

Average time taken for one epoch of VGG16: 8.65 seconds


In [10]:
# Load the GPU specification table; the path is relative to the notebook's
# working directory. The trailing expression displays the table.
gpu_specs = pd.read_csv(filepath_or_buffer='../gpus/GPUs.csv')
gpu_specs

Unnamed: 0,GPU,Provisioning,Base Clock (MHz),Boost Clock (MHz),Memory Clock (MHz),Memory (GB),Memory Type,Memory Bus (bit),GPU Memory Bandwidth (GB/s),Bus,...,TMUs,ROPs,SM,TC,RT,PR,TR,FP16,FP32,FP64
0,L4,Cloud,795,2040,1563,24,GDDR6,192,300,PCIe 4.0,...,240,80,60,240,60,163,490,30290,30290,473
1,P4,Cloud,886,1114,1502,8,GDDR5,256,192,PCIe 3.0,...,160,64,20,0,0,71,178,89,5700,178
2,P100,Cloud,1190,1329,715,16,HBM2,4096,732,PCIe 3.0,...,224,96,56,0,0,127,297,19050,9526,4763
3,RTX4090,Local,2235,2520,1313,24,GDDR6X,384,1001,PCIe 4.0,...,512,176,128,512,128,443,1290,82580,82580,1290
4,RTXA4000,Local,735,1560,1750,16,GDDR6,256,448,PCIe 4.0,...,192,96,48,192,48,150,300,19170,19170,300
5,T4,Cloud,585,1590,1250,16,GDDR6,256,320,PCIe 3.0,...,160,64,40,320,40,101,254,65130,8141,254
6,V100,Cloud,1245,1380,876,16,HBM2,4096,897,PCIe 3.0,...,320,128,80,640,0,176,441,28260,14130,7066


In [11]:
# Generic names for the layer records used by the following cells. These are
# aliases of the VGG16 dicts (the same objects), not copies.
conv_layer_features, dense_layer_features = vgg16_conv_layers, vgg16_dense_layers

In [12]:
def _attach_gpu_specs(layer_features, gpu_specs, gpu_name):
    """Return one row per layer with the selected GPU's specification appended.

    Parameters
    ----------
    layer_features : dict
        Mapping ``layer name -> {feature name: value}``.
    gpu_specs : pandas.DataFrame
        GPU table with a ``'GPU'`` column holding the GPU names.
    gpu_name : str
        Value to look up in the ``'GPU'`` column.

    Returns
    -------
    pandas.DataFrame
        The layer features in dict order (index reset to 0..n-1, layer names
        dropped), followed by the GPU's columns repeated on every row.

    Raises
    ------
    ValueError
        If ``gpu_name`` does not match exactly one row of ``gpu_specs``.
    """
    matches = gpu_specs[gpu_specs['GPU'] == gpu_name]
    # Zero or several matches would otherwise end in a malformed frame or an
    # obscure pandas error further down; fail early with a clear message.
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one row for GPU {gpu_name!r} in gpu_specs, found {len(matches)}"
        )
    gpu_row = matches.iloc[0]

    layers = pd.DataFrame.from_dict(layer_features, orient='index')
    # Repeat the GPU row once per layer so both frames can be joined column-wise.
    gpu_rows = pd.DataFrame([gpu_row] * len(layers))
    return pd.concat(
        [layers.reset_index(drop=True), gpu_rows.reset_index(drop=True)],
        axis=1,
    )


# Both names hold the raw layer dicts on entry and DataFrames afterwards, so the
# cell that assigns the dicts has to be re-run before re-running this one.
conv_layer_features = _attach_gpu_specs(conv_layer_features, gpu_specs, gpu_name)
dense_layer_features = _attach_gpu_specs(dense_layer_features, gpu_specs, gpu_name)

In [13]:
# Replace the textual layer attributes by integer codes for every conv layer.
# The warnings printed by the preprocessing cell show these codes being mapped
# back to names there ('relu' and 'Adam' appear as the mapped values).
conv_layer_features['activation_fct'] = 1
conv_layer_features['optimizer'] = 4
conv_layer_features['precision'] = 32
# Numeric padding -> padding mode (0 -> 'valid', anything else -> 'same').
# Values that are already strings are kept as they are, so re-running this cell
# does not turn an existing 'valid' into 'same'.
conv_layer_features['padding'] = conv_layer_features['padding'].apply(
    lambda pad: pad if isinstance(pad, str) else ('valid' if pad == 0 else 'same')
)
# NOTE(review): torchvision's VGG16 builds its Conv2d layers with the default
# bias=True; confirm that use_bias = 0 is what the predictor expects here.
conv_layer_features['use_bias'] = 0

In [14]:
# Same integer codes as used for the conv layers, applied to every dense layer.
# NOTE(review): this also overwrites the 'Softmax' recorded for fc3 with the
# code used for the other layers; confirm that is intended.
for column, code in {'activation_fct': 1, 'optimizer': 4, 'precision': 32}.items():
    dense_layer_features[column] = code

In [15]:
# Display the conv-layer frame (layer attributes followed by the GPU columns).
conv_layer_features

Unnamed: 0,batchsize,matsize,kernelsize,channels_in,channels_out,strides,padding,activation_fct,optimizer,GPU,...,SM,TC,RT,PR,TR,FP16,FP32,FP64,precision,use_bias
0,64,224,3,3,64,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
1,64,224,3,64,64,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
2,64,112,3,64,128,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
3,64,112,3,128,128,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
4,64,56,3,128,256,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
5,64,56,3,256,256,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
6,64,56,3,256,256,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
7,64,28,3,256,512,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
8,64,28,3,512,512,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0
9,64,28,3,512,512,1,same,1,4,V100,...,80,640,0,176,441,28260,14130,7066,32,0


In [16]:
# Display the dense-layer frame (layer attributes followed by the GPU columns).
dense_layer_features

Unnamed: 0,batchsize,dim_input,dim_output,activation_fct,optimizer,GPU,Provisioning,Base Clock (MHz),Boost Clock (MHz),Memory Clock (MHz),...,ROPs,SM,TC,RT,PR,TR,FP16,FP32,FP64,precision
0,64,25088,4096,1,4,V100,Cloud,1245,1380,876,...,128,80,640,0,176,441,28260,14130,7066,32
1,64,4096,4096,1,4,V100,Cloud,1245,1380,876,...,128,80,640,0,176,441,28260,14130,7066,32
2,64,4096,10,1,4,V100,Cloud,1245,1380,876,...,128,80,640,0,176,441,28260,14130,7066,32


In [17]:
from predict.features import (
    PreprocessConvFeatures,
    PreprocessDenseFeatures,
)

# Each preprocessor gets its own copy of the layer frame. The warnings emitted
# by this cell show the preprocessors assigning into their feature frame
# (`self.features.loc[:, 'optimizer'] = ...`); with private copies the second
# pass over the same layers is guaranteed to start from the original integer
# codes, whether or not the first pass touched its input.
# NOTE(review): whether the preprocessors really modify their argument cannot
# be seen from this notebook; the copies are a cheap safeguard.
conv_features = PreprocessConvFeatures(conv_layer_features.copy(), include_additional_features=True).features
dense_features = PreprocessDenseFeatures(dense_layer_features.copy(), include_additional_features=True).features

# Same layers again, without the additional features (input of the
# with_features=False predictors).
conv_features_less = PreprocessConvFeatures(conv_layer_features.copy(), include_additional_features=False).features
dense_features_less = PreprocessDenseFeatures(dense_layer_features.copy(), include_additional_features=False).features

 'Adam' 'Adam' 'Adam']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.features.loc[:, 'optimizer'] = self.features['optimizer'].map({0:'None',
 'relu' 'relu' 'relu']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.features.loc[:, 'activation_fct'] = self.features['activation_fct'].map({0:'None',
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.features['flops'] = (self.features['batchsize']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.features['flops/spee

In [18]:
# Display the preprocessed conv features (built with include_additional_features=True).
conv_features

Unnamed: 0,Base Clock (MHz),Boost Clock (MHz),Bus_PCIe 3.0,Bus_PCIe 4.0,Cores,FP32,GPU,GPU Memory Bandwidth (GB/s),Memory (GB),Memory Bus (bit),...,optimizer_Adagrad,optimizer_Adam,optimizer_None,optimizer_RMSProp,optimizer_SGD,padding_same,padding_valid,precision,strides,use_bias
0,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
1,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
2,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
3,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
4,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
5,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
6,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
7,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
8,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0
9,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,32,1,0


In [19]:
# Display the preprocessed dense features (built with include_additional_features=True).
dense_features

Unnamed: 0,Base Clock (MHz),Boost Clock (MHz),Bus_PCIe 3.0,Bus_PCIe 4.0,Cores,FP32,GPU,GPU Memory Bandwidth (GB/s),Memory (GB),Memory Bus (bit),...,memory_out,memory_total,memory_weights,optimizer_Adadelta,optimizer_Adagrad,optimizer_Adam,optimizer_None,optimizer_RMSProp,optimizer_SGD,precision
0,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,262144,209256448,102760448,0.0,0.0,1.0,0.0,0.0,0.0,32
1,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,262144,34603008,16777216,0.0,0.0,1.0,0.0,0.0,0.0,32
2,1245,1380,1.0,0.0,5120,14130,V100,897,16,4096,...,640,607488,40960,0.0,0.0,1.0,0.0,0.0,0.0,32


In [20]:
# Remove the 'GPU' name column from all four feature frames before prediction.
# errors='ignore' keeps the cell re-runnable: on a second run the column is
# already gone and a plain drop would raise a KeyError.
conv_features = conv_features.drop(columns=['GPU'], errors='ignore')
dense_features = dense_features.drop(columns=['GPU'], errors='ignore')

conv_features_less = conv_features_less.drop(columns=['GPU'], errors='ignore')
dense_features_less = dense_features_less.drop(columns=['GPU'], errors='ignore')

In [21]:
# Mini-batches per epoch (fractional when batch_size does not divide
# dataset_size evenly).
batches_per_epoch = dataset_size / batch_size

# Per-layer predictions from the predictors created with with_features=False.
conv_times_less = conv_wo_features_predictor.predict(conv_features_less)
dense_times_less = dense_wo_features_predictor.predict(dense_features_less)

# Scale every per-layer value by the batches per epoch, add them up and divide
# by 1000 (presumably ms -> s; TODO confirm the unit returned by predict()).
predicted_conv_less = sum(conv_times_less * batches_per_epoch) / 1000
predicted_dense_less = sum(dense_times_less * batches_per_epoch) / 1000

In [22]:
# Mini-batches per epoch (fractional when batch_size does not divide
# dataset_size evenly).
batches_per_epoch = dataset_size / batch_size

# Per-layer predictions from the predictors created with with_features=True.
conv_times = conv_predictor.predict(conv_features)
dense_times = dense_predictor.predict(dense_features)

# Scale every per-layer value by the batches per epoch, add them up and divide
# by 1000 (presumably ms -> s; TODO confirm the unit returned by predict()).
predicted_conv = sum(conv_times * batches_per_epoch) / 1000
predicted_dense = sum(dense_times * batches_per_epoch) / 1000

In [23]:
# End-to-end epoch estimate = dense-layer estimate + conv-layer estimate; only
# these two layer types contribute. Raw-feature variant first, then the variant
# using all features.
predicted_e2e_less = predicted_dense_less + predicted_conv_less
predicted_e2e = predicted_dense + predicted_conv

In [24]:
# Report for the all-features predictors: relative error in percent with
# respect to the measured epoch time, then the predicted and the measured value.
print(f"Error in prediction with all features for VGG16: {abs(predicted_e2e - time_for_one_epoch) / time_for_one_epoch * 100:.2f}%")
# Message typo fixed: "will all features" -> "with all features".
print(f'Predicted time for one epoch with all features for VGG16: {predicted_e2e:.2f} seconds')
print(f'Actual time for one epoch: {time_for_one_epoch:.2f} seconds')

Error in prediction with all features for VGG16: 13.96%
Predicted time for one epoch will all features for VGG16: 7.45 seconds
Actual time for one epoch: 8.65 seconds


In [25]:
# Report for the raw-feature predictors: relative error in percent with respect
# to the measured epoch time, then the predicted and the measured value.
raw_error_pct = abs(predicted_e2e_less - time_for_one_epoch) / time_for_one_epoch * 100
print(f"Error in prediction with raw features for VGG16: {raw_error_pct:.2f}%")
print(
    f'Predicted time for one epoch with raw features for VGG16: {predicted_e2e_less:.2f} seconds'
)
print(
    f'Actual time for one epoch: {time_for_one_epoch:.2f} seconds'
)

Error in prediction with raw features for VGG16: 78.87%
Predicted time for one epoch with raw features for VGG16: 1.83 seconds
Actual time for one epoch: 8.65 seconds
