In [2]:
import numpy as np
import math
import torch
from preprocessing import *
from model import *
from model_deep import PerfectFeatureModel
from types import SimpleNamespace
import time


In [3]:
if torch.cuda.is_available():
    num_devices = torch.cuda.device_count()
    for i in range(num_devices):
        device = torch.cuda.device(i)
        total_mem = torch.cuda.get_device_properties(i).total_memory / 1024**3  # Convert to GB
        allocated_mem = torch.cuda.memory_allocated(i) / 1024**3  # Convert to GB
        free_mem = total_mem - allocated_mem
        
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"Total Memory: {total_mem:.1f}GB")
        print(f"Allocated Memory: {allocated_mem:.1f}GB")
        print(f"Free Memory: {free_mem:.1f}GB")
        
        if free_mem < 8:
            print(f"Warning: GPU {i} has less than 8GB of free VRAM!")
        else:
            print(f"Using GPU {i} with {free_mem:.1f}GB free VRAM")
            break 
    device = torch.device(f"cuda:{0}")
else:
    print("Warning: No CUDA devices available - running on CPU only")
    device = torch.device("cpu")

GPU 0: NVIDIA RTX A6000
Total Memory: 44.6GB
Allocated Memory: 0.0GB
Free Memory: 44.6GB
Using GPU 0 with 44.6GB free VRAM


In [4]:
def anti_vec(vec):
    N = math.ceil(math.sqrt(vec.shape[-1] * 2))
    adj = torch.zeros((vec.shape[0], N, N), dtype=vec.dtype)
    row_idx, col_idx = torch.triu_indices(N, N, offset=1)
    row_idx = (N - 1 - row_idx).flip(dims=[0])
    col_idx = (N - 1 - col_idx).flip(dims=[0])
    adj[:, row_idx, col_idx] = vec
    adj[:, col_idx, row_idx] = vec
    return adj

In [5]:
training_data = np.loadtxt('../lr_train.csv', delimiter=',', skiprows=1)
training_data_adj = anti_vec(torch.from_numpy(training_data))

training_label = np.loadtxt('../hr_train.csv', delimiter=',', skiprows=1)
training_label_adj = anti_vec(torch.from_numpy(training_label))

In [None]:
n = math.ceil(math.sqrt(training_data.shape[-1] * 2))
n_prime =  math.ceil(math.sqrt(training_label.shape[-1] * 2))

args = {
    "lr_dim": 160,
    "hr_dim": 268,
    "hidden_dim": 268,
    "lr": 0.005,
    "epochs": 100,
    "padding": 26,
    "device": device,
    "batch_size": len(training_data) // 10
}
args = SimpleNamespace(**args)
model = PerfectFeatureModel(160, 268).to(device)
state_dict = torch.load(f"./model_{0}/model.pth")
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
total_time = 0.0
max_memory_use = 0.0
for x, y in zip(training_data_adj, training_label_adj):
    torch.cuda.reset_peak_memory_stats()
    initial_memory = torch.cuda.memory_allocated()
    x = torch.tensor(x, dtype=torch.float32).to(device)
    start_time = time.time()
    pred, _ = model(x)
    total_time += time.time() - start_time
    max_memory_use = max(max_memory_use, torch.cuda.max_memory_allocated() - initial_memory)
total_time /= len(training_data_adj)
print(f"Max Memory Allocated: {max_memory_use / 1024**2:.2f} MB")
print(f"Average Time per Sample: {total_time} s")

  x = torch.tensor(x, dtype=torch.float32).to(device)


Max Memory Allocated: 2116.49 MB
Average Time per Sample: 0.004427226003772484 s


In [15]:
model = model.to('cpu')
total_time = 0.0
for x, y in zip(training_data_adj, training_label_adj):
    x = torch.tensor(x, dtype=torch.float32)
    start_time = time.time()
    pred, _ = model(x)
    total_time += time.time() - start_time
total_time /= len(training_data_adj)
print(f"Average Time per Sample: {total_time} s")

  x = torch.tensor(x, dtype=torch.float32)


Average Time per Sample: 0.6095667627757181 s
