In [1]:
from torch_geometric.nn import GRetriever
from torch_geometric.datasets import UpdatedWebQSPDataset
from torch_geometric.profile.profiler import Profiler
from g_retriever import train, get_loss, inference_step
import time
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset object consumed by the profiled train() call further down;
# "profiled_ds" is handed over as its `root` (presumably the on-disk location — confirm).
ds = UpdatedWebQSPDataset(root="profiled_ds")

In [3]:
# Use the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# GRetriever configured with the TinyLlama chat checkpoint as its LLM and a
# 4-layer, 1024-channel GNN; the hyperparameters are the ones reused by train().
model = GRetriever(
    llm_to_use="TinyLlama/TinyLlama-1.1B-Chat-v0.1",
    num_llm_params=1,  # expressed in billions of parameters
    gnn_hidden_channels=1024,
    num_gnn_layers=4,
    mlp_out_dim=2048,
)
model = model.to(device)

Loading TinyLlama/TinyLlama-1.1B-Chat-v0.1
Setting up TinyLlama/TinyLlama-1.1B-Chat-v0.1 w/ kwargs = {'revision': 'main', 'max_memory': {0: '22GiB'}, 'low_cpu_mem_usage': True, 'device_map': 'auto', 'torch_dtype': torch.bfloat16}


In [4]:
# Profile a single-epoch train + evaluation run of `model`.
# CUDA profiling is requested only when a CUDA device is actually present, so this
# cell stays consistent with the CPU fallback applied when `device` was chosen.
with Profiler(model=model, use_cuda=torch.cuda.is_available()) as prof:
    start = time.time()  # wall-clock reference handed to train() as `since`
    prep_time, dataset, gnn_llm_eval_outs = train(
        since=start,
        num_epochs=1,
        hidden_channels=1024,
        num_gnn_layers=4,
        batch_size=8,
        eval_batch_size=16,
        lr=1e-5,
        loss_fn=get_loss,
        inference_fn=inference_step,
        dataset=ds,
        model=model,
    )

Total Prep Time (prep_time) = 0.3
Training beginning...


Epoch: 1|1:   0%|          | 0/353 [00:00<?, ?it/s]STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:37:29 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2

Epoch: 1|1,Train Loss (Epoch Mean): 1.6556277872820413


STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:39:59 183206:183206 ActivityProfilerCo

Epoch: 1|1, Val Loss: 1.5606553107500076
Final Evaluation...


  0%|          | 0/102 [00:00<?, ?it/s]STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-17 19:40:03 183206:183206 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-17 19

Hit: 0.3286
Precision: 0.2192
Recall: 0.2013
F1: 0.1746
Test Acc 0.3286240786240786
Saving Model...


100%|██████████| 102/102 [01:43<00:00,  1.02s/it]

Saving eval output for downstream demo...
Done!





In [5]:
# Collect the profiler's results. As used by the cells below, the result is indexable:
# [0] is the formatted text table, [1] the list of column headers, and
# [3] / [4] supply the row labels and row values for the DataFrame.
trace = prof.get_trace()

In [6]:
# Show the raw trace object; its first element is a very wide text table,
# so the repr is long.
trace

('Module                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Self CPU total | CPU total | Number of Calls\n-----------------------------------------------------------------------

In [8]:
# Persist the formatted trace table (first element of `trace`) as plain text.
# The encoding is explicit so the file does not depend on the platform's
# locale-default text encoding.
with open('trace.txt', 'w', encoding='utf-8') as f:
    f.write(trace[0])

In [14]:
import pandas as pd

In [19]:
# Number of column headers reported by the trace.
len(trace[1])

10

In [20]:
# Column headers of the trace table; the first one ('Module') labels the rows.
trace[1]

['Module',
 'Self CPU total',
 'CPU total',
 'Self CUDA total',
 'CUDA total',
 'Self CPU Mem',
 'CPU Mem',
 'Self CUDA Mem',
 'CUDA Mem',
 'Number of Calls']

In [21]:
# Tabulate the trace with one row per profiled entry.
df = pd.DataFrame(
    data=trace[4],          # row values
    index=trace[3],         # row labels
    columns=trace[1][1:],   # skip the leading 'Module' header; the labels live in the index
)

In [22]:
# Render the profiling table.
df

Unnamed: 0,Self CPU total,CPU total,Self CUDA total,CUDA total,Self CPU Mem,CPU Mem,Self CUDA Mem,CUDA Mem,Number of Calls
-dropout--aten::dropout,136.000us,136.000us,0.000us,0.000us,0 b,0 b,0 b,0 b,1413
-dropout--cudaDeviceSynchronize,4.750ms,4.750ms,0.000us,0.000us,0 b,0 b,0 b,0 b,1413
-act--aten::relu,280.710ms,520.573ms,0.000us,1.413ms,0 b,0 b,0 b,0 b,1413
-act--aten::clamp_min,22.064ms,239.863ms,1.413ms,1.413ms,0 b,0 b,0 b,0 b,1413
-act--cudaLaunchKernel,217.799ms,217.799ms,0.000us,0.000us,0 b,0 b,0 b,0 b,1413
...,...,...,...,...,...,...,...,...,...
-norms-- -cudaDeviceSynchronize,1.048ms,1.048ms,0.000us,0.000us,0 b,0 b,0 b,0 b,471
-norms-- -aten::copy_,745.000us,17.173ms,118.000us,118.000us,0 b,0 b,0 b,0 b,118
-norms-- -cudaMemcpyAsync,16.428ms,16.428ms,0.000us,0.000us,0 b,0 b,0 b,0 b,118
-norms-- -Memcpy DtoD (Device -> Device),0.000us,0.000us,118.000us,118.000us,0 b,0 b,0 b,0 b,118
