In [1]:
from torch_geometric.nn import GRetriever
from torch_geometric.datasets import UpdatedWebQSPDataset
from torch_geometric.profile.profiler import Profiler
from torch_geometric.profile import profileit, timeit, nvtxit
from g_retriever import train, get_loss, inference_step
import time
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the WebQSP dataset object, passing "profiled_ds" as its root directory.
# Presumably downloaded/processed files are cached under that root — confirm
# against the dataset class.
ds = UpdatedWebQSPDataset(root="profiled_ds")

In [3]:
# Prefer the GPU when one is visible to PyTorch; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Assemble the G-Retriever model around the TinyLlama chat checkpoint.
# The GNN sizes chosen here (1024 hidden channels, 4 layers) are repeated in
# the `train(...)` calls further down in this notebook.
model = GRetriever(
    llm_to_use="TinyLlama/TinyLlama-1.1B-Chat-v0.1",
    num_llm_params=1,  # parameter count expressed in billions
    gnn_hidden_channels=1024,
    num_gnn_layers=4,
    mlp_out_dim=2048,
)
# Move the assembled model to the selected device and keep the returned handle.
model = model.to(device)

Loading TinyLlama/TinyLlama-1.1B-Chat-v0.1
Setting up TinyLlama/TinyLlama-1.1B-Chat-v0.1 w/ kwargs = {'revision': 'main', 'max_memory': {0: '22GiB'}, 'low_cpu_mem_usage': True, 'device_map': 'auto', 'torch_dtype': torch.bfloat16}


In [4]:
@nvtxit("test_inference_wrapper")
def inference_wrapper(*args, **kwargs):
    """Forward every argument to ``inference_step`` and return its result.

    The function adds no logic of its own; it exists so that the inference
    step can be decorated with ``nvtxit`` under the label
    ``"test_inference_wrapper"`` without editing ``inference_step`` itself.

    Args:
        *args: Positional arguments passed through unchanged.
        **kwargs: Keyword arguments passed through unchanged.

    Returns:
        Whatever ``inference_step`` returns for the given arguments.
    """
    step_output = inference_step(*args, **kwargs)
    return step_output

In [5]:
# Train for a single epoch, routing inference through the NVTX-decorated wrapper.
# NOTE(review): the output recorded below this cell is an AttributeError
# ('function' object has no attribute 'named_parameters'); the cause is not
# visible in this notebook — confirm what `model` was bound to in that session.
start = time.time()
result = train(
    since=start,
    num_epochs=1,
    dataset=ds,
    # Same GNN sizes as were used when constructing the model.
    hidden_channels=1024,
    num_gnn_layers=4,
    batch_size=8,
    eval_batch_size=16,
    lr=1e-5,
    loss_fn=get_loss,
    inference_fn=inference_wrapper,
    model=model,
)

AttributeError: 'function' object has no attribute 'named_parameters'

In [4]:
@profileit("cuda")
def profile_wrapper(model: torch.nn.Module, device_tensor: torch.Tensor, **kwargs):
    """Run ``train`` with ``model`` supplied as a keyword argument.

    Args:
        model: The module to train; handed to ``train`` as ``model=``.
        device_tensor: Never read inside this function. Presumably it is
            present so the ``profileit`` decorator can infer the GPU device
            from a tensor argument — confirm against the PyG profile docs.
        **kwargs: All remaining keyword arguments, forwarded to ``train``
            unchanged.

    Returns:
        The return value of ``train``. Note that callers see the decorated
        function; in this notebook the last element of the decorated call's
        result is displayed as a ``CUDAStats`` record.
    """
    # `model` is a named parameter, so it can never also appear in `kwargs`;
    # passing it explicitly is equivalent to inserting it into the dict first.
    return train(model=model, **kwargs)

In [5]:
# Run one training epoch under the `profileit("cuda")`-decorated wrapper.
start = time.time()
result = profile_wrapper(
    model,
    # Zero-element tensor allocated directly on the GPU. `profile_wrapper`
    # never reads it; presumably it only lets the decorator detect the CUDA
    # device — confirm against the PyG profile docs. `torch.empty` replaces
    # the legacy `torch.Tensor()` constructor plus a host-to-device copy.
    torch.empty(0, device="cuda"),
    since=start,
    num_epochs=1,
    dataset=ds,
    # Same GNN sizes as were used when constructing the model.
    hidden_channels=1024,
    num_gnn_layers=4,
    batch_size=8,
    eval_batch_size=16,
    lr=1e-5,
    loss_fn=get_loss,
    inference_fn=inference_step,
)

Total Prep Time (prep_time) = 1.93
Training beginning...


Epoch: 1|1: 100%|██████████| 353/353 [03:09<00:00,  1.87it/s]


Epoch: 1|1,Train Loss (Epoch Mean): 1.6746924656308744




Epoch: 1|1, Val Loss: 1.5635125488042831
Final Evaluation...


100%|██████████| 102/102 [02:23<00:00,  1.42s/it]

Label: actor|businessperson|rapper|film director|musician|singer|songwriter|guitarist
Pred: ['[/S]  [/A]  [/R]  [/P]  [/T]  [/F]  [']
Exception: unterminated character set at position 36
------------------
Hit: 0.3239
Precision: 0.2031
Recall: 0.2043
F1: 0.1680
Test Acc 0.3239090350338045
Saving Model...


100%|██████████| 102/102 [02:29<00:00,  1.46s/it]

Saving eval output for downstream demo...
Done!





In [7]:
# Show the last element of the profiled call's result; in the recorded run this
# is a CUDAStats record with the elapsed time and GPU memory figures.
result[-1]

CUDAStats(time=347.8926875, max_allocated_gpu=19610.99, max_reserved_gpu=23148.0, max_active_gpu=19610.99, nvidia_smi_free_cuda=7884.72, nvidia_smi_used_cuda=17497.37)

In [1]:
# Profile only the `graph_encoder` submodule while a full one-epoch training
# run executes, collecting CUDA and memory figures at module depth 1.
# NOTE(review): the output recorded below is a NameError for `Profiler`, and the
# cell carries execution count 1 — it looks like it was run in a fresh kernel
# before the import cell; confirm by running the notebook top to bottom.
with Profiler(
    model=model.graph_encoder,
    use_cuda=True,
    profile_memory=True,
    max_depth=1,
) as prof:
    start = time.time()
    prep_time, dataset, gnn_llm_eval_outs = train(
        since=start,
        num_epochs=1,
        # Same GNN sizes as were used when constructing the model.
        hidden_channels=1024,
        num_gnn_layers=4,
        batch_size=8,
        eval_batch_size=16,
        lr=1e-5,
        loss_fn=get_loss,
        inference_fn=inference_step,
        dataset=ds,
        model=model,
    )

NameError: name 'Profiler' is not defined

In [5]:
# Collect the profiler's results. Later cells index this object as a sequence:
# element 0 is written out as text and element 1 is the list of column headers.
trace = prof.get_trace()

In [6]:
# Display the raw trace object; its first element is the formatted text table.
trace

('Module                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Self CPU total | CPU total | Number of Calls\n-----------------------------------------------------------------------

In [7]:
# Save the formatted text table (first element of the trace) to `trace.txt`.
# The encoding is stated explicitly so the file's bytes do not depend on the
# platform's locale default.
with open('trace.txt', 'w', encoding='utf-8') as f:
    f.write(trace[0])

In [8]:
import pandas as pd

In [9]:
# Count the column headers carried in the trace's second element.
len(trace[1])

10

In [10]:
# List the column header names; the first one ('Module') labels the row names.
trace[1]

['Module',
 'Self CPU total',
 'CPU total',
 'Self CUDA total',
 'CUDA total',
 'Self CPU Mem',
 'CPU Mem',
 'Self CUDA Mem',
 'CUDA Mem',
 'Number of Calls']

In [11]:
# Turn the trace into a DataFrame for easier inspection.
# The leading 'Module' header is dropped from the column names because the
# row labels go into the index instead of a regular column.
# Presumably trace[3] holds the per-row labels and trace[4] the per-row values
# — inferred from the rendered frame; confirm against `Profiler.get_trace`.
df = pd.DataFrame(
    data=trace[4],
    index=trace[3],
    columns=trace[1][1:],
)

In [12]:
# Render the profiling table (one row per labelled module/op entry).
df

Unnamed: 0,Self CPU total,CPU total,Self CUDA total,CUDA total,Self CPU Mem,CPU Mem,Self CUDA Mem,CUDA Mem,Number of Calls
-dropout--aten::dropout,142.000us,142.000us,0.000us,0.000us,0 b,0 b,0 b,0 b,1413
-dropout--cudaDeviceSynchronize,5.410ms,5.410ms,0.000us,0.000us,0 b,0 b,0 b,0 b,1413
-act--aten::relu,252.233ms,476.470ms,0.000us,1.413ms,0 b,0 b,0 b,438.95 Mb,1413
-act--aten::clamp_min,23.247ms,224.237ms,1.413ms,1.413ms,0 b,0 b,438.95 Mb,438.95 Mb,1413
-act--cudaLaunchKernel,200.990ms,200.990ms,0.000us,0.000us,0 b,0 b,0 b,0 b,1413
...,...,...,...,...,...,...,...,...,...
-norms-- -cudaDeviceSynchronize,1.042ms,1.042ms,0.000us,0.000us,0 b,0 b,0 b,0 b,471
-norms-- -aten::copy_,777.000us,16.076ms,118.000us,118.000us,0 b,0 b,0 b,0 b,118
-norms-- -cudaMemcpyAsync,15.299ms,15.299ms,0.000us,0.000us,0 b,0 b,0 b,0 b,118
-norms-- -Memcpy DtoD (Device -> Device),0.000us,0.000us,118.000us,118.000us,0 b,0 b,0 b,0 b,118


In [19]:
# Walk the model's direct submodules and print each (name, module) pair.
for child_name, child_module in model.named_children():
    print((child_name, child_module))

('llm_to_use', LLM(
  (llm): LlamaForCausalLM(
    (model): LlamaModel(
      (embed_tokens): Embedding(32001, 2048)
      (layers): ModuleList(
        (0-21): 22 x LlamaDecoderLayer(
          (self_attn): LlamaSdpaAttention(
            (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (k_proj): Linear(in_features=2048, out_features=256, bias=False)
            (v_proj): Linear(in_features=2048, out_features=256, bias=False)
            (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (rotary_emb): LlamaRotaryEmbedding()
          )
          (mlp): LlamaMLP(
            (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
            (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
            (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): LlamaRMSNorm()
          (post_attention_layernorm): LlamaRMSNo