In [1]:
import argparse
import sys

import torch

# Local imports
from train import train_model

In [2]:
# Record which interpreter and Python version this kernel runs on, so the
# outputs below can be tied to a concrete environment.
# (`sys` is imported in the notebook's import cell.)
print(sys.executable)
print(sys.version)

/opt/miniconda3/bin/python
3.12.2 | packaged by conda-forge | (main, Feb 16 2024, 20:50:58) [GCC 12.3.0]


In [3]:
# Choose the compute device in priority order: the MPS backend first, then the
# first CUDA GPU, and finally the CPU. The conditional expression evaluates
# left to right, so CUDA is only probed when MPS is unavailable.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda:0" if torch.cuda.is_available()
    else "cpu"
)

print(f"Using device: {device}")

Using device: cuda:0


In [4]:
# Hyper-parameters and paths for this training run. The next cell wraps this
# dict in an argparse.Namespace and passes it to train_model.
args = {
    # --- Data and output locations ---
    "dataset": "DAVIS",  # Or "KIBA", "CD4C"
    "data_path": "../data",
    # Directory searched for per-target protein graph files (the run log shows
    # lookups such as 'MYO3A.pt').
    "protein_graph_dir": "../data/protein_graphs",
    "model_path": "../models/gat_cd4c_davis_graphs.pth",  # Example model path
    "seed": 0,
    # Same preference order as the device-selection cell (MPS, then CUDA, then
    # CPU) so this string names the same backend as the `device` object that is
    # passed to train_model. Previously MPS was ignored here, which made the
    # two disagree on MPS hosts. Values on CUDA/CPU hosts are unchanged.
    "device": (
        "mps" if torch.backends.mps.is_available()
        else "cuda" if torch.cuda.is_available()
        else "cpu"
    ),

    # --- Optimisation ---
    # NOTE(review): the original comment here read "128", presumably an
    # earlier/default value. The recorded run below hit CUDA out-of-memory
    # errors at 64 on a 7.66 GiB GPU that had only ~50-70 MiB free, so free up
    # the GPU or lower this value if the errors persist.
    "batch_size": 64,
    "max_epochs": 500,
    "lr": 1e-4,
    "weight_decay": 2e-4,
    "stoppage_epochs": 32,
    "scheduler_patience": 10,
    "scheduler_factor": 0.5,
    "huber_beta": 0.5,
    "clip_grad_norm": 1.0,

    # --- Dataset handling ---
    "use_small_dataset": False,  # True for quick testing
    "frac_train": 0.8,
    "frac_validation": 0.1,
    "frac_test": 0.1,
    "max_nodes": 72,  # Max nodes for padding
    "num_workers": 4,  # Or based on your CPU cores

    # --- Model architecture ---
    # These should match the defaults in get_parser or be set explicitly.
    "hidden_size": 256,
    "emb_size": 192,
    "num_layers": 8,
    "num_attn_heads": 12,
    "dropout": 0.1,
    "mlp_dropout": 0.2,
    "pooling_dim": 192,
    "mlp_hidden": 256,
    "use_cross": True,  # Or False
    # "include_3d_drug" is intentionally omitted: per the original note,
    # train.py reads it with getattr when it is not present (not visible here;
    # verify against train.py).
}

In [5]:
# Wrap the config dict in a Namespace so train_model can read the settings as
# attributes, then start training on the device selected earlier.
# (`argparse` is imported in the notebook's import cell.)
#
# NOTE: in the run recorded below, every training batch shown was skipped with
# "CUDA out of memory". The messages report only ~50-70 MiB free on a 7.66 GiB
# GPU while this process held about 3.8 GiB, so something else may have been
# occupying the GPU (e.g. another process or a stale kernel; not confirmed
# from this notebook). Free the GPU or lower args["batch_size"] before
# re-running, otherwise the epochs complete without training on any batch.
training_args = argparse.Namespace(**args)
train_model(training_args, device)

Model parameters: 5,195,733
DEBUG load_data: Received dataset_name = 'DAVIS' (type: <class 'str'>)
Attempting to load main interaction data from: ../data/DAVIS_dataset.csv
Loading main interaction data from cached file: ../data/DAVIS_dataset.csv
Dataset split: Train 20617, Validation 2577, Test 2578


Processing drugs: 100%|██████████| 68/68 [00:00<00:00, 628.06it/s]


INFO: Validating protein graph paths...
INFO: Checking 379 unique Target_IDs from the DataFrame.
  Processed ID 1/379: original='MYO3A', sanitized='MYO3A' -> FOUND (using sanitized): 'MYO3A.pt'
  Processed ID 2/379: original='VEGFR2', sanitized='VEGFR2' -> FOUND (using sanitized): 'VEGFR2.pt'
  Processed ID 3/379: original='TRKC', sanitized='TRKC' -> FOUND (using sanitized): 'TRKC.pt'
  Processed ID 4/379: original='IKK-alpha', sanitized='IKK-alpha' -> FOUND (using sanitized): 'IKK-alpha.pt'
  Processed ID 5/379: original='PRKCH', sanitized='PRKCH' -> FOUND (using sanitized): 'PRKCH.pt'
  Processed ID 6/379: original='LIMK2', sanitized='LIMK2' -> FOUND (using sanitized): 'LIMK2.pt'
  Processed ID 7/379: original='INSR', sanitized='INSR' -> FOUND (using sanitized): 'INSR.pt'
  Processed ID 8/379: original='YANK2', sanitized='YANK2' -> FOUND (using sanitized): 'YANK2.pt'
  Processed ID 9/379: original='PIP5K1A', sanitized='PIP5K1A' -> FOUND (using sanitized): 'PIP5K1A.pt'
  Processed ID 

Processing drugs: 100%|██████████| 68/68 [00:00<00:00, 656.43it/s]


INFO: Validating protein graph paths...
INFO: Checking 379 unique Target_IDs from the DataFrame.
  Processed ID 1/379: original='TNNI3K', sanitized='TNNI3K' -> FOUND (using sanitized): 'TNNI3K.pt'
  Processed ID 2/379: original='TLK1', sanitized='TLK1' -> FOUND (using sanitized): 'TLK1.pt'
  Processed ID 3/379: original='CASK', sanitized='CASK' -> FOUND (using sanitized): 'CASK.pt'
  Processed ID 4/379: original='DAPK1', sanitized='DAPK1' -> FOUND (using sanitized): 'DAPK1.pt'
  Processed ID 5/379: original='MAP3K15', sanitized='MAP3K15' -> FOUND (using sanitized): 'MAP3K15.pt'
  Processed ID 6/379: original='TTK', sanitized='TTK' -> FOUND (using sanitized): 'TTK.pt'
  Processed ID 7/379: original='PDPK1', sanitized='PDPK1' -> FOUND (using sanitized): 'PDPK1.pt'
  Processed ID 8/379: original='PKN1', sanitized='PKN1' -> FOUND (using sanitized): 'PKN1.pt'
  Processed ID 9/379: original='JNK1', sanitized='JNK1' -> FOUND (using sanitized): 'JNK1.pt'
  Processed ID 10/379: original='CSNK1E

Processing drugs: 100%|██████████| 68/68 [00:00<00:00, 654.45it/s]


INFO: Validating protein graph paths...
INFO: Checking 379 unique Target_IDs from the DataFrame.
  Processed ID 1/379: original='ARK5', sanitized='ARK5' -> FOUND (using sanitized): 'ARK5.pt'
  Processed ID 2/379: original='ERBB4', sanitized='ERBB4' -> FOUND (using sanitized): 'ERBB4.pt'
  Processed ID 3/379: original='MYO3A', sanitized='MYO3A' -> FOUND (using sanitized): 'MYO3A.pt'
  Processed ID 4/379: original='AXL', sanitized='AXL' -> FOUND (using sanitized): 'AXL.pt'
  Processed ID 5/379: original='S6K1', sanitized='S6K1' -> FOUND (using sanitized): 'S6K1.pt'
  Processed ID 6/379: original='LKB1', sanitized='LKB1' -> FOUND (using sanitized): 'LKB1.pt'
  Processed ID 7/379: original='MET(Y1235D)', sanitized='MET(Y1235D)' -> FOUND (using sanitized): 'MET.pt'
  Processed ID 8/379: original='RET(V804M)', sanitized='RET(V804M)' -> FOUND (using sanitized): 'RET.pt'
  Processed ID 9/379: original='SRMS', sanitized='SRMS' -> FOUND (using sanitized): 'SRMS.pt'
  Processed ID 10/379: origina

Epoch 1/500 [Train]:   1%|          | 1/162 [00:09<26:01,  9.70s/it]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.56 MiB is free. Including non-PyTorch memory, this process has 3.74 GiB memory in use. Of the allocated memory 3.40 GiB is allocated by PyTorch, and 189.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   1%|          | 2/162 [00:09<11:00,  4.13s/it]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 72.38 MiB is free. Including non-PyTorch memory, this process has 3.74 GiB memory in use. Of the allocated memory 3.37 GiB is allocated by PyTorch, and 217.53 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   2%|▏         | 4/162 [00:10<04:02,  1.53s/it]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.12 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 187.07 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.
Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.12 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 208.34 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_C

Epoch 1/500 [Train]:   3%|▎         | 5/162 [00:10<02:50,  1.09s/it]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.12 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 208.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   4%|▎         | 6/162 [00:11<02:07,  1.22it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.31 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 208.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   4%|▍         | 7/162 [00:11<01:40,  1.54it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.31 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 208.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   5%|▍         | 8/162 [00:11<01:22,  1.86it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.31 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 209.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   6%|▌         | 9/162 [00:11<01:10,  2.16it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.31 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 210.02 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   6%|▌         | 10/162 [00:12<01:02,  2.43it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.44 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 209.63 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   7%|▋         | 11/162 [00:12<00:56,  2.66it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 48.44 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 209.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   7%|▋         | 12/162 [00:13<01:00,  2.49it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 50.56 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 183.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   8%|▊         | 13/162 [00:13<00:55,  2.68it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 50.81 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 184.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   9%|▊         | 14/162 [00:13<00:52,  2.84it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 52.81 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 184.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:   9%|▉         | 15/162 [00:13<00:49,  2.96it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 52.81 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 184.15 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  10%|▉         | 16/162 [00:14<00:47,  3.05it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 52.81 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 183.91 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  10%|█         | 17/162 [00:14<00:46,  3.12it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 52.81 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 182.89 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  11%|█         | 18/162 [00:14<00:45,  3.17it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 63.00 MiB is free. Including non-PyTorch memory, this process has 3.85 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 183.83 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  12%|█▏        | 20/162 [00:15<00:37,  3.76it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 49.38 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.40 GiB is allocated by PyTorch, and 315.22 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.
Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 49.38 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 229.07 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_C

Epoch 1/500 [Train]:  13%|█▎        | 21/162 [00:15<00:38,  3.66it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 49.38 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 228.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  14%|█▎        | 22/162 [00:15<00:39,  3.56it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 49.38 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 227.88 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  14%|█▍        | 23/162 [00:16<00:39,  3.49it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 49.38 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 229.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  15%|█▍        | 24/162 [00:16<00:48,  2.85it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 57.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  15%|█▌        | 25/162 [00:17<00:47,  2.90it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 53.62 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 176.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  16%|█▌        | 26/162 [00:17<00:46,  2.95it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 53.62 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.63 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  17%|█▋        | 27/162 [00:17<00:45,  2.99it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 53.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 177.05 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  17%|█▋        | 28/162 [00:18<00:44,  3.02it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 176.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  18%|█▊        | 29/162 [00:18<00:43,  3.04it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.62 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 177.05 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  19%|█▊        | 30/162 [00:18<00:43,  3.05it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.62 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  19%|█▉        | 31/162 [00:18<00:42,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.62 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 176.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  20%|█▉        | 32/162 [00:19<00:42,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.75 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  20%|██        | 33/162 [00:19<00:41,  3.07it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.75 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  21%|██        | 34/162 [00:19<00:41,  3.07it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.75 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.88 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  22%|██▏       | 35/162 [00:20<00:41,  3.07it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.88 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 177.07 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  22%|██▏       | 36/162 [00:20<00:40,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.88 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.88 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  23%|██▎       | 37/162 [00:20<00:40,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 51.88 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.88 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  23%|██▎       | 38/162 [00:21<00:40,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 56.94 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 174.86 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  24%|██▍       | 39/162 [00:21<00:39,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 61.06 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  25%|██▍       | 40/162 [00:21<00:39,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  25%|██▌       | 41/162 [00:22<00:39,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  26%|██▌       | 42/162 [00:22<00:38,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  27%|██▋       | 43/162 [00:22<00:38,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  27%|██▋       | 44/162 [00:23<00:38,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  28%|██▊       | 45/162 [00:23<00:37,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  28%|██▊       | 46/162 [00:23<00:37,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  29%|██▉       | 47/162 [00:24<00:37,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  30%|██▉       | 48/162 [00:24<00:37,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  30%|███       | 49/162 [00:24<00:36,  3.08it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 67.19 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  31%|███       | 50/162 [00:25<00:36,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  31%|███▏      | 51/162 [00:25<00:36,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  32%|███▏      | 52/162 [00:25<00:35,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  33%|███▎      | 53/162 [00:26<00:35,  3.07it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  33%|███▎      | 54/162 [00:26<00:35,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  34%|███▍      | 55/162 [00:26<00:34,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 68.25 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  35%|███▍      | 56/162 [00:27<00:34,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 66.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  35%|███▌      | 57/162 [00:27<00:34,  3.05it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 66.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  36%|███▌      | 58/162 [00:27<00:34,  3.05it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 66.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  36%|███▋      | 59/162 [00:28<00:33,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 66.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  37%|███▋      | 60/162 [00:28<00:33,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 66.12 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  38%|███▊      | 61/162 [00:28<00:33,  3.06it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 63.50 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.


Epoch 1/500 [Train]:  38%|███▊      | 62/162 [00:29<00:46,  2.13it/s]

Runtime error in training batch: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 7.66 GiB of which 64.38 MiB is free. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.54 GiB is allocated by PyTorch, and 175.71 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables). Skipping batch.





KeyboardInterrupt: 

In [None]:
import pandas as pd

from utils.helper_functions import plot_loss_curves

# The metrics log is looked up at the checkpoint path with ".csv" appended.
csv_path = "{}.csv".format(training_args.model_path)

# Load the logged metrics into a DataFrame and pass it to the plotting helper.
# NOTE(review): the saved output of this cell is `KeyError: 'train_acc'` —
# presumably plot_loss_curves reads a column this CSV does not contain;
# confirm against utils/helper_functions.py and the columns written during training.
metrics = pd.read_csv(csv_path)
plot_loss_curves(metrics)

KeyError: 'train_acc'