In [None]:
import time
while True:
    a = torch.zeros([10, 10, 10], dtype = torch.float32, device = 'cuda:3', requires_grad = False)
    b = a * a
    print(b[0, 0, 0])
    time.sleep(300)

In [2]:
import defdevice
defdevice.force_device('cuda:3')
import metrics
import nns
import immaskdataset
import importlib
import imcapdataset
import embdataset
import embedders
import losses
import imageops
import misc
import segmodel

RuntimeError: No CUDA GPUs are available

In [11]:
image_ds = embdataset.RAMLoadDataset('COCOM2F_ALL_d8', load_amount = 1000000, norm = True)
text_ds = embdataset.RAMLoadDataset('COCOCLIP_ALL', load_amount = 1000000)

Estimated necessary RAM for loading - 294912000000 bytes
Estimated necessary RAM for loading - 4096000000 bytes


In [12]:
model = nns.Linear(text_ds.embedding_shape, image_ds.embedding_shape)

# Train

In [13]:
def_device = 'cuda:3'

image_ds.reset()
text_ds.reset()

import torch
import torch.nn.functional

batch_count = 10000
batch_size = 128
optim = torch.optim.Adam(model.parameters, lr = 0.003)

image_noise = 0.07 / 10
text_noise = 0.15

for batch_idx in range(batch_count):
    ibatch = image_ds.get_batch(batch_size).to(def_device)
    tbatch = text_ds.get_batch(batch_size).to(def_device)
    
    ibatch = ibatch + torch.rand_like(ibatch) * image_noise
    tbatch = tbatch + torch.rand_like(tbatch) * text_noise
    
    tbatch = model.forward(tbatch)
    
    neg_ibatch, neg_tbatch = losses.sample_negatives(ibatch, tbatch, batch_size)
    
    prods = losses.get_products(ibatch, tbatch)
    probs = torch.sigmoid(prods)
    
    neg_prods = losses.get_products(neg_ibatch, neg_tbatch)
    neg_probs = torch.sigmoid(neg_prods)
    
    loss_positive = losses.max_pressure_loss(probs, 1.0)
    loss_negative = losses.max_pressure_loss(neg_probs, 0.0)
    
    loss = (loss_positive + loss_negative) / 2
    
    loss.backward()
    optim.step()
    optim.zero_grad()
    
    if batch_idx % 1000 == 0:
        print(batch_idx, loss_positive, loss_negative)

0 tensor(0.2383, grad_fn=<MeanBackward0>) tensor(0.2631, grad_fn=<MeanBackward0>)
1000 tensor(0.1494, grad_fn=<MeanBackward0>) tensor(0.1140, grad_fn=<MeanBackward0>)
2000 tensor(0.1555, grad_fn=<MeanBackward0>) tensor(0.1576, grad_fn=<MeanBackward0>)
3000 tensor(0.0878, grad_fn=<MeanBackward0>) tensor(0.1581, grad_fn=<MeanBackward0>)
4000 tensor(0.1126, grad_fn=<MeanBackward0>) tensor(0.1514, grad_fn=<MeanBackward0>)
5000 tensor(0.0994, grad_fn=<MeanBackward0>) tensor(0.1270, grad_fn=<MeanBackward0>)
6000 tensor(0.0919, grad_fn=<MeanBackward0>) tensor(0.1184, grad_fn=<MeanBackward0>)
7000 tensor(0.0788, grad_fn=<MeanBackward0>) tensor(0.1317, grad_fn=<MeanBackward0>)
8000 tensor(0.1008, grad_fn=<MeanBackward0>) tensor(0.1319, grad_fn=<MeanBackward0>)
9000 tensor(0.0969, grad_fn=<MeanBackward0>) tensor(0.1082, grad_fn=<MeanBackward0>)


# Fine Tune

## Get Embeddings

In [16]:
image_embr = embedders.M2FImageEmbedder()
text_embr = embedders.CLIPTextEmbedder()

ds = immaskdataset.RUGDDataset()

test_embeddings = []
test_masks = []
test_count = 1000

train_embeddings = []
train_masks = []
train_count = 1000

labels = ds.labels
label_emb = text_embr.forward(labels)

for i in range(test_count):
    image, masks = ds.get_next()
    test_embeddings.append(image_embr.forward(image))
    test_masks.append(torch.tensor(masks, requires_grad = False, device = defdevice.def_device))
    if i % 100 == 0:
        print(i)

for i in range(train_count):
    image, masks = ds.get_next()
    train_embeddings.append(image_embr.forward(image))
    train_masks.append(torch.tensor(masks, requires_grad = False, device = defdevice.def_device))
    if i % 100 == 0:
        print(i)
    
test_embeddings = torch.tensor(test_embeddings, requires_grad = False, device = defdevice.def_device)
train_embeddings = torch.tensor(train_embeddings, requires_grad = False, device = defdevice.def_device)

0
100
200
300
400
500
600
700
800
900
0
100
200
300
400
500
600
700
800
900


KeyboardInterrupt: 

In [30]:
import torch

def check_cuda_memory():
    for i in range(torch.cuda.device_count()):
        device = torch.device(f"cuda:{i}")
        torch.cuda.set_device(device)
        
        # Get the current memory allocation and cached memory
        allocated = torch.cuda.memory_allocated()
        cached = torch.cuda.memory_reserved()
        
        # Get the total GPU memory
        total_memory = torch.cuda.get_device_properties(device).total_memory
        
        # Calculate free memory
        free_memory = total_memory - (allocated + cached)
        
        print(f"Device: {device}")
        print(f"Total Memory: {total_memory / 1024 ** 2:.2f} MB")
        print(f"Allocated Memory: {allocated / 1024 ** 2:.2f} MB")
        print(f"Cached Memory: {cached / 1024 ** 2:.2f} MB")
        print(f"Free Memory: {free_memory / 1024 ** 2:.2f} MB")
        print("-" * 50)

if __name__ == "__main__":
    check_cuda_memory()


Device: cuda:0
Total Memory: 81050.62 MB
Allocated Memory: 0.00 MB
Cached Memory: 0.00 MB
Free Memory: 81050.62 MB
--------------------------------------------------
Device: cuda:1
Total Memory: 81050.62 MB
Allocated Memory: 0.00 MB
Cached Memory: 0.00 MB
Free Memory: 81050.62 MB
--------------------------------------------------
Device: cuda:2
Total Memory: 81050.62 MB
Allocated Memory: 0.00 MB
Cached Memory: 0.00 MB
Free Memory: 81050.62 MB
--------------------------------------------------
Device: cuda:3
Total Memory: 81050.62 MB
Allocated Memory: 38334.69 MB
Cached Memory: 51608.00 MB
Free Memory: -8892.06 MB
--------------------------------------------------
Device: cuda:4
Total Memory: 81050.62 MB
Allocated Memory: 0.00 MB
Cached Memory: 0.00 MB
Free Memory: 81050.62 MB
--------------------------------------------------
Device: cuda:5
Total Memory: 81050.62 MB
Allocated Memory: 0.00 MB
Cached Memory: 0.00 MB
Free Memory: 81050.62 MB
-----------------------------------------------

In [19]:
test_embeddings[0].shape

torch.Size([256, 96, 96])

In [24]:
test_embeddings = torch.tensor(test_embeddings, requires_grad = False, device = defdevice.def_device)
train_embeddings = torch.tensor(train_embeddings, requires_grad = False, device = defdevice.def_device)

  test_embeddings = torch.tensor(test_embeddings, requires_grad = False, device = defdevice.def_device)


RuntimeError: CUDA out of memory. Tried to allocate 8.79 GiB (GPU 3; 79.15 GiB total capacity; 37.44 GiB already allocated; 5.58 GiB free; 50.40 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

## Train on embeddings

In [None]:
optim = torch.optim.Adam(model.parameters, lr = 0.003)

epoch_count = 0

for j in range(epoch_count)
    for i in range(train_count)
        trans_embs = model.forward(label_emb)

        prods = torch.einsum('eyx,ne->nyx', train_embeddings[i], trans_emb)
        probs = torch.sigmoid(prods, dim = 0)

        batch_size, y, x = masks.shape

        probs_interpolated = F.interpolate(probs.unsqueeze(1), size=(y, x), mode='bilinear', align_corners=False)

        probs_interpolated = probs_interpolated.squeeze(1)
        
        loss = ((probs_interpolated - train_masks[i]) ** 2).mean()
        
        loss.backward()
        optim.step()
        optim.zero_grad()
        
        if i % 100 == 0:
            print(loss)

# Test

In [None]:
import 