In [1]:
import torch
import pickle
import os
import torch.nn as nn
import torchvision.transforms.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision import transforms
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import torch.optim as optim
from torchvision.transforms import ToTensor, Resize, Compose, Normalize, RandomHorizontalFlip, RandomVerticalFlip, RandomRotation, RandomResizedCrop, ColorJitter
import os, argparse, time, subprocess, io, shlex, pickle, pprint
import pandas as pd
import numpy as np
import util.misc as utils
import tqdm
from itertools import islice

In [2]:
torch.cuda.is_available()

True

In [3]:
torch.cuda.device_count()

1

In [4]:
torch.cuda.get_device_name()

'NVIDIA GeForce RTX 3060 Ti'

In [6]:
if not torch.cuda.is_available():
    raise SystemError('GPU device not found')
device = torch.device("cuda:0")

## Dataset

In [7]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class CocoDatasetLoader:
    def __init__(self, coco_root_train, coco_root_val, transform=None, batch_size=128, num_workers=0):
        self.transform = transform
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.coco_root_train = os.path.join(coco_root_train)
        self.coco_root_val = os.path.join(coco_root_val)

    def create_coco_dataset(self, root, ann_file):
        return CocoDetection(root=root, annFile=ann_file, transform=self.transform)
    
    @staticmethod
    def target_to_tensor(target):
        # Extract bounding boxes and labels
        boxes = [obj['bbox'] for obj in target]
        labels = [obj['category_id'] for obj in target]
        image_ids = [obj['image_id'] for obj in target]
        ids = [obj['id'] for obj in target]

        # Pad boxes and labels to a fixed length
        max_len = 100  # Replace with actual maximum length
        while len(boxes) < max_len:
            boxes.append([0, 0, 0, 0])
            labels.append(0)
            image_ids.append(0)
            ids.append(0)

        # Convert to tensors
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)
        image_ids = torch.tensor(image_ids, dtype=torch.int64)
        ids = torch.tensor(ids, dtype=torch.int64)

        # Concatenate boxes and labels
        data = torch.cat((boxes, labels.unsqueeze(1), image_ids.unsqueeze(1), ids.unsqueeze(1)), dim=-1)

        return data
     
    @staticmethod           
    def collate_fn(batch):
        images = [item[0] for item in batch]
        targets = [item[1] for item in batch]

        # Ensure images are tensors
        images = [torch.from_numpy(img) if not isinstance(img, torch.Tensor) else img for img in images]

        # Stack images together
        images = torch.stack(images, dim=0)

        # Convert targets to tensors
        targets = [CocoDatasetLoader.target_to_tensor(t) for t in targets]

        return images, targets
    
    
    def get_data_loaders(self):
        
        coco_dataset_train = self.create_coco_dataset(root=self.coco_root_train, ann_file="./annotations/instances_train2017.json")
        coco_dataset_val = self.create_coco_dataset(root=self.coco_root_val, ann_file="./annotations/instances_val2017.json")

        dataloader_train = DataLoader(coco_dataset_train, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, collate_fn=self.collate_fn)
        dataloader_val = DataLoader(coco_dataset_val, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, collate_fn=self.collate_fn)
        
        return dataloader_train, dataloader_val

coco_loader =CocoDatasetLoader(coco_root_train="./coco_dataset_test/train2017", #print the real image and there normalize image and the
                                coco_root_val="./coco_dataset_test/val2017",
                           
                                transform=transforms.Compose([
                                    RandomHorizontalFlip(),
                                    transforms.Compose([
                                    Resize([256,256]),
                                    RandomResizedCrop(265, scale=(0.2, 1.0)),
                                    ]),
                                    ToTensor(),
                                    Normalize(mean=[0.485, 0.456, 0.406],
                                                std=[0.229, 0.224, 0.225])
                                    
                                ])
)


dataloader_train, dataloader_val = coco_loader.get_data_loaders()


print(f"dataloader_train",len(dataloader_train))
print(f"dataloader_val",len(dataloader_val))
print(f"dataloader_shape",dataloader_train.dataset[0][0].shape)
#print(f"dataloader_shape",dataloader_train.dataset[0][1].shape)

for batch in dataloader_train:
    images, targets = batch
    targets =[ target.to(device) for target in targets]
    images = list(img.to(device) for img in images)
    
    #print("targets",targets)
     
  
    #print(F"targets=", targets)
    #print(F"images=", images)
    
    #for i, image in enumerate(images):
        #print(f"Shape of image {i}: {image.shape}")
        
    #images_tensor = torch.stack(images)
    #print(images_tensor.shape)
    
     # Get color images (assuming images are in RGB format)
    #color_images = [F.to_pil_image(image.cpu()) for image in images]

    # Display color images
    #for i, color_image in enumerate(color_images):
        #plt.imshow(color_image)
        #plt.show()

    # Access tensors (boxes, labels, image_ids, ids) for further processing
    for target in targets:
        #Access target data, e.g., boxes, labels, etc.
        boxes = target[:, :4]
        labels = target[:, 4]
        image_ids = target[:, 5]
        ids = target[:, 6]
        #print(boxes, labels, image_ids, ids)
        
        torch.cuda.empty_cache()
       

   

loading annotations into memory...
Done (t=18.23s)
creating index...
index created!
loading annotations into memory...
Done (t=1.49s)
creating index...
index created!
dataloader_train 925
dataloader_val 40
dataloader_shape torch.Size([3, 265, 265])


## Backbone Model

In [8]:
if not torch.cuda.is_available():
    raise SystemError('GPU device not found')
device = torch.device("cuda:0")

In [9]:
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleCNN(nn.Module):
    def __init__(self, num_boxes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        
        
        self.fc1 = nn.Linear(557568, 256)  # Adjust input size based on image dimensions for image classification
        self.fc2 = nn.Linear(256, num_boxes * 7)  # Each bounding box has 4 coordinates + num_classes for bounding box classification

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
       
        x = x.view(x.size(0), -1) # Flatten the tensor
        print("shape of x",x.shape)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.view(x.size(0), -1, 7) # Reshape to (batch_size, num_boxes, 7)
        return x
# Define the transform to be applied to the images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((256, 256)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    # Add more transformations as needed
])

max_num_boxes=0
# Calculate the maximum number of bounding boxes dynamically
for images, targets in dataloader_train:
    max_num_boxes = max(max_num_boxes, max(target.shape[0] for target in targets))


# Initialize the model and other components
model = SimpleCNN( num_boxes=max_num_boxes)  # Adjust num_classes according to your dataset
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if not torch.cuda.is_available():
    raise SystemError('GPU device not found')
device = torch.device("cuda:0")
model.to(device)

# Set the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for a few epochs (this is a simplified training loop)
num_epochs = 10
for epoch in range(num_epochs):
    total_samples = 0
    correct_samples = 0
    for images, targets in dataloader_train:
        images = images.to(device)
        
        # Assuming targets is a list of your tensors
        max_size = max(t.size(0) for t in targets)
        targets = [F.pad(t, (0, 0, 0, max_size - t.size(0))) for t in targets]
        targets = torch.stack(targets).to(device)
        
        # Re-initialize the optimizer
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, targets)
        
        # Your logic for calculating accuracy may vary based on the structure of your targets
        predictions = torch.sigmoid(outputs) > 0.5
        correct_samples += (predictions == targets).sum().item()
        total_samples += targets.numel()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    accuracy = 100.0 * correct_samples / total_samples
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}, Accuracy: {accuracy:.3f}%")

# Feature extraction for all images in dataloader_train
with torch.no_grad():
    all_extracted_features = []

    for batch in dataloader_train:
        images, _ = batch
        images = images.to(device)
        
        # Assuming 'model' is your object detection model
        extracted_features = model(images)
        all_extracted_features.append(extracted_features)

    # Concatenate features from all batches
    all_extracted_features = torch.cat(all_extracted_features, dim=0)

print("Shape of all extracted features:", all_extracted_features.shape)


shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
shape of x torch.Size([128, 557568])
s

### backbone model

### backbone model

In [19]:
import sys
sys.path.append("C:/Users/Rahman/Documents/Python_Example/detr_one")


In [36]:

from resnetbackbone import build_backbone
from util.misc import NestedTensor, nested_tensor_from_tensor_list

In [41]:
for batch in dataloader_train:
    images, targets = batch
    targets = [target.to(device) for target in targets]
    images = list(img.to(device) for img in images)
    mask = [torch.ones_like(img) for img in images]
    nested_images = nested_tensor_from_tensor_list(images).to(device)
    

In [42]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define your arguments
class Args:
    def __init__(self):
        self.backbone = "resnet50"  # or any other supported backbone
        self.lr_backbone = 0.1  # example learning rate for backbone, adjust as needed
        self.masks = False  # whether to include masks in the output
        self.num_feature_levels = 1  # number of feature levels, adjust as needed
        self.dilation = False  # whether to use dilation in backbone
        self.hidden_dim = 256  # hidden dimension for transformer
        self.position_embedding = "sine"  # type of position embedding to use

args = Args()

# Build the backbone model
model =build_backbone(args).to(device)

# Assuming you have input data 'input_data', pass it through the model
# input_data should be a NestedTensor, which is a tensor with an associated mask
# Example usage assuming input_data is properly defined
output = model(nested_images)

with torch.no_grad():
    all_extracted_features = []

    for batch in dataloader_train:
        images, targets = batch
        targets = [target.to(device) for target in targets]
        images = [img.to(device) for img in images]
        mask = [torch.ones_like(img) for img in images]
        nested_images = nested_tensor_from_tensor_list(images).to(device)
        extracted_features = model(nested_images)
        all_extracted_features.append(extracted_features)

    # Concatenate features from all batches
    all_extracted_features = torch.cat(all_extracted_features, dim=0)

print("Shape of all extracted features:", all_extracted_features.shape)



RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
