<a href="https://colab.research.google.com/github/nr2265/Arryhthmia-Prediction/blob/main/attempt1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
!pip install torch torchvision transformers timm


In [None]:
# Import required libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
from transformers import DINOConfig, DINOModel
import timm  # For Swin Transformer


In [None]:
# Define the custom backbone with Swin-Large Transformer and Feature Pyramid Network (FPN)
class CustomBackbone(nn.Module):
    """Swin-Large feature extractor followed by a Feature Pyramid Network (FPN).

    Args:
        pretrained (bool): Forwarded to ``timm.create_model`` to load
            pre-trained Swin weights.
        out_channels (int): Number of channels of every FPN output level.
    """

    def __init__(self, pretrained=True, out_channels=256):
        super().__init__()
        # Load a pre-trained Swin Transformer (Large) as a 4-stage feature extractor
        self.backbone = timm.create_model(
            'swin_large_patch4_window7_224',
            pretrained=pretrained,
            features_only=True,
            out_indices=(0, 1, 2, 3),
        )

        # Query timm for the per-stage channel counts instead of hard-coding
        # them: [96, 192, 384, 768] are the Swin-Tiny widths, so the FPN
        # lateral convolutions would not match the Swin-Large feature maps.
        self._in_channels = list(self.backbone.feature_info.channels())

        # Feature Pyramid Network for multi-scale feature representation
        self.fpn = models.detection.backbone_utils.FeaturePyramidNetwork(
            self._in_channels, out_channels
        )

    def forward(self, x):
        """Return the FPN feature maps as a list, highest resolution first.

        Args:
            x: Image batch passed unchanged to the Swin backbone.

        Returns:
            list: One tensor per backbone stage, in stage order, so callers
            can index levels with integers (``features[0]``).
        """
        # Extract features from Swin Transformer
        features = self.backbone(x)

        named_features = {}
        for level, (feature, channels) in enumerate(zip(features, self._in_channels)):
            # Some timm versions emit Swin feature maps channels-last
            # (N, H, W, C); the FPN convolutions need (N, C, H, W).
            if (
                feature.dim() == 4
                and feature.shape[1] != channels
                and feature.shape[-1] == channels
            ):
                feature = feature.permute(0, 3, 1, 2).contiguous()
            # The FPN reads ``.keys()``/``.values()``, so it needs a mapping,
            # not the plain list returned by the backbone.
            named_features[str(level)] = feature

        # Pass through the FPN for multi-scale outputs
        fpn_features = self.fpn(named_features)
        return list(fpn_features.values())



In [None]:
# Define the DINO-style detection model: cascaded conv layers followed by standard multi-head self-attention (a stand-in for deformable attention)
class DINOObjectDetectionModel(nn.Module):
    """Detection model: backbone -> conv refinement -> self-attention -> heads.

    NOTE(review): ``self.dino_model`` is constructed but never called in
    ``forward``; confirm whether it is meant to take part in the computation.
    NOTE(review): the two "deformable" layers are plain
    ``nn.MultiheadAttention`` modules, not deformable attention.

    Args:
        backbone (nn.Module): Feature extractor whose output can be indexed
            with ``[0]``. That entry is assumed to be a
            (batch, 256, H, W) tensor -- TODO confirm against the backbone.
        num_classes (int, optional): Width of the classification head. When
            omitted, ``dino_config.num_classes`` is used, as before.
    """

    def __init__(self, backbone, num_classes=None):
        super().__init__()
        self.backbone = backbone

        # Single source of truth for the feature width used by every layer below
        hidden_dim = 256

        # DINO configuration
        dino_config = DINOConfig()
        dino_config.num_queries = 300
        dino_config.hidden_dim = hidden_dim
        self.dino_model = DINOModel(dino_config)

        if num_classes is None:
            num_classes = dino_config.num_classes

        # Cascading attention layers (standard multi-head self-attention)
        self.deformable_attn_layer1 = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=8)
        self.deformable_attn_layer2 = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=8)

        # Additional convolutional refinement applied before attention
        self.cascade_layer1 = self._make_cascade_block(hidden_dim)
        self.cascade_layer2 = self._make_cascade_block(hidden_dim)

        # Detection heads
        self.class_head = nn.Linear(hidden_dim, num_classes)
        self.bbox_head = nn.Linear(hidden_dim, 4)

    @staticmethod
    def _make_cascade_block(channels):
        """Build one shape-preserving 3x3 conv -> ReLU -> BatchNorm block."""
        return nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(channels),
        )

    def forward(self, x):
        """Compute per-location class logits and box regressions.

        Args:
            x: Image batch forwarded to the backbone.

        Returns:
            tuple: ``(class_logits, bbox_regression)`` with shapes
            ``(H*W, B, num_classes)`` and ``(H*W, B, 4)``; the sequence
            dimension comes first because the attention layers use the
            default ``batch_first=False`` layout.

        Raises:
            ValueError: If the selected feature map is not 4-dimensional.
        """
        features = self.backbone(x)
        x = features[0]  # Use first feature map, extendable based on need

        # Apply cascading layers
        x = self.cascade_layer1(x)
        x = self.cascade_layer2(x)

        if x.dim() != 4:
            raise ValueError(
                f'expected a 4-D feature map (B, C, H, W), got shape {tuple(x.shape)}'
            )

        # (B, C, H, W) -> (H*W, B, C): one token per spatial location
        x = x.flatten(2).permute(2, 0, 1)

        # Self-attention over all spatial locations
        x, _ = self.deformable_attn_layer1(x, x, x)
        x, _ = self.deformable_attn_layer2(x, x, x)

        # Detection heads
        class_logits = self.class_head(x)
        bbox_regression = self.bbox_head(x)

        return class_logits, bbox_regression


In [None]:
# Training function
def train_model(model, dataloader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` for ``num_epochs`` passes over ``dataloader``.

    Args:
        model (nn.Module): Called as ``model(images)``; must return
            ``(class_logits, bbox_regression)``.
        dataloader: Iterable yielding ``(images, targets)`` where ``images``
            has a ``.to`` method and ``targets`` is a list of dicts of tensors.
        criterion (callable): Called as
            ``criterion(class_logits, bbox_regression, targets)``; must return
            a scalar tensor.
        optimizer (torch.optim.Optimizer): Optimizer stepping the model.
        num_epochs (int): Number of passes over ``dataloader``.
        device (torch.device, optional): Where batches are moved. Defaults to
            the device of the model's first parameter, so the function no
            longer depends on a module-level ``device`` variable.

    Returns:
        list[float]: Mean loss of each epoch (0.0 for an epoch without batches).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, targets in dataloader:
            images = images.to(device)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            class_logits, bbox_regression = model(images)

            loss = criterion(class_logits, bbox_regression, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches ourselves: works for loaders without __len__ and
        # avoids a ZeroDivisionError when the loader is empty.
        average_loss = running_loss / num_batches if num_batches else 0.0
        epoch_losses.append(average_loss)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss}')

    return epoch_losses


In [None]:
# Fine-tuning function with frozen layers
def fine_tune_model(model, dataloader, criterion, optimizer, num_epochs=5, device=None):
    """Fine-tune ``model`` with the inner backbone (``model.backbone.backbone``) frozen.

    The parameters stay frozen (``requires_grad=False``) after this function
    returns.

    Args:
        model (nn.Module): Must expose ``model.backbone.backbone`` and return
            ``(class_logits, bbox_regression)`` from ``model(images)``.
        dataloader: Iterable yielding ``(images, targets)`` where ``targets``
            is a list of dicts of tensors.
        criterion (callable): Called as
            ``criterion(class_logits, bbox_regression, targets)``.
        optimizer (torch.optim.Optimizer): Optimizer stepping the model.
        num_epochs (int): Number of passes over ``dataloader``.
        device (torch.device, optional): Where batches are moved. Defaults to
            the device of the model's first parameter instead of relying on a
            module-level ``device`` variable.

    Returns:
        list[float]: Mean loss of each epoch (0.0 for an epoch without batches).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Freeze some layers to retain initial training knowledge
    frozen = model.backbone.backbone
    for param in frozen.parameters():
        param.requires_grad = False

    model.train()
    # Keep the frozen part in eval mode: otherwise dropout / stochastic depth
    # stay active and normalisation statistics keep drifting even though the
    # weights themselves are not being updated.
    frozen.eval()

    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, targets in dataloader:
            images = images.to(device)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            class_logits, bbox_regression = model(images)

            loss = criterion(class_logits, bbox_regression, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches ourselves: works for loaders without __len__ and
        # avoids a ZeroDivisionError when the loader is empty.
        average_loss = running_loss / num_batches if num_batches else 0.0
        epoch_losses.append(average_loss)
        print(f'Fine-tuning Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss}')

    return epoch_losses


In [None]:
# Driver cell: builds the model, the (placeholder) data loaders, the optimizer
# and the loss, then runs training followed by fine-tuning.

# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DINOObjectDetectionModel(backbone=CustomBackbone(pretrained=True)).to(device)

# Placeholder for dataloader (replace with actual DOTA v1.0 and fine-tuning dataset)
# NOTE(review): `DataLoader(...)` passes the literal Ellipsis object as the
# dataset; both lines must be replaced with real datasets before this cell
# can iterate any batches.
train_dataloader = DataLoader(...)  # Add actual DataLoader for DOTA dataset
fine_tune_dataloader = DataLoader(...)  # Add DataLoader for fine-tuning dataset

# Define optimizer and criterion
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
# NOTE(review): train_model / fine_tune_model call
# `criterion(class_logits, bbox_regression, targets)` with three arguments,
# while nn.MSELoss takes (input, target); a detection loss with the
# three-argument signature is required here.
criterion = nn.MSELoss()  # Replace with detection-specific loss

# Train on DOTA v1.0
train_model(model, train_dataloader, criterion, optimizer, num_epochs=20)

# Fine-tune on secondary dataset
# NOTE(review): the same optimizer instance (and its accumulated state) is
# reused; fine_tune_model sets requires_grad=False on the backbone parameters
# that this optimizer still holds.
fine_tune_model(model, fine_tune_dataloader, criterion, optimizer, num_epochs=10)


In [None]:
import torch
from torch.utils.data import Dataset
import cv2  # For image loading
import os

# Define the custom dataset class for DOTA
class DOTADataset(Dataset):
    """Dataset of images with DOTA-style oriented-box annotations.

    Each image ``<name>.<ext>`` is paired with ``<name>.txt`` in
    ``annotations_dir``. An annotation line holds ten fields,
    ``x1 y1 x2 y2 x3 y3 x4 y4 category difficult``, separated by whitespace
    and/or commas; lines with a different field count (e.g. header lines)
    are ignored.
    """

    # DOTA v1.0 category names in a fixed order so label indices are stable.
    DOTA_V1_CLASSES = (
        'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
        'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
        'basketball-court', 'storage-tank', 'soccer-ball-field',
        'roundabout', 'harbor', 'swimming-pool', 'helicopter',
    )
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    def __init__(self, images_dir, annotations_dir, transform=None, class_to_idx=None):
        """
        Args:
            images_dir (str): Directory with all the images.
            annotations_dir (str): Directory with all the annotation text files.
            transform (callable, optional): Optional transform to be applied on a sample.
            class_to_idx (dict, optional): Maps category names to integer
                labels. Defaults to the DOTA v1.0 categories, numbered from 0.
        """
        self.images_dir = images_dir
        self.annotations_dir = annotations_dir
        # Sorted so that index -> file is reproducible; os.listdir order is arbitrary.
        self.image_files = sorted(
            f for f in os.listdir(images_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.transform = transform
        if class_to_idx is None:
            class_to_idx = {name: i for i, name in enumerate(self.DOTA_V1_CLASSES)}
        self.class_to_idx = dict(class_to_idx)

    def __len__(self):
        return len(self.image_files)

    def _encode_label(self, category, annotation_path):
        """Turn a category field into an integer label.

        Raises:
            ValueError: If the category is neither a known name nor an integer.
        """
        if category in self.class_to_idx:
            return self.class_to_idx[category]
        try:
            # Accept annotation files that already store numeric labels.
            return int(category)
        except ValueError:
            raise ValueError(
                f'Unknown category {category!r} in {annotation_path}'
            ) from None

    def _load_annotation(self, image_filename):
        """Parse the annotation file belonging to ``image_filename``.

        Returns:
            tuple: ``(boxes, labels, difficulties)`` tensors of shapes
            ``(N, 8)`` float32, ``(N,)`` int64 and ``(N,)`` int64; ``N`` is 0
            when the annotation file is missing or has no valid lines.
        """
        # splitext only touches the real extension, unlike str.replace,
        # which would also rewrite '.jpg' / '.png' inside the file stem.
        stem, _ = os.path.splitext(image_filename)
        annotation_path = os.path.join(self.annotations_dir, stem + '.txt')

        boxes = []
        labels = []
        difficulties = []
        try:
            with open(annotation_path, 'r', encoding='utf-8') as file:
                for line in file:
                    # DOTA label files are space-separated; commas are also accepted.
                    values = line.replace(',', ' ').split()
                    if len(values) != 10:
                        continue
                    boxes.append([float(v) for v in values[:8]])
                    labels.append(self._encode_label(values[8], annotation_path))
                    difficulties.append(int(values[9]))
        except FileNotFoundError:
            # An image without an annotation file is treated as having no objects.
            pass

        # reshape keeps the (N, 8) layout even when N == 0
        boxes = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 8)
        labels = torch.tensor(labels, dtype=torch.int64)
        difficulties = torch.tensor(difficulties, dtype=torch.int64)
        return boxes, labels, difficulties

    def __getitem__(self, idx):
        """Load one image with its annotations.

        Returns:
            dict: Keys ``'image'`` (RGB array, or the transform's output when a
            transform is set), ``'boxes'``, ``'labels'`` and ``'difficulties'``.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        # Load image
        image_filename = self.image_files[idx]
        image_path = os.path.join(self.images_dir, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB

        boxes, labels, difficulties = self._load_annotation(image_filename)

        sample = {
            'image': image,
            'boxes': boxes,
            'labels': labels,
            'difficulties': difficulties
        }

        if self.transform:
            sample['image'] = self.transform(image)

        return sample


In [None]:
from torch.utils.data import DataLoader
import torchvision.transforms as T

# Define the transform for image preprocessing
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for pretrained models
])


def collate_dota_batch(samples):
    """Regroup a list of per-image sample dicts into one dict of lists.

    Values are kept as lists instead of being stacked because each sample
    carries its own number of boxes (and images may differ in size).
    A named function is used rather than a lambda so the DataLoader can also
    be run with worker processes.
    """
    return {key: [sample[key] for sample in samples] for key in samples[0]}


# Initialize the dataset and DataLoader
dota_dataset = DOTADataset(images_dir='/path/to/images', annotations_dir='/path/to/annotations', transform=transform)
dota_dataloader = DataLoader(dota_dataset, batch_size=4, shuffle=True, collate_fn=collate_dota_batch)

# Test DataLoader by iterating through a batch (optional)
for batch in dota_dataloader:
    # The batch is a dict of lists (one entry per image), so the string keys
    # below are valid; the previous identity collate returned a plain list.
    images = batch['image']
    boxes = batch['boxes']
    labels = batch['labels']
    difficulties = batch['difficulties']
    print(f'Image batch shape: {[tuple(image.shape) for image in images]}')
    print(f'Boxes: {boxes}')
    print(f'Labels: {labels}')
    print(f'Difficulties: {difficulties}')
    break  # Just to test the first batch
