# Extract the Dataset

In [None]:
!unzip /content/ucsc-cse-244-a_data-set.zip -d /content/data-set/

# Import Dependencies

In [None]:
!pip install timm
!pip install tqdm

In [None]:
import os
import shutil

import matplotlib.pyplot as plt
import pandas as pd
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.model_selection import KFold
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.data import random_split
from torchvision import models, transforms
from tqdm import tqdm

# Auxiliary code

In [None]:
# Custom dataset class
class ImageDataset(Dataset):
    """Map-style dataset of images listed in a CSV file.

    The CSV is read with ``pandas.read_csv``. For every row, column 0 is the
    image file name (joined onto ``image_folder``) and column 1 is the label,
    which is converted to ``int``.

    Args:
        image_folder: Directory that contains the image files.
        csv_file: Path of the CSV file listing file names and labels.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, image_folder, csv_file, transform=None):
        self.image_folder = image_folder
        self.labels_df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label CSV."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the label CSV."""
        img_name = os.path.join(self.image_folder, self.labels_df.iloc[idx, 0])
        label = int(self.labels_df.iloc[idx, 1])
        # Open inside a context manager so the underlying file is released
        # deterministically; convert() hands back an independent in-memory
        # image that stays valid after the file is closed.
        with Image.open(img_name) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

In [None]:
# Paths
# Labeled training images and the CSV that holds their labels.
labeled_image_folder = "/content/data-set/train/labeled"
labeled_image_true_values = "/content/data-set/train_labeled.csv"

# Unlabeled images to be pseudo-labeled, and the folder that receives the
# labeled + pseudo-labeled images for retraining.
unlabeled_image_folder = '/content/data-set/train/unlabeled'
combined_image_folder = '/content/data-set/train/combined-swin-128'

# CSV written by the dataset-combining cell (file name and label per row).
# NOTE(review): a later cell reassigns combined_csv_path to
# '/content/combined_labels.csv' — confirm which file retraining should read.
combined_csv_path = '/content/data-set/train/combined_labels-conf95-swin-noaug-128.csv'

# Model training on labeled data

In [None]:
# Image transforms
# Deterministic preprocessing without augmentation: resize to 384x384, convert
# to a tensor, then normalize each channel with the mean/std listed below.
# NOTE(review): the mean/std look like the usual ImageNet statistics — confirm.
transform_swin = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
# Dataset and DataLoader
# Wrap the labeled images in ImageDataset (preprocessed with transform_swin)
# and serve them in shuffled mini-batches of 64.
dataset = ImageDataset(labeled_image_folder, labeled_image_true_values, transform=transform_swin)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

FileNotFoundError: [Errno 2] No such file or directory: '/content/data-set/train_labeled.csv'

In [None]:
# Build the 'swin_large_patch4_window12_384' architecture from timm with its
# pretrained weights; the classification layer (head.fc) is replaced in a
# later cell.
model = timm.create_model('swin_large_patch4_window12_384', pretrained=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/801M [00:00<?, ?B/s]

In [None]:
# Freeze every parameter, then re-enable gradients only for the parts that
# are fine-tuned.
model.requires_grad_(False)

trainable_modules = (
    model.head,                 # classification head
    model.layers[3].blocks[1],  # block 1 of stage index 3
    model.layers[3].blocks[0],  # block 0 of stage index 3
)
for module in trainable_modules:
    module.requires_grad_(True)

In [None]:
# Update the classification head
num_classes = 135  # Example: Change to the number of classes in your dataset
model.head.fc = nn.Linear(model.head.fc.in_features, num_classes)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define optimizer and loss function
# One parameter group per fine-tuned module so each gets its own learning rate.
optimizer = torch.optim.AdamW([
    {'params': model.head.fc.parameters(), 'lr': 1e-3},  # Highest LR: freshly initialised head
    {'params': model.layers[3].blocks[0].parameters(), 'lr': 5e-5},  # Lowest LR: block 0 of the last stage
    {'params': model.layers[3].blocks[1].parameters(), 'lr': 1e-4},  # Low LR: block 1 of the last stage
], weight_decay=1e-4)

criterion = nn.CrossEntropyLoss()

# Cosine-anneal every group's learning rate towards eta_min over the run.
# T_max has to match the number of scheduler.step() calls (one per epoch, and
# the training loop runs 15 epochs). With a smaller T_max the cosine curve
# reaches its minimum early and the learning rate climbs back up during the
# remaining epochs.
scheduler_t_max = 15
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=scheduler_t_max, eta_min=1e-6)


In [None]:
# Training loop: one pass over `dataloader` per epoch, followed by a single
# scheduler step.
epochs = 15
for epoch in range(epochs):
    model.train()
    loss_sum = 0.0

    progress = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}", leave=True)
    for batch_images, batch_labels in progress:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        logits = model(batch_images)
        loss = criterion(logits, batch_labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()

    # Mean per-batch loss of this epoch
    avg_train_loss = loss_sum/len(dataloader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_train_loss}")
    scheduler.step()


Epoch 1/15: 100%|██████████| 308/308 [09:09<00:00,  1.78s/it]


Epoch 1/15, Loss: 0.6611088843314679


Epoch 2/15: 100%|██████████| 308/308 [09:07<00:00,  1.78s/it]


Epoch 2/15, Loss: 0.11903083149733797


Epoch 3/15: 100%|██████████| 308/308 [09:05<00:00,  1.77s/it]


Epoch 3/15, Loss: 0.05580148684290274


Epoch 4/15: 100%|██████████| 308/308 [09:05<00:00,  1.77s/it]


Epoch 4/15, Loss: 0.028244254820528076


Epoch 5/15: 100%|██████████| 308/308 [09:05<00:00,  1.77s/it]


Epoch 5/15, Loss: 0.020848703780532784


Epoch 6/15: 100%|██████████| 308/308 [09:07<00:00,  1.78s/it]


Epoch 6/15, Loss: 0.012520563044438257


Epoch 7/15: 100%|██████████| 308/308 [09:07<00:00,  1.78s/it]


Epoch 7/15, Loss: 0.009994726994650533


Epoch 8/15: 100%|██████████| 308/308 [09:09<00:00,  1.78s/it]


Epoch 8/15, Loss: 0.007387970265326303


Epoch 9/15: 100%|██████████| 308/308 [09:10<00:00,  1.79s/it]


Epoch 9/15, Loss: 0.007090803820869656


Epoch 10/15: 100%|██████████| 308/308 [09:08<00:00,  1.78s/it]


Epoch 10/15, Loss: 0.00683605933831982


Epoch 11/15: 100%|██████████| 308/308 [09:09<00:00,  1.78s/it]


Epoch 11/15, Loss: 0.005412228829229278


Epoch 12/15: 100%|██████████| 308/308 [09:09<00:00,  1.78s/it]


Epoch 12/15, Loss: 0.005635500421774779


Epoch 13/15: 100%|██████████| 308/308 [09:08<00:00,  1.78s/it]


Epoch 13/15, Loss: 0.005583871227555663


Epoch 14/15: 100%|██████████| 308/308 [09:08<00:00,  1.78s/it]


Epoch 14/15, Loss: 0.006561699341380683


Epoch 15/15: 100%|██████████| 308/308 [09:08<00:00,  1.78s/it]

Epoch 15/15, Loss: 0.011315035485577855





In [None]:
# Destination file for the fine-tuned weights
model_save_path = '/content/swin_large_32.pth'

# Save the model state_dict
# (only the state_dict is written, so the architecture has to be rebuilt
# before these weights can be loaded again)
torch.save(model.state_dict(), model_save_path)

In [None]:
# Predict a class for every file in the test folder
test_images_folder = './data-set/test'
test_images = os.listdir(test_images_folder)

# Keep the model on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Evaluation mode for inference
model.eval()

# File names and predicted class ids, collected in matching order
pseudo_labels = []
predictions = []

with torch.no_grad():
    for img_name in tqdm(test_images, desc="Labeling images", unit="image"):
        rgb_image = Image.open(os.path.join(test_images_folder, img_name)).convert("RGB")
        batch = transform_swin(rgb_image).unsqueeze(0).to(device)  # batch of one image

        probabilities = torch.softmax(model(batch), dim=1)
        confidence, predicted = probabilities.max(dim=1)

        pseudo_labels.append(img_name)
        predictions.append(predicted.item())

# One row per test image: file name and predicted class id
df_pseudo = pd.DataFrame({'image': pseudo_labels, 'id': predictions})

print(df_pseudo)

Labeling images: 100%|██████████| 8213/8213 [07:38<00:00, 17.91image/s]

          image  id
0     37974.jpg  65
1     33618.jpg   4
2     36143.jpg   8
3     35001.jpg   7
4     37044.jpg   7
...         ...  ..
8208  37142.jpg   6
8209  36486.jpg   8
8210  33912.jpg   9
8211  39932.jpg  14
8212  40686.jpg  47

[8213 rows x 2 columns]





In [None]:
# Write the test-set predictions (columns 'image' and 'id') to CSV without
# the DataFrame index.
df_pseudo.to_csv('/content/final_prediction_swinlarge_32.csv', index=False)

# Semi-supervised learning

In [None]:
# Minimum softmax confidence a prediction on an unlabeled image must exceed
# to be kept as a pseudo-label.
confidence_threshold = 0.95

In [None]:
unlabeled_images = os.listdir(unlabeled_image_folder)
swin_model_load_path = './saved-models/swin_sched_noaug_128.pth'
# Where the pseudo-labels are persisted: the same file the dataset-combining
# cell reads back through pseudo_labels_csv_path.
pseudo_labels_out_path = './data-set/train/unlabeled-prediction-noaug-swin-128.csv'

# Choose the device first so the checkpoint can be mapped onto it directly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the model architecture (same as the one used during training)
num_classes = 135  # Example: Change to the number of classes in your dataset
# pretrained=False: every weight is overwritten by the checkpoint loaded
# below, so downloading the pretrained weights first would be wasted work.
model = timm.create_model('swin_base_patch4_window7_224', pretrained=False)
model.head.fc = nn.Linear(model.head.fc.in_features, num_classes)

# Load the model state_dict from a .pth file.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load(swin_model_load_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model = model.to(device)

# This architecture takes 224x224 inputs, while the transform_swin defined for
# the first training stage resizes to 384x384. Use a dedicated 224x224
# transform so the cell does not depend on which transform_swin cell ran last.
pseudo_label_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Parallel lists: file name and predicted class id of every kept image
image_names = []
pseudo_labels = []

# Set the model to evaluation mode (useful for inference)
model.eval()

with torch.no_grad():
    for img_name in tqdm(unlabeled_images, desc="Labeling images", unit="image"):
        img_path = os.path.join(unlabeled_image_folder, img_name)
        image = Image.open(img_path).convert("RGB")
        image = pseudo_label_transform(image).unsqueeze(0).to(device)  # Add batch dimension

        outputs = model(image)  # Get model outputs
        prob = torch.softmax(outputs, dim=1)  # Convert to probabilities
        confidence, predicted = torch.max(prob, 1)

        # Keep only predictions whose confidence exceeds the threshold
        if confidence.item() > confidence_threshold:
            image_names.append(img_name)
            pseudo_labels.append(predicted.item())

# Create a DataFrame for pseudo-labels
df_pseudo = pd.DataFrame({'image': image_names, 'id': pseudo_labels})

# Persist the pseudo-labels so the dataset-combining cell can read them
os.makedirs(os.path.dirname(pseudo_labels_out_path), exist_ok=True)
df_pseudo.to_csv(pseudo_labels_out_path, index=False)

print(df_pseudo)

In [None]:
labeled_csv_path = './data-set/train_labeled.csv'  # Path to labeled CSV
pseudo_labels_csv_path = './data-set/train/unlabeled-prediction-noaug-swin-128.csv'  # Path to pseudo-labeled CSV

# Create the combined image folder if it doesn't exist
os.makedirs(combined_image_folder, exist_ok=True)

# Load labeled data
labeled_df = pd.read_csv(labeled_csv_path)
# Load pseudo-labeled data and normalise its column names immediately.
# The copy loop below indexes the 'image' column, so the rename has to happen
# before it; it is a no-op when the CSV already uses 'image' / 'id'.
pseudo_labels_df = pd.read_csv(pseudo_labels_csv_path).rename(
    columns={'filename': 'image', 'label': 'id'}
)

# Copy labeled images to the combined folder
for img_name in labeled_df['image']:
    shutil.copy(
        os.path.join(labeled_image_folder, img_name),
        os.path.join(combined_image_folder, img_name),
    )

# Copy pseudo-labeled images to the combined folder
for img_name in pseudo_labels_df['image']:
    shutil.copy(
        os.path.join(unlabeled_image_folder, img_name),
        os.path.join(combined_image_folder, img_name),
    )

# Combine the labeled rows with the pseudo-labeled rows into one label table
combined_df = pd.concat([labeled_df, pseudo_labels_df], ignore_index=True)

# Save the combined DataFrame to a new CSV
combined_df.to_csv(combined_csv_path, index=False)

print(f'Combined dataset created at {combined_image_folder}')
print(f'Combined labels saved to {combined_csv_path}')

In [None]:
# Bundle the saved checkpoint into model_weights.zip.
# NOTE(review): both paths are relative, so this presumably relies on the
# working directory being /content, where the checkpoint was saved — confirm.
!zip -r model_weights.zip swin_large_32.pth

  adding: swin_large_32.pth (deflated 7%)


# Retraining on the Combined Dataset (Labeled + Unlabeled)

## Combined Image Generator

In [None]:
# Custom dataset class
# NOTE(review): this is the same class as the one defined in the "Auxiliary
# code" section; running this cell shadows that earlier definition.
class ImageDataset(Dataset):
    """Dataset that yields ``(image, label)`` pairs described by a CSV file.

    Column 0 of the CSV holds the image file name (joined onto
    ``image_folder``); column 1 holds the label, converted to ``int``.

    Args:
        image_folder: Directory that contains the image files.
        csv_file: Path of the CSV file, read with ``pandas.read_csv``.
        transform: Optional callable applied to the RGB ``PIL.Image``.
    """

    def __init__(self, image_folder, csv_file, transform=None):
        self.image_folder = image_folder
        self.labels_df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label CSV."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Load row ``idx``: open the image as RGB and return it with its label."""
        img_name = os.path.join(self.image_folder, self.labels_df.iloc[idx, 0])
        label = int(self.labels_df.iloc[idx, 1])
        # The context manager releases the file handle as soon as the pixel
        # data has been copied by convert(), instead of leaving it to the
        # garbage collector.
        with Image.open(img_name) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label


# Load datasets
# NOTE(review): this overrides the combined_csv_path assigned in the "Paths"
# cell, which is the file the dataset-combining cell writes — confirm that
# '/content/combined_labels.csv' is the intended label file for retraining.
combined_csv_path = '/content/combined_labels.csv'


## Transforms (w/ or w/o Augmentation)

In [None]:
# Image transforms
# Redefines transform_swin for the retraining stage: resize to 224x224 (the
# earlier definition used 384x384), convert to a tensor and normalize each
# channel with the mean/std below. No augmentation is applied.
transform_swin = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

## Model Definition + Training

In [None]:
# Create dataset and data loader
combined_dataset = ImageDataset(combined_image_folder, combined_csv_path, transform=transform_swin)

# Split the dataset into training (80%) and validation (20%)
train_size = int(0.8 * len(combined_dataset))
val_size = len(combined_dataset) - train_size
train_dataset, val_dataset = random_split(combined_dataset, [train_size, val_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Initialize your model
model = timm.create_model('swin_base_patch4_window7_224', pretrained=True)


# Freeze the feature extractors
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the classification head
for param in model.head.parameters():
    param.requires_grad = True

# Unfreeze block 1 of the last stage
for param in model.layers[3].blocks[1].parameters():
    param.requires_grad = True

# Unfreeze block 0 of the last stage
for param in model.layers[3].blocks[0].parameters():
    param.requires_grad = True

# Update the classification head
num_classes = 135  # Example: Change to the number of classes in your dataset
model.head.fc = nn.Linear(model.head.fc.in_features, num_classes)

# Move the freshly created model to the training device. The training loop
# moves every batch to `device`, so a model left on the CPU would fail with a
# device mismatch as soon as a GPU is used.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define optimizer and loss function
# One parameter group per fine-tuned module so each gets its own learning rate.
optimizer = torch.optim.AdamW([
    {'params': model.head.fc.parameters(), 'lr': 1e-3},  # Highest LR: freshly initialised head
    {'params': model.layers[3].blocks[0].parameters(), 'lr': 1e-4},  # Lowest LR: block 0 of the last stage
    {'params': model.layers[3].blocks[1].parameters(), 'lr': 5e-4},  # Intermediate LR: block 1 of the last stage
], weight_decay=1e-4)

criterion = nn.CrossEntropyLoss()

# Cosine learning-rate schedule: anneals each group's LR towards eta_min over
# T_max scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6)

## Model Training

In [None]:
# Initialize lists to store validation loss and accuracy
val_losses = []
val_accuracies = []

# Checkpoint file name prefix; one checkpoint is written per epoch
modelName = 'CUSTOM_MODEL_NAME'

# Training loop with validation
epochs = 5  # Adjust the number of epochs as needed
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=True):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader):.4f}")

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)  # Calculate validation loss
            val_loss += loss.item()  # Accumulate validation loss
            _, predicted = torch.max(outputs, 1)  # Get predicted classes
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    avg_val_loss = val_loss / len(val_loader)  # Mean per-batch validation loss
    val_losses.append(avg_val_loss)
    val_accuracies.append(accuracy * 100)  # Store validation accuracy in percentage

    print(f'Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {accuracy * 100:.2f}%')

    # Save a checkpoint after every epoch
    torch.save(model.state_dict(), f'/content/{modelName}-{epoch+1}.pth')

    # Advance the learning-rate schedule once per epoch. The scheduler built
    # alongside the optimizer has no effect unless it is stepped.
    scheduler.step()

## Plot Generation

In [None]:
# Plotting validation loss and accuracy
# The x axis is derived from the recorded history rather than from the global
# `epochs`, so the plot still works when training stopped early or `epochs`
# was changed after the history was collected.
epoch_ticks = list(range(1, len(val_losses) + 1))

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: validation loss
ax_loss.plot(epoch_ticks, val_losses, marker='o', label='Validation Loss', color='blue')
ax_loss.set_title('Validation Loss Over Epochs')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_xticks(epoch_ticks)
ax_loss.grid(True)
ax_loss.legend()

# Right panel: validation accuracy
ax_acc.plot(epoch_ticks, val_accuracies, marker='o', label='Validation Accuracy', color='green')
ax_acc.set_title('Validation Accuracy Over Epochs')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.set_xticks(epoch_ticks)
ax_acc.grid(True)
ax_acc.legend()

fig.tight_layout()
plt.show()

## Testing

In [None]:
# Folder holding the test images and the list of files found in it
test_images_folder = '/content/test'
test_images = os.listdir(test_images_folder)

# Build the architecture only. pretrained=False skips downloading the
# pretrained weights, which would be discarded anyway when the fine-tuned
# checkpoint is loaded with load_state_dict in the following cell.
model = timm.create_model('swin_base_patch4_window7_224', pretrained=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

In [None]:
# Update the classification head
num_classes = 135  # Example: Change to the number of classes in your dataset
model.head.fc = nn.Linear(model.head.fc.in_features, num_classes)

# Restore the fine-tuned weights.
# map_location="cpu" lets a checkpoint saved on a GPU load on any machine; the
# model is moved to the selected device in a later cell.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs, and silences the torch.load warning.
model.load_state_dict(
    torch.load(
        '/content/drive/Shareddrives/CSE244A/swin_combined-b64-e15-aug-epoch_21.pth',
        map_location="cpu",
        weights_only=True,
    )
)

  model.load_state_dict(torch.load('/content/drive/Shareddrives/CSE244A/swin_combined-b64-e15-aug-epoch_21.pth'))


<All keys matched successfully>

In [None]:
# Report the PyTorch build and, when CUDA is usable, the CUDA runtime and GPU(s)
import torch
print("\nPyTorch version:", torch.__version__)
cuda_present = torch.cuda.is_available()
print("CUDA available:", cuda_present)
if cuda_present:
    print("CUDA version:", torch.version.cuda)
    print("GPU device name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())

# GPU Memory management functions
def get_gpu_memory():
    """Print the allocated and reserved memory of GPU 0 in MB.

    Does nothing when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return
    print(f"Allocated: {torch.cuda.memory_allocated(0)/1024**2:.2f}MB")
    print(f"Cached: {torch.cuda.memory_reserved(0)/1024**2:.2f}MB")

def clear_gpu_memory():
    """Release cached CUDA memory; does nothing when CUDA is not available."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

# Device selection function
def get_device():
    """Return the CUDA device when available, otherwise the CPU device.

    Prints which device was selected.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("Using CPU")
    return torch.device("cuda" if use_cuda else "cpu")

# Get the device
# and move the model onto it before running inference
device = get_device()
model = model.to(device)

In [None]:
# Image transforms (NO-AUGMENTATION)
# Test-time preprocessing: resize to 224x224, convert to a tensor and
# normalize each channel with the mean/std below.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:

# File names (pseudo_labels) and predicted class ids (predictions), kept in
# matching order
pseudo_labels, predictions = [], []

# Evaluation mode for inference
model.eval()
with torch.no_grad():
    for img_name in tqdm(test_images, desc="Testing images", unit="image"):
        source_path = os.path.join(test_images_folder, img_name)
        rgb_image = Image.open(source_path).convert("RGB")
        single_batch = transform_test(rgb_image).unsqueeze(0).to(device)  # batch of one image

        logits = model(single_batch)
        class_probs = torch.softmax(logits, dim=1)
        confidence, predicted = class_probs.max(dim=1)

        pseudo_labels.append(img_name)
        predictions.append(predicted.item())

# One row per test image: file name and predicted class id
df_pseudo = pd.DataFrame({'image': pseudo_labels, 'id': predictions})

print(df_pseudo)

df_pseudo.to_csv('/content/report.csv', index=False)

Testing images: 100%|██████████| 8213/8213 [04:10<00:00, 32.72image/s]


          image   id
0     34748.jpg  109
1     35429.jpg   17
2     40460.jpg   24
3     36287.jpg   39
4     36975.jpg   33
...         ...  ...
8208  35186.jpg    6
8209  35202.jpg    8
8210  34167.jpg    7
8211  40431.jpg   13
8212  34657.jpg   12

[8213 rows x 2 columns]
