In [59]:
import torch
from transformers import Dinov2ForImageClassification, AutoImageProcessor
from torch.utils.data import DataLoader, TensorDataset
from PIL import Image
import os
import pandas as pd

# Define directories
base_dir = "/home/shravan/documents/deeplearning/github/ComputerVision-Research/finetuning/roam2/"
sample_dir = os.path.join(base_dir, "data/support_samples/")
support_csv_path = os.path.join(base_dir, "data/support.csv")

# Load the support_df
support_df = pd.read_csv(support_csv_path)

# Initialize processor
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-small-imagenet1k-1-layer")

# Load and preprocess support samples
def preprocess_support_samples(samples):
    images = []
    labels = []
    for _, row in samples.iterrows():
        image_path = os.path.join(sample_dir, f"{row['id']}.jpg")
        image = Image.open(image_path)
        processed_image = processor(image, return_tensors="pt").pixel_values.squeeze(0)  # Remove batch dimension
        images.append(processed_image)
        labels.append(row['label'] - 1)  # Ensure labels are 0-based
    return images, labels

images, labels = preprocess_support_samples(support_df)

# Prepare dataset
images_tensor = torch.stack(images)
labels_tensor = torch.tensor(labels, dtype=torch.long)
dataset = TensorDataset(images_tensor, labels_tensor)

# Create DataLoader
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Load model and adjust the classifier layer
model = Dinov2ForImageClassification.from_pretrained(
    "facebook/dinov2-small-imagenet1k-1-layer", 
    ignore_mismatched_sizes=True
)

# Reinitialize classifier layer to match the number of classes
# Make sure the `in_features` matches the output size of the previous layer
model.classifier = torch.nn.Linear(in_features=768, out_features=3)

# Define optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Move model to appropriate device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Finetune the model
model.train()
for epoch in range(10):  # Use fewer epochs for debugging
    for batch in loader:
        images_batch, labels_batch = batch
        images_batch = images_batch.to(device)
        labels_batch = labels_batch.to(device)
        
        # Forward pass
        outputs = model(images_batch)
        
        # Compute loss
        loss = torch.nn.functional.cross_entropy(outputs.logits, labels_batch)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch + 1}: Loss = {loss.item()}")

Epoch 1: Loss = 0.06403858959674835
Epoch 2: Loss = 0.00019464935758151114
Epoch 3: Loss = 0.0003507118090055883
Epoch 4: Loss = 0.0002638925798237324
Epoch 5: Loss = 0.00013713787484448403
Epoch 6: Loss = 0.00023284090275410563
Epoch 7: Loss = 0.0003437346313148737
Epoch 8: Loss = 5.882755795028061e-05
Epoch 9: Loss = 7.724326860625297e-05
Epoch 10: Loss = 0.0001516813790658489


In [60]:
# Save the finetuned model
model.save_pretrained("./finetuned_dinov2_model")

In [61]:
# Set the model to evaluation mode
model.eval()

Dinov2ForImageClassification(
  (dinov2): Dinov2Model(
    (embeddings): Dinov2Embeddings(
      (patch_embeddings): Dinov2PatchEmbeddings(
        (projection): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): Dinov2Encoder(
      (layer): ModuleList(
        (0-11): 12 x Dinov2Layer(
          (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
          (attention): Dinov2Attention(
            (attention): Dinov2SelfAttention(
              (query): Linear(in_features=384, out_features=384, bias=True)
              (key): Linear(in_features=384, out_features=384, bias=True)
              (value): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): Dinov2SelfOutput(
              (dense): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
       

In [62]:
csv_file_path = os.path.join(base_dir, "data/solutionAugust5.csv")
df = pd.read_csv(csv_file_path)

In [63]:
image_processor = processor

In [64]:
# Prepare a list to store the predictions
predictions = []

# Iterate over the images and make predictions
image_dir = os.path.join(base_dir, "data/evaluation_data/")
for idx, row in df.iterrows():
    # Construct the full image path
    image_filename = f"{row['id']}.jpg"
    image_path = os.path.join(image_dir, image_filename)
    
    # Open and process the image
    image = Image.open(image_path)
    inputs = image_processor(image, return_tensors="pt")

    # Move inputs to the same device as the model
    inputs = {key: value.to(model.device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
    
    logits = outputs.logits
    predicted_label = logits.argmax(-1).item()
    
    # Append the prediction to the list
    predictions.append({'id': row['id'], 'label': predicted_label})

In [65]:
# Convert the list to a DataFrame
submission_df = pd.DataFrame(predictions)

# Save the submission DataFrame to a CSV file with the required format
submission_file_path = os.path.join(base_dir, "submission.csv")
submission_df.to_csv(submission_file_path, index=False)

In [72]:
submission_df.head()

Unnamed: 0,id,label
0,afc50dc671ea44fb8375b560c8019b43,1
1,621af6f5776541c78bf344b177bdb7ad,0
2,1287bddbad1c47e79965dfb5458b8098,0
3,a735c1ba09cb47f8be1f21cbdb95c84e,0
4,b6142dd35c4e4a888b9fb835c38cb6e2,0


In [73]:
submission_df.groupby('label').count()

Unnamed: 0_level_0,id
label,Unnamed: 1_level_1
0,4246
1,754


In [68]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score

In [69]:
true_labels = df['label'].tolist()

In [70]:
predicted_labels = submission_df['label'].tolist()

In [71]:
# Convert lists to numpy arrays
true_labels = np.array(true_labels)
predicted_labels = np.array(predicted_labels)

# Calculate Precision, Recall, and F1-Score for each class
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predicted_labels, average=None)

# Calculate the weighted geometric mean of F1-Scores
def geometric_mean(f1_scores, weights):
    """Calculate the weighted geometric mean of F1-scores."""
    f1_scores = np.array(f1_scores)
    weights = np.array(weights)
    weighted_f1 = np.power(np.prod(np.power(f1_scores, weights)), 1.0 / np.sum(weights))
    return weighted_f1

# Get class frequencies (weights for each class)
class_weights = np.bincount(true_labels) / len(true_labels)

# Calculate weighted geometric mean F1-Score
weighted_geom_mean_f1 = geometric_mean(f1, class_weights)

print(f"Geometric Mean Weighted F1-Score: {weighted_geom_mean_f1:.4f}")


Geometric Mean Weighted F1-Score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))
