# Demo Code for Vision Transformer Model

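This notebook generates predictions for the images in the `potato_test` test dataset using the trained model from `deeplearning_vit.pt` and `deeplearning_vit.ipynb`.
The predictions are saved to a CSV file such as `deeplearning_vit_predictions.csv`; the actual file name is the last argument passed to `predict_folder` at the end of the notebook.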


## Imports


In [None]:
import os
import torch
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
import torch.nn as nn

## Variables and Parameters


In [None]:

# --- Input geometry -------------------------------------------------------
IMG_SIZE = 224     # side length of the square input image, in pixels
PATCH_SIZE = 16    # side length of one square patch, in pixels
IN_CHANNELS = 3    # channels per input image
# Patches per image: patches along one side, squared.
NUM_PATCHES = (IMG_SIZE // PATCH_SIZE) ** 2

# --- Network architecture -------------------------------------------------
EMBED_DIM = 256       # width of every token embedding
NUM_HEADS = 8         # attention heads per encoder layer
NUM_ENCODERS = 6      # number of stacked encoder layers
NUM_CLASSES = 6       # size of the classifier output
DROPOUT = 0.1         # dropout probability for embeddings and encoder layers
ACTIVATION = "gelu"   # activation name handed to the encoder layers

# --- Training schedule ----------------------------------------------------
# These four values are not referenced by the model or inference code
# visible in this notebook; they appear to be carried over from training.
EPOCHS = 200
BATCH_SIZE = 32          # previously 64, per the original note
LEARNING_RATE = 0.0001   # previously 3e-4, per the original note
WARMUP_EPOCHS = 5        # epochs of learning-rate warmup

## Model

In [None]:
class PatchEmbedding(nn.Module):
    """Convert an image batch into a token sequence for a transformer.

    The image is cut into non-overlapping square patches, each patch is
    projected to ``embed_dim`` features, a learnable class token is placed
    in front of the patch tokens, learnable position embeddings are added,
    and dropout is applied to the result.

    Args:
        in_channels: Number of channels in the input image.
        patch_size: Side length of a patch; used as both the kernel size
            and the stride of the projecting convolution.
        embed_dim: Number of features per token.
        num_patches: Number of patches per image. The position embedding
            holds ``num_patches + 1`` entries (one extra for the class token).
        dropout: Dropout probability applied to the final token sequence.
    """

    def __init__(self, in_channels, patch_size, embed_dim, num_patches, dropout):
        super().__init__()

        # kernel_size == stride means every patch is seen exactly once.
        patch_projection = nn.Conv2d(
            in_channels=in_channels,
            out_channels=embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
        )
        # Flatten(2) merges the spatial grid into a single patch axis.
        self.patcher = nn.Sequential(patch_projection, nn.Flatten(2))

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.dropout = nn.Dropout(dropout)

        # Replace the zero placeholders with small truncated-normal values.
        for learned in (self.cls_token, self.pos_embed):
            nn.init.trunc_normal_(learned, std=.02)

    def forward(self, x):
        """Return the token sequence for ``x``.

        For a batched 4-D input the result has one row per image, one
        position per patch plus one for the class token, and ``embed_dim``
        features per position.
        """
        # (batch, embed_dim, patches) -> (batch, patches, embed_dim)
        patch_tokens = self.patcher(x).transpose(1, 2)

        # Share the single learned class token across the whole batch.
        batch_size = patch_tokens.size(0)
        class_tokens = self.cls_token.expand(batch_size, -1, -1)

        sequence = torch.cat((class_tokens, patch_tokens), dim=1)
        sequence = sequence + self.pos_embed
        return self.dropout(sequence)

class ViT(nn.Module):
    """Vision Transformer classifier configured by the module-level constants.

    Pipeline: patch embedding -> layer norm -> transformer encoder stack ->
    MLP head applied to the class token. The head emits ``NUM_CLASSES``
    raw scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()

        self.patch_embed = PatchEmbedding(
            IN_CHANNELS, PATCH_SIZE, EMBED_DIM, NUM_PATCHES, DROPOUT
        )
        # Extra normalisation applied to the tokens before the encoder.
        self.norm = nn.LayerNorm(EMBED_DIM)

        # batch_first=True: the encoder takes (batch, sequence, feature)
        # tensors, which is the layout PatchEmbedding produces.
        encoder_block = nn.TransformerEncoderLayer(
            d_model=EMBED_DIM,
            nhead=NUM_HEADS,
            dropout=DROPOUT,
            activation=ACTIVATION,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_block, num_layers=NUM_ENCODERS
        )

        # Classification head that narrows in two steps before the output.
        half_width = EMBED_DIM // 2
        quarter_width = EMBED_DIM // 4
        head_layers = [
            nn.LayerNorm(EMBED_DIM),
            nn.Linear(EMBED_DIM, half_width),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(half_width, quarter_width),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(quarter_width, NUM_CLASSES),
        ]
        self.mlp_head = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        tokens = self.norm(self.patch_embed(x))
        encoded = self.transformer(tokens)
        # Position 0 holds the class token prepended by PatchEmbedding.
        class_repr = encoded[:, 0]
        return self.mlp_head(class_repr)


## Setup and Preprocessing

In [None]:

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Freshly initialised network placed on the selected device.
model = ViT().to(device)

# Preprocessing pipeline applied to every image before inference: resize to
# 224x224, convert to a tensor, then normalise each channel with the
# mean/std values below (labelled as ImageNet statistics in the original
# note). It is meant to mirror the training-time transforms; confirm against
# the training notebook.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Lookup from predicted class index to its human-readable label.
class_names = dict(
    enumerate(
        ("Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus")
    )
)

## Using the Model and Generating Predictions

In [None]:



def _file_number(filename):
    """Return the integer spelled by the decimal digits in the file stem.

    Returns -1 when the stem contains no decimal digits. ``str.isdecimal``
    is used rather than ``str.isdigit`` because ``isdigit`` also accepts
    characters (for example superscripts) that ``int()`` cannot parse.
    """
    stem = os.path.splitext(filename)[0]
    digits = ''.join(ch for ch in stem if ch.isdecimal())
    return int(digits) if digits else -1


def predict_folder(model, folder_path, output_csv="group9_model2_prediction"):
    """
    Predict classes for all images in a folder and save results to CSV sorted by numerical filename.

    Images are processed one at a time. An image that cannot be loaded or
    classified is reported on stdout and recorded as an ``'Error'`` row
    (``class_id`` -1, ``confidence`` 0.0) instead of aborting the run.

    Relies on the module-level ``transform``, ``device`` and ``class_names``
    objects, so the model is expected to live on ``device``.

    Args:
        model: Trained PyTorch model; it is switched to evaluation mode.
        folder_path: Path to folder containing images
            (.png, .jpg, .jpeg, .bmp, .tiff; matched case-insensitively).
        output_csv: Path to save the predictions CSV file. The value is used
            exactly as given; no ``.csv`` suffix is appended.

    Returns:
        A DataFrame with columns ``filename``, ``class_id``, ``class_name``
        and ``confidence``, ordered by the number embedded in each file name
        (ties broken by file name). Returns ``None``, and writes no file,
        when the folder contains no matching images.
    """
    # Get all image files in the folder
    image_files = [f for f in os.listdir(folder_path)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff'))]

    if not image_files:
        print(f"No images found in {folder_path}")
        return

    # Prepare to store predictions
    predictions = []

    # Set model to evaluation mode
    model.eval()

    # Process each image
    for img_file in image_files:
        try:
            img_path = os.path.join(folder_path, img_file)

            # Load and preprocess image; the context manager releases the
            # file handle as soon as the pixel data has been converted.
            with Image.open(img_path) as raw_img:
                img = raw_img.convert('RGB')
            img_tensor = transform(img).unsqueeze(0).to(device)  # Add batch dimension

            # Make prediction
            with torch.no_grad():
                outputs = model(img_tensor)
                probs = torch.nn.functional.softmax(outputs, dim=1)
                conf, pred = torch.max(probs, 1)
        except Exception as e:
            # Deliberately broad: one unreadable image must not stop the batch.
            print(f"Error processing {img_file}: {str(e)}")
            predictions.append({
                'filename': img_file,
                'file_number': -1,  # Default for error cases
                'class_id': -1,
                'class_name': 'Error',
                'confidence': 0.0
            })
            continue

        # Get class name and confidence
        class_id = pred.item()
        class_name = class_names.get(class_id, "Unknown")
        confidence = conf.item()

        predictions.append({
            'filename': img_file,
            # Parsed outside the try block so that a file name without
            # digits cannot turn a successful prediction into an error row.
            'file_number': _file_number(img_file),
            'class_id': class_id,
            'class_name': class_name,
            'confidence': confidence
        })

        print(f"{img_file}: {class_name} (confidence: {confidence:.2f})")

    # Convert to DataFrame and sort by numerical filename; the file name is a
    # secondary key so rows sharing a number come out in a deterministic order.
    df = pd.DataFrame(predictions)
    df = df.sort_values(by=['file_number', 'filename'])

    # file_number exists only to drive the sort; keep it out of the output
    df = df.drop(columns=['file_number'])

    # Save to CSV
    df.to_csv(output_csv, index=False)
    print(f"\nPredictions saved to {output_csv}, sorted by numerical filename")

    return df
    
# Load your trained model
model = ViT().to(device)
# map_location moves the stored tensors onto the active device, so a
# checkpoint saved on a GPU machine still loads when only the CPU is available.
# NOTE(review): torch.load unpickles the file, so only open checkpoints from a
# trusted source; consider weights_only=True if the installed PyTorch supports it.
model.load_state_dict(torch.load("deeplearning_vit.pt", map_location=device))  ## Change your model name here
model.eval()


folder_path = "potato_test"
# The output name is used as given, so no ".csv" suffix is added to the file.
predict_folder(model, folder_path, "group9_model2_predictions")  ## Change your csv name here

0.jpeg: Healthy (confidence: 0.36)
1.jpeg: Healthy (confidence: 0.77)
10.jpeg: Healthy (confidence: 0.84)
100.jpeg: Phytopthora (confidence: 0.68)
101.jpeg: Bacteria (confidence: 0.78)
102.jpeg: Fungi (confidence: 0.44)
103.jpeg: Healthy (confidence: 0.58)
104.jpeg: Healthy (confidence: 0.42)
105.jpeg: Virus (confidence: 0.57)
106.jpeg: Virus (confidence: 0.54)
107.jpeg: Pest (confidence: 0.29)
108.jpeg: Pest (confidence: 0.54)
109.jpeg: Fungi (confidence: 0.38)
11.jpeg: Healthy (confidence: 0.69)
110.jpeg: Virus (confidence: 0.35)
111.jpeg: Fungi (confidence: 0.68)
112.jpeg: Fungi (confidence: 0.58)
113.jpeg: Bacteria (confidence: 0.45)
114.jpeg: Fungi (confidence: 0.60)
115.jpeg: Fungi (confidence: 0.42)
116.jpeg: Virus (confidence: 0.49)
117.jpeg: Phytopthora (confidence: 0.56)
118.jpeg: Bacteria (confidence: 0.46)
119.jpeg: Fungi (confidence: 0.71)
12.jpeg: Virus (confidence: 0.62)
120.jpeg: Fungi (confidence: 0.61)
121.jpeg: Virus (confidence: 0.36)
122.jpeg: Virus (confidence: 0.

Unnamed: 0,filename,class_id,class_name,confidence
0,0.jpeg,2,Healthy,0.359010
1,1.jpeg,2,Healthy,0.770729
112,2.jpeg,2,Healthy,0.560473
223,3.jpeg,2,Healthy,0.540117
235,4.jpeg,2,Healthy,0.372226
...,...,...,...,...
219,296.jpeg,5,Virus,0.533953
220,297.jpeg,5,Virus,0.407108
221,298.jpeg,5,Virus,0.641145
222,299.jpeg,3,Pest,0.564876
