In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm

# Device configuration: use CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# dataset_folder = '/kaggle/input/smai-25-sec-a-project-phase-2-lat-long-prediction/'

# Input / output locations
model_path = '/kaggle/input/best_convnext_model/pytorch/default/1/best_convnext_model.pth'  # Saved model weights
test_img_dir = '/kaggle/input/images-test/images_test'  # Directory holding the test images
output_test_csv = 'predicted_regions_test.csv'  # CSV that receives the predicted regions

# Preprocessing applied to every image before inference: resize the short side
# to 256, take the central 224x224 crop, convert to a tensor and normalise with
# the standard ImageNet channel statistics.
# NOTE(review): intended to mirror the validation transform used in training,
# which is not visible in this notebook - confirm.
transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Function to get all image filenames from a directory
def get_image_filenames(directory, extensions=('.jpg', '.jpeg', '.png', '.bmp', '.tiff')):
    """Return the sorted names of the image files located directly in ``directory``.

    Only regular files are reported: a sub-directory whose name happens to end
    with an image extension is skipped, because it could never be opened as an
    image later on. The extension match is case-insensitive and the search is
    not recursive.

    Args:
        directory: Path of the folder to scan.
        extensions: File suffixes (including the leading dot) that count as
            images. A single string is accepted as shorthand for one suffix.

    Returns:
        A list of bare file names (no directory part), sorted lexicographically
        so the order is reproducible between runs.

    Raises:
        OSError: If ``directory`` does not exist or cannot be read.
    """
    if isinstance(extensions, str):
        # Guard against iterating over the characters of a lone suffix.
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # scandir exposes the file type without an extra stat call per entry, and
    # the context manager releases the directory handle promptly.
    with os.scandir(directory) as entries:
        return sorted(
            entry.name
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith(suffixes)
        )

# Enumerate the test images once and keep their names in a one-column frame,
# so row positions can later be mapped back to file names
test_filenames = get_image_filenames(directory=test_img_dir)
test_df = pd.DataFrame(data={'filename': test_filenames})
print(f"Found {len(test_df.index)} test images")

# Dataset class for prediction
class PredictionDataset(Dataset):
    """Label-free dataset that yields ``(image, index)`` pairs for inference.

    Args:
        filenames: Sequence of image file names, relative to ``img_dir``.
        img_dir: Directory that contains the images.
        transform: Optional callable applied to the loaded RGB PIL image.
            When omitted, the raw PIL image is returned.
    """

    def __init__(self, filenames, img_dir, transform=None):
        self.filenames = filenames
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load one image and return it together with its position.

        An image that cannot be read is reported on stdout and replaced by a
        gray 256x256 placeholder so that a single bad file does not abort the
        whole run. The prediction produced for such a sample is meaningless.

        Args:
            idx: Position of the sample within ``filenames``.

        Returns:
            ``(image, idx)``, where ``image`` is the (optionally transformed)
            picture and ``idx`` lets the caller map the result back to its
            file name.
        """
        img_name = self.filenames[idx]
        img_path = os.path.join(self.img_dir, img_name)

        try:
            # Image.open is lazy and keeps the file open; convert() forces the
            # decode and returns an independent image, so the handle can be
            # closed as soon as the with-block ends.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert('RGB')
        except Exception as exc:
            # Catch Exception rather than everything, so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the run.
            print(f"Error loading image: {img_path} ({exc})")
            # Return a placeholder image in case of error
            image = Image.new('RGB', (256, 256), color='gray')

        if self.transform:
            image = self.transform(image)

        # Return image and index (to map back to dataframe)
        return image, idx

# Wrap the discovered test files in the inference dataset
test_pred_dataset = PredictionDataset(
    filenames=test_df['filename'].tolist(),
    img_dir=test_img_dir,
    transform=transform,
)

# Batched loader for prediction; shuffle stays off so batches follow the
# (sorted) file order
batch_size = 32  # Can use larger batch size for prediction
test_pred_loader = DataLoader(
    dataset=test_pred_dataset,
    batch_size=batch_size,
    num_workers=4,
    shuffle=False,
)

# Rebuild the network skeleton. The layout has to match the one used when the
# checkpoint was saved, otherwise load_state_dict() rejects the parameter names.
# NOTE(review): the training-time definition is not visible here - confirm it matches.
model = models.convnext_base(weights=None)  # Random init; real weights come from the checkpoint
num_ftrs = model.classifier[2].in_features
model.classifier = nn.Sequential(
    model.classifier[0],
    model.classifier[1],
    nn.Dropout(0.2),
    nn.Linear(num_ftrs, 15)  # 15 regions (0-14 for PyTorch)
)

# Load the saved model weights.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state_dict needs. It prevents a tampered checkpoint from executing
# arbitrary code and silences the torch.load FutureWarning.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()  # Disable dropout so predictions are deterministic

def predict_regions(dataloader):
    """Predict a 1-based region id for every sample served by ``dataloader``.

    Uses the module-level ``model`` and ``device``. Each batch is expected to
    be an ``(images, idx)`` pair, where ``idx`` holds the dataset positions of
    the samples.

    Returns:
        ``(indices, predictions)``: two parallel lists with the dataset index
        of each sample and its predicted region id in the range 1-15.
    """
    sample_indices, region_ids = [], []

    # Gradients are not needed for inference, so skip building the autograd graph
    with torch.no_grad():
        for batch_images, batch_indices in tqdm(dataloader, desc="Predicting"):
            logits = model(batch_images.to(device))
            # The winning class is the position of the largest score per row
            class_ids = torch.max(logits, dim=1).indices

            # Shift the model's 0-14 class ids to the 1-15 Region_ID convention
            region_ids.extend((class_ids + 1).cpu().numpy())
            sample_indices.extend(batch_indices.numpy())

    return sample_indices, region_ids

# Run inference over the whole test set
print("Predicting regions for test set...")
indices, predictions = predict_regions(test_pred_loader)

# Pair every prediction with the file name found at the same dataframe row
result_df = pd.DataFrame({
    'filename': [test_df.loc[i, 'filename'] for i in indices],
    'Region_ID': predictions,
})

# Write the predictions out, without the dataframe index column
result_df.to_csv(path_or_buf=output_test_csv, index=False)

print(f"Saved predicted regions for test set to {output_test_csv}")

# Show how many test images were assigned to each region, ordered by region id
print(
    "\nPredicted Region ID distribution (Test):",
    result_df['Region_ID'].value_counts().sort_index(),
    sep="\n",
)

Using device: cuda
Found 369 test images


  model.load_state_dict(torch.load(model_path, map_location=device))


Predicting regions for test set...


Predicting: 100%|██████████| 12/12 [00:04<00:00,  2.43it/s]

Saved predicted regions for test set to predicted_regions_test.csv

Predicted Region ID distribution (Test):
Region_ID
1     19
2     22
3     29
4     26
5     28
6     28
7     24
8     26
9     14
10    30
11    26
12    27
13    25
14     9
15    36
Name: count, dtype: int64



