In [None]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from torchvision.models import resnet50, ResNet50_Weights
from tqdm.auto import tqdm

from ms_coco_data_pipeline import MSCOCODatasetImages

In [None]:
# Locations of the COCO 2017 images and caption annotation files.
# Everything lives under one dataset root, so only the suffix varies.
COCO_ROOT = "/kaggle/input/coco-2017-dataset/coco2017"
COCO_ANNOTATIONS = f"{COCO_ROOT}/annotations"

train_dir = f"{COCO_ROOT}/train2017"
val_dir = f"{COCO_ROOT}/val2017"
# Despite the `_dir` suffix, these two point at JSON files, not directories.
train_ann_dir = f"{COCO_ANNOTATIONS}/captions_train2017.json"
val_ann_dir = f"{COCO_ANNOTATIONS}/captions_val2017.json"

In [None]:
# Per-channel mean / std passed to Normalize (the values torchvision
# documents for its ImageNet-pretrained models).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Steps run in list order: resize to 224x224, apply light random augmentation
# (small rotation, horizontal flip, near-full-size random crop), then convert
# to a tensor and normalise.
_pipeline_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomRotation(5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
preprocessing_transformer = transforms.Compose(_pipeline_steps)

In [None]:
# Training images use the augmenting pipeline (`preprocessing_transformer`).
train_dataset = MSCOCODatasetImages(train_dir, train_ann_dir, transform=preprocessing_transformer)

# Validation must be deterministic. Reusing the training pipeline would apply
# random rotation / flip / crop to validation images, so the same image would
# be evaluated differently on every pass. Keep only the deterministic steps:
# resize, tensor conversion and the same normalisation as training.
val_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
val_dataset = MSCOCODatasetImages(val_dir, val_ann_dir, transform=val_transformer)

In [None]:
# Both loaders share one batch size; only the training loader shuffles.
BATCH_SIZE = 32

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# ResNet-50 initialised from torchvision's default pretrained weights,
# moved onto the selected device.
weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights).to(device)

# Cross-entropy over the model's output logits.
criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that are trainable (requires_grad=True).
optimizer = optim.Adam(
    (param for param in model.parameters() if param.requires_grad),
    lr=0.001,
)

In [None]:
# Train for a fixed number of epochs; after each epoch run the model over the
# validation loader and collect its predicted class indices.
# `tqdm` comes from the notebook's import cell (`from tqdm.auto import tqdm`).
epochs = 10

for epoch in range(epochs):
    # ---- Training ----
    model.train()
    running_loss = 0.0
    train_loader_tqdm = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{epochs}] Training', leave=False)
    for images in train_loader_tqdm:
        images = images.to(device)

        # NOTE(review): the loader yields images only, so every sample is
        # given the dummy label 0. The loss therefore only pushes the network
        # towards always predicting class 0 -- replace with real labels before
        # reading anything into these numbers.
        # Allocate directly on `device` to avoid a host->device copy per batch.
        targets = torch.zeros(images.size(0), dtype=torch.long, device=device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # max(..., 1) avoids a ZeroDivisionError if the training loader is empty.
    avg_loss = running_loss / max(len(train_loader), 1)
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')

    # ---- Validation ----
    model.eval()
    all_predictions = []  # rebuilt every epoch; holds the latest epoch's predictions
    val_loader_tqdm = tqdm(val_loader, desc=f'Epoch [{epoch+1}/{epochs}] Validation', leave=False)
    with torch.no_grad():
        for images in val_loader_tqdm:
            images = images.to(device)
            outputs = model(images)

            # Index of the largest logit per sample. Gradients are already
            # disabled here, so the legacy `.data` detour is unnecessary.
            predicted = outputs.argmax(dim=1)
            all_predictions.extend(predicted.cpu().numpy())