# In [None]:
from google.colab import drive, userdata
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import wandb
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import time

# ==========================================
# STEP 0: Initialize wandb with Colab Secrets
# ==========================================
# Re-running this cell should not stack runs: close whichever run is
# still active before opening a new one.
if wandb.run is not None:
    wandb.finish()

# Best-effort login. Reading the secret, exporting it and logging in all
# sit in one try so that any failure is reported and the cell continues.
try:
    os.environ["WANDB_API_KEY"] = userdata.get('WANDB_API_KEY')
    wandb.login()
except Exception as e:
    print(f"Error accessing Secrets: {e}. Please ensure 'WANDB_API_KEY' is enabled in the Secrets tab.")

# Open the run; the hyperparameters below are read back through
# `config` by the rest of the script.
wandb.init(
    project="VGG-flowers-v3",
    name="vgg16-transfer-learning-run1",
    reinit=True,
    config=dict(
        epochs=50,
        batch_size=16,
        learning_rate=0.001,
        architecture="VGG16",
        pretrained=True,
        input_size=224,
    ),
)

config = wandb.config

# =======================
# STEP 1: Mount Drive and Access Shared Dataset
# =======================
drive.mount('/content/drive')

# Your Google Drive shared folder ID
SHARED_FOLDER_ID = "1dKehfyJoRDQpHZ6qJ5VQgNgiLdzMVXeH"

print("Attempting to locate dataset...")

# Candidate locations, tried in order; the first one that contains a
# "train" directory (directly or under a nested "flowers" folder) wins.
possible_dirs = [
    "/content/drive/MyDrive/5flowersdata",
    "/content/drive/MyDrive/flowers",
    "/content/drive/.shortcut-targets-by-id/" + SHARED_FOLDER_ID,
    "/content/drive/Shareddrives/5flowersdata",
]

# Find the correct data directory
data_root = None
for dir_path in possible_dirs:
    for candidate_root in (dir_path, os.path.join(dir_path, "flowers")):
        # isdir (not exists) so a stray *file* named "train" is not accepted.
        if os.path.isdir(os.path.join(candidate_root, "train")):
            data_root = candidate_root
            break
    if data_root is not None:
        break

if data_root is None:
    # Listing MyDrive is only a debugging aid; skip it when the folder is
    # missing so the intended error below is the one that gets raised.
    if os.path.isdir('/content/drive/MyDrive'):
        print("\nFolders found in MyDrive:", os.listdir('/content/drive/MyDrive'))
    raise FileNotFoundError("Dataset not found. Please ensure the shared folder is added to your Drive as a shortcut.")

train_dir = os.path.join(data_root, "train")

# The validation split may be named "val" or "validation". Resolve it now
# and fail with a clear message instead of handing a non-existent path to
# the dataset loader later on.
val_dir = None
for split_name in ("val", "validation"):
    split_path = os.path.join(data_root, split_name)
    if os.path.isdir(split_path):
        val_dir = split_path
        break

if val_dir is None:
    raise FileNotFoundError(
        f"Validation data not found: expected a 'val' or 'validation' folder inside {data_root}."
    )

# =======================
# STEP 2: Data Preparation
# =======================
# Pieces shared by both splits. The mean/std values are the ImageNet
# channel statistics that torchvision's pretrained models are documented
# to expect.
_target_size = (config.input_size, config.input_size)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training adds random flip/rotation augmentation; validation only
# resizes and normalises.
_train_pipeline = transforms.Compose([
    transforms.Resize(_target_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])
_val_pipeline = transforms.Compose([
    transforms.Resize(_target_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])
data_transforms = {'train': _train_pipeline, 'val': _val_pipeline}

# One sub-folder per class under each split directory.
train_dataset = datasets.ImageFolder(root=train_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(root=val_dir, transform=data_transforms['val'])

# Only the training loader shuffles; everything else is shared.
_loader_kwargs = dict(batch_size=config.batch_size, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

# Class list comes from the training split's folder names.
class_names = train_dataset.classes
num_classes = len(class_names)

# ===========================
# STEP 3: Model Configuration
# ===========================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torchvision >= 0.13 replaces the deprecated `pretrained=` flag with
# `weights=`; `pretrained=True` corresponds to the IMAGENET1K_V1 weights.
# Fall back to the legacy argument on older torchvision releases.
if hasattr(models, "VGG16_Weights"):
    model = models.vgg16(
        weights=models.VGG16_Weights.IMAGENET1K_V1 if config.pretrained else None
    )
else:
    model = models.vgg16(pretrained=config.pretrained)

# Freeze the convolutional feature extractor; only the classifier head
# keeps requires_grad=True.
for param in model.features.parameters():
    param.requires_grad = False

# Swap the final classifier layer for one sized to this dataset. The input
# width is read from the layer being replaced rather than hard-coded.
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
model = model.to(device)

# ===========================
# STEP 4: Training Setup
# ===========================
# Multi-class classification loss over the raw model outputs.
criterion = nn.CrossEntropyLoss()

# All model parameters are handed to Adam, including those frozen earlier
# in the script.
optimizer = optim.Adam(params=model.parameters(), lr=config.learning_rate)

# mode='max': the scheduler is stepped with validation accuracy, so the
# learning rate is halved after 5 epochs without an improvement.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=5,
)

def train_model(model, criterion, optimizer, scheduler, train_loader, val_loader, epochs):
    """Train ``model`` and validate it after every epoch.

    Each epoch runs one optimisation pass over ``train_loader``, measures
    accuracy on ``val_loader`` with gradients disabled, steps ``scheduler``
    with that accuracy, prints it, and logs ``epoch``, ``val_accuracy`` and
    ``loss`` to wandb. Batches are moved to the module-level ``device``.

    Args:
        model: Network to train; updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            NOTE(review): the epoch loss assumes it returns the per-batch
            mean (the default reduction) - confirm if a custom loss is used.
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts the validation accuracy.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        epochs: Number of epochs to run.

    Returns:
        None.
    """
    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        loss_sum, samples_seen = 0.0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight each batch by its size so a short final batch does not
            # skew the epoch average.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            samples_seen += batch_count

        # ---- validation pass ----
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                preds = model(images).argmax(dim=1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        # Empty loaders report 0.0 instead of raising ZeroDivisionError.
        epoch_loss = loss_sum / samples_seen if samples_seen else 0.0
        val_acc = val_correct / val_total if val_total else 0.0

        scheduler.step(val_acc)
        print(f"Epoch {epoch+1}/{epochs} - Val Acc: {val_acc:.4f}")
        wandb.log({"epoch": epoch+1, "val_accuracy": val_acc, "loss": epoch_loss})

# Start Training
try:
    train_model(model, criterion, optimizer, scheduler, train_loader, val_loader, config.epochs)
finally:
    # Close the wandb run even if training fails or is interrupted, so the
    # run is not left open.
    wandb.finish()