# In [None]:

import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm  # Import tqdm for the progress bar

# Import configuration settings
import config

# Define a PyTorch Dataset
class SmokeAlarmDataset(Dataset):
    """Image dataset backed by a DataFrame of job/image records.

    Each row of ``df`` supplies ``job_no``, ``image_name`` and
    ``image_status``; the image itself is read from
    ``<image_dir>/<job_no>/<image_name>``.

    Args:
        df: Table with ``job_no``, ``image_name`` and ``image_status`` columns.
        image_dir: Root directory that holds one sub-directory per job number.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, df, image_dir, transform=None):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        # Text status -> binary target used by the classifier.
        self.label_map = {"Approved": 1, "Declined": 0}

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Returns ``(None, None)`` (after printing a warning) when there is no
        regular file at the expected image path, so that a collate function
        can drop the sample instead of aborting the epoch.
        """
        # Materialise the row once rather than once per column.
        row = self.df.iloc[idx]
        job_no = str(row["job_no"])  # job numbers may be stored as integers
        image_name = row["image_name"]
        # Any status other than "Approved"/"Declined" falls back to 0.
        label = self.label_map.get(row["image_status"], 0)

        image_path = os.path.join(self.image_dir, job_no, image_name)

        # isfile (not exists): a directory at this path is not a loadable image.
        if not os.path.isfile(image_path):
            print(f"Warning: Image not found at {image_path}. Skipping this file.")
            return None, None

        # convert() returns an independent, fully loaded image, so the source
        # file handle can be released immediately.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

# Custom collate function to handle missing files
def collate_fn(batch):
    """Collate samples into a batch, dropping those whose image is ``None``.

    Args:
        batch: Sequence of ``(image, label)`` pairs. A pair whose first
            element is ``None`` marks a sample that could not be loaded.

    Returns:
        The default-collated batch of the remaining samples, or ``None``
        when no sample remains (the default collate cannot handle an empty
        list). Callers iterating the loader must skip ``None`` batches.
    """
    usable = [sample for sample in batch if sample[0] is not None]
    if not usable:
        return None
    return torch.utils.data.dataloader.default_collate(usable)

# Preprocessing: fixed 224x224 resize, tensor conversion, then per-channel
# normalisation with the statistics taken from the config module.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(config.NORMALIZE_MEAN, config.NORMALIZE_STD),
    ]
)

# Read the record table from the CSV path given in the config.
df = pd.read_csv(config.CSV_FILE_PATH)

# Wrap the table in the dataset and batch it; the custom collate function is
# passed so the loader uses it when assembling batches.
dataset = SmokeAlarmDataset(df, config.IMAGE_DIR, transform)
dataloader = DataLoader(
    dataset,
    batch_size=config.BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)

# Define the model
class SimpleCNN(nn.Module):
    """Torchvision backbone with a single-output head for binary prediction.

    The backbone constructor is looked up on ``torchvision.models`` by the
    name in ``config.MODEL_NAME`` and its ``fc`` layer is replaced by a
    one-unit linear layer.
    """

    def __init__(self):
        super().__init__()
        build_backbone = getattr(models, config.MODEL_NAME)
        # NOTE(review): newer torchvision releases prefer ``weights=...`` over
        # ``pretrained=True`` - confirm against the installed version.
        backbone = build_backbone(pretrained=True)
        # Swap the final fully connected layer for a one-unit head.
        backbone.fc = nn.Linear(backbone.fc.in_features, 1)
        self.model = backbone

    def forward(self, x):
        """Return the sigmoid of the backbone output for ``x``."""
        raw_output = self.model(x)
        return torch.sigmoid(raw_output)

model = SimpleCNN()

# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)

# Training loop with tqdm progress bar
for epoch in range(config.NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    batches_seen = 0  # batches actually trained on this epoch

    # Use tqdm to add a progress bar
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch + 1}/{config.NUM_EPOCHS}", leave=False)
    for batch in progress_bar:
        # Defensive: a loader may hand back None for a batch that ended up
        # with no usable samples; there is nothing to train on in that case.
        if batch is None:
            continue
        images, labels = batch
        labels = labels.unsqueeze(1).float()  # Reshape labels for binary output

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the total and the bar.
        batch_loss = loss.item()
        running_loss += batch_loss
        batches_seen += 1

        # Update progress bar description with current loss
        progress_bar.set_postfix(loss=batch_loss)

    # Average over the batches that were really processed; report NaN rather
    # than dividing by zero when the epoch had none.
    mean_loss = running_loss / batches_seen if batches_seen else float("nan")
    print(f"Epoch [{epoch + 1}/{config.NUM_EPOCHS}], Loss: {mean_loss}")

print("Training complete.")
