In [97]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from torch import nn, optim
from sklearn.model_selection import train_test_split


In [99]:
# Paths for the dataset and Excel files.
# Every location lives under one base directory. It defaults to the original
# local path and can be overridden with the CAR_CRASH_DATA_DIR environment
# variable, so the notebook can run on another machine without editing code.
base_dir = os.environ.get('CAR_CRASH_DATA_DIR', '/Users/lakshyajha/Downloads/Datasets/car_Crash')
train_excel = os.path.join(base_dir, 'train.xlsx')
val_excel = os.path.join(base_dir, 'val.xlsx')
test_excel = os.path.join(base_dir, 'test.xlsx')
data_dir = os.path.join(base_dir, 'archive', 'dataset')

# Read the original dataset (first sheet of the workbook)
excel_data = pd.read_excel(os.path.join(base_dir, 'archive', 'dataset_database.xlsx'), sheet_name=0)

# Split the data into train, validation, and test sets (70% train, 15% validation, 15% test).
# The fixed random_state keeps the split identical across re-runs.
train, temp = train_test_split(excel_data, test_size=0.3, random_state=42)
val, test = train_test_split(temp, test_size=0.5, random_state=42)

# The three parts must partition the original table exactly.
assert len(train) + len(val) + len(test) == len(excel_data)

# Save the split data to Excel files (these are what CustomDataset reads back)
train.to_excel(train_excel, index=False)
val.to_excel(val_excel, index=False)
test.to_excel(test_excel, index=False)

# Print dataset sizes for verification
print(f'Train dataset size: {len(train)}')
print(f'Val dataset size: {len(val)}')
print(f'Test dataset size: {len(test)}')


Train dataset size: 7000
Val dataset size: 1500
Test dataset size: 1500


In [101]:
class CustomDataset(Dataset):
    """Image-classification dataset driven by an Excel index sheet.

    The sheet is read positionally: column 0 holds the image file name
    (relative to ``img_folder``) and column 1 holds the label, where the
    string ``'y'`` maps to class 1 and anything else maps to class 0.

    Args:
        img_folder: Directory containing the image files.
        excel_file: Path to the Excel sheet listing file names and labels.
        transform: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``excel_file`` is not provided.
    """

    def __init__(self, img_folder=data_dir, excel_file=None, transform=None):
        if excel_file is None:
            # Fail early with a clear message instead of an opaque pandas error.
            raise ValueError('excel_file is required')
        self.img_folder = img_folder
        self.data = pd.read_excel(excel_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the index sheet."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return ``(image, label)``."""
        # str() guards against file names that pandas parsed as non-strings.
        img_name = os.path.join(self.img_folder, str(self.data.iloc[idx, 0]))

        # Force three channels: grayscale, palette or RGBA files would otherwise
        # break the 3-channel normalisation and batch collation downstream.
        # convert() returns a fully loaded copy, so the file can be closed here.
        with Image.open(img_name) as img:
            image = img.convert('RGB')

        label = 1 if self.data.iloc[idx, 1] == 'y' else 0

        if self.transform:
            image = self.transform(image)

        return image, label


In [103]:
# Define data augmentation and preprocessing steps.
# Random augmentation is for training only. Validation and test images get a
# deterministic pipeline so that reported metrics are reproducible and the
# validation loss used for model selection / LR scheduling is not noisy.
imagenet_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet normalization

train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomCrop(224, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    imagenet_normalize,
])

eval_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Kept so that any later cell referring to `data_transforms` still works.
data_transforms = train_transforms

# Create datasets
train_dataset = CustomDataset(excel_file=train_excel, transform=train_transforms)
val_dataset = CustomDataset(excel_file=val_excel, transform=eval_transforms)
test_dataset = CustomDataset(excel_file=test_excel, transform=eval_transforms)

# Print dataset sizes
print(f'Train dataset size: {len(train_dataset)}')
print(f'Val dataset size: {len(val_dataset)}')
print(f'Test dataset size: {len(test_dataset)}')


Train dataset size: 7000
Val dataset size: 1500
Test dataset size: 1500


In [122]:
# Number of samples per mini-batch, shared by all three loaders
batch_size = 16

# Settings common to every loader; num_workers=0 loads data in the main process
loader_kwargs = dict(batch_size=batch_size, num_workers=0)

# Only the training loader is shuffled; validation and test keep dataset order
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


In [124]:
# Load the pre-trained VGG16 model.
# torchvision >= 0.13 deprecates `pretrained=True` in favour of the `weights=`
# enum; IMAGENET1K_V1 is the same checkpoint. Fall back for older installs.
if hasattr(models, 'VGG16_Weights'):
    model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
else:
    model = models.vgg16(pretrained=True)

# Modify the final layer to match the number of classes (2 classes: 'collision' and 'no collision')
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs, 2)

# Keep the convolutional feature layers trainable (full fine-tuning).
# Set requires_grad to False here instead to freeze them and train only the classifier.
for param in model.features.parameters():
    param.requires_grad = True

# Move the model to the available device (GPU if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)




In [126]:
# Loss: cross-entropy over the model's output logits
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over every parameter of the model
optimizer = optim.SGD(params=model.parameters(), lr=0.0001, momentum=0.9)

# Scheduler: driven by a metric passed to scheduler.step(); 'min' mode means
# the learning rate is reduced when that metric stops decreasing
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')


In [None]:
# Function to train and evaluate the model
def train_model(model, criterion, optimizer, scheduler, num_epochs=16, dataloaders=None):
    """Train ``model`` and return it loaded with its best-validation weights.

    Each epoch runs one pass over the training loader (with gradient updates)
    followed by one pass over the validation loader (no gradients). The weights
    from the epoch with the highest validation accuracy are restored at the end.

    Args:
        model: Network to train; batches are moved to the device its parameters live on.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts a metric; it is stepped once
            per epoch with the validation loss.
        num_epochs: Number of train/val epochs to run.
        dataloaders: Optional mapping with ``'train'`` and ``'val'`` iterables of
            ``(inputs, labels)`` batches. Defaults to the notebook's
            ``train_loader`` and ``val_loader``.

    Returns:
        The same ``model`` object, loaded with the best validation weights.

    Raises:
        ValueError: If a phase's loader yields no samples.
    """
    import copy  # local import so this cell does not depend on the imports cell

    if dataloaders is None:
        dataloaders = {'train': train_loader, 'val': val_loader}

    # Use whatever device the model already lives on.
    device = next(model.parameters()).device

    # state_dict() returns references to the live parameter tensors, so the
    # snapshot must be deep-copied or it silently tracks later updates.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        val_loss = None

        # Loop through both train and validation phases
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            # Iterate over data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # The loss is a batch mean, so weight it by the batch size
                # before averaging over the whole epoch.
                n_batch = inputs.size(0)
                running_loss += loss.item() * n_batch
                running_corrects += (preds == labels).sum().item()
                num_samples += n_batch

            if num_samples == 0:
                raise ValueError(f'{phase} loader produced no samples')

            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if not is_train:
                val_loss = epoch_loss
                # Save best model
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

        # Step the learning rate scheduler on the validation loss
        scheduler.step(val_loss)

    print(f'Best val Acc: {best_acc:.4f}')
    model.load_state_dict(best_model_wts)
    return model

# Run training for 8 epochs (the function's default is 16) and rebind `model`
# to the network that train_model returns
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=8,
)



Epoch 0/7
----------


In [None]:
# Measure accuracy of the trained model on the held-out test split
model.eval()  # inference mode for layers such as dropout
running_corrects = 0

# No gradients are needed for evaluation, so disable autograd for the whole pass
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_inputs)
        predicted = logits.argmax(dim=1)  # index of the highest-scoring class
        running_corrects += (predicted == batch_labels).sum()

# Fraction of correctly classified test samples
test_acc = running_corrects.double() / len(test_dataset)
print(f'Test Accuracy: {test_acc:.4f}')
