In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import zipfile

In [None]:
# Fetch the hackathon repository with a shell command (Jupyter `!` escape).
# NOTE(review): presumably this repository provides the image archive and CSV files
# read by the following cells (images.zip, /content/train.csv) — confirm.

!git clone https://github.com/Prashant-AV/Qualcomm-DL-Hackathon.git

# Disabled draft: merge the images of two zip archives into one combined archive.
# The helpers it calls (extract_images, combine_images_to_zip) are not defined in
# the visible cells, so they must be supplied before these lines are re-enabled.
#zip_file1 = ''
#zip_file2 = 'path_to_zip2.zip'

#image_files1 = extract_images(zip_file1)
#image_files2 = extract_images(zip_file2)

# Combine the images and save them to a new zip file
#combined_image_files = image_files1 + image_files2
#output_zip = 'combined_images.zip'
#combine_images_to_zip(combined_image_files, output_zip)

In [None]:
# Destination folder for the unpacked archive; created first if it does not exist yet.
extract_dir = "/content"
os.makedirs(extract_dir, exist_ok=True)

# Unpack every member of images.zip (path is relative to the working directory).
with zipfile.ZipFile("images.zip", mode="r") as archive:
    archive.extractall(path=extract_dir)

print(f"Contents extracted to {extract_dir}")

Contents extracted to /content


In [None]:
# Emergency Dataset Class
class EmergencyDataset(Dataset):
    """Labelled image dataset indexed by a CSV file.

    For each row, column 0 is joined onto ``root_dir`` to form the image path
    and column 1 is cast to ``int`` and returned as the label.

    Args:
        csv_file: Path or file-like object handed to ``pd.read_csv``.
        root_dir: Directory the image paths in column 0 are relative to.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir, self.transform = root_dir, transform

    def __len__(self):
        """Number of rows in the CSV index."""
        return self.data_frame.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``."""
        image_path = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])
        # Force three channels regardless of the mode the file is stored in.
        picture = Image.open(image_path).convert("RGB")
        target = int(self.data_frame.iloc[idx, 1])

        if not self.transform:
            return picture, target
        return self.transform(picture), target

In [None]:
# Split train.csv into train / validation / test subsets
train_csv = '/content/train.csv'
data = pd.read_csv(train_csv)

# Two successive 80/20 cuts with a fixed seed: the first holds out the test set,
# the second carves the validation set out of what remains
# (roughly 64% train / 16% validation / 20% test of the original rows).
train_data, test_data = train_test_split(data, random_state=42, test_size=0.2)
train_data, val_data = train_test_split(train_data, random_state=42, test_size=0.2)

# Write each subset to its own CSV, leaving the pandas index out of the file
for split_frame, split_path in (
    (train_data, 'train_split.csv'),
    (val_data, 'val_split.csv'),
    (test_data, 'test_split.csv'),
):
    split_frame.to_csv(split_path, index=False)

print("Data has been split into train_split.csv, val_split.csv, and test_split.csv")

# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel.
# NOTE(review): the mean/std values are presumably the ImageNet channel statistics
# expected by the pretrained backbone — confirm.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

Data has been split into train_split.csv, val_split.csv, and test_split.csv


In [None]:
# Build one dataset per split; all three share the image folder and preprocessing
root_dir = '/content/images'
train_dataset, val_dataset, test_dataset = (
    EmergencyDataset(csv_file=split_csv, root_dir=root_dir, transform=transform)
    for split_csv in ('train_split.csv', 'val_split.csv', 'test_split.csv')
)

# Only the training loader shuffles; validation and test keep the CSV order
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=32, shuffle=False)
    for split_dataset in (val_dataset, test_dataset)
)


In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze every pretrained parameter, then re-enable gradients for layer4 only
model.requires_grad_(False)
model.layer4.requires_grad_(True)

# Swap the classifier head for dropout -> single-unit linear -> sigmoid, so the
# network emits one probability per image. The new layers are created after the
# freeze, so their parameters are trainable.
head_in_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(head_in_features, 1),
    nn.Sigmoid(),
)
model = model.to(device)

# Two parameter groups: a smaller step for the unfrozen backbone stage than for
# the freshly initialised head
param_groups = [
    {'params': model.layer4.parameters(), 'lr': 1e-4},
    {'params': model.fc.parameters(), 'lr': 1e-3},
]
optimizer = optim.Adam(param_groups)

# Binary cross-entropy on probabilities (pairs with the Sigmoid at the end of the head)
criterion = nn.BCELoss()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 169MB/s]


In [None]:
num_epochs = 50
best_val_loss = float('inf')


def _run_epoch(loader, train):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and ``device``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        train: When True the model is put in training mode and the optimizer is
            stepped after every batch; when False the model is put in eval mode
            and gradients are disabled.

    Returns:
        Tuple of the per-sample mean loss and the accuracy in percent.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train(train)
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(train):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Reshape labels to (batch, 1) floats to match the model output
            labels = labels.view(-1, 1).float()

            if train:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if train:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so a
            # smaller final batch does not count as much as a full one.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += batch_size

    return loss_sum / total, 100 * correct / total


for epoch in range(num_epochs):
    epoch_loss, epoch_acc = _run_epoch(train_loader, train=True)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

    # Validation step
    val_loss, val_acc = _run_epoch(val_loader, train=False)
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")

    # Save the model if validation loss has decreased
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'vehicle_classifier.pth')
        print("Model saved!")

# Load the best model; map_location keeps the checkpoint loadable on whichever
# device was selected for this session.
model.load_state_dict(torch.load('vehicle_classifier.pth', map_location=device))

FileNotFoundError: [Errno 2] No such file or directory: '/content/images/589.jpg'

In [None]:
# Custom Dataset Class for Test Data
class TestDataset(Dataset):
    """Unlabelled image dataset indexed by a CSV file.

    Column 0 of each row is joined onto ``root_dir`` to form the image path;
    the same column value is returned next to the image so predictions can be
    matched back to their rows.

    Args:
        csv_file: Path or file-like object handed to ``pd.read_csv``.
        root_dir: Directory the image paths in column 0 are relative to.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir, self.transform = root_dir, transform

    def __len__(self):
        """Number of rows in the CSV index."""
        return self.data_frame.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, column-0 value)`` for row ``idx``."""
        file_name = self.data_frame.iloc[idx, 0]
        # Force three channels regardless of the mode the file is stored in.
        picture = Image.open(os.path.join(self.root_dir, file_name)).convert("RGB")

        if self.transform:
            picture = self.transform(picture)

        return picture, file_name

# Load the test data
test_csv = '/content/test.csv'
test_dataset = TestDataset(csv_file=test_csv, root_dir=root_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Perform inference on the test data and write results to sample_submissions.csv
results = []

# Switch to inference mode explicitly so dropout is disabled and batch-norm uses
# its running statistics, whatever state an earlier cell left the model in.
model.eval()
with torch.no_grad():
    for inputs, img_names in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        # The head ends in a single sigmoid unit, so outputs has one column of
        # probabilities. An argmax over that column is always 0; threshold at 0.5
        # instead, as the training and validation loops do.
        predicted = (outputs > 0.5).long().view(-1)

        results.extend(
            [img_name, label] for img_name, label in zip(img_names, predicted.tolist())
        )

# Save the results to sample_submissions.csv
submission_df = pd.DataFrame(results, columns=['image_names', 'emergency_or_not'])
submission_df.to_csv('sample_submissions.csv', index=False)

print("Results have been written to sample_submissions.csv")

Results have been written to sample_submissions.csv
