In [1]:
import os, cv2, random, torch, pandas as pd
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [2]:
image_folder_path = './Images'  
label_folder_path = './Labels'  

In [3]:
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        if filename.endswith(".jpg") or filename.endswith(".png"):  
            img_path = os.path.join(folder, filename)
            img = Image.open(img_path)
            images.append((filename, img))
    return images

images = load_images_from_folder(image_folder_path)

In [4]:
# Verify the loaded images
for name, img in images:
    print(f"Loaded image: {name} with size {img.size}")

Loaded image: b4a0dee3-out_2_1_114.png with size (720, 480)
Loaded image: 1e8a186e-out_2_1_115.png with size (720, 480)
Loaded image: 43b22044-out_2_3_224.png with size (720, 480)
Loaded image: c81d8cf6-out_2_3_28.png with size (720, 480)
Loaded image: 7effeb40-out_2_1_519.png with size (720, 480)
Loaded image: 89b8ceb1-out_2_1_631.png with size (720, 480)
Loaded image: 0b521441-out_2_2_52.png with size (720, 480)
Loaded image: fdea3287-out_2_1_216.png with size (720, 480)
Loaded image: e7aff56a-out_2_1_63.png with size (720, 480)
Loaded image: 7a15af8b-out_2_2_123.png with size (720, 480)
Loaded image: ba795fad-out_2_1_52.png with size (720, 480)
Loaded image: 4ec07b08-out_2_2_920.png with size (720, 480)
Loaded image: a34025ea-out_2_2_423.png with size (720, 480)
Loaded image: bba60658-out_2_2_22.png with size (720, 480)
Loaded image: 7a64aa6c-out_2_3_329.png with size (720, 480)
Loaded image: 4edd2682-out_2_3_25.png with size (720, 480)
Loaded image: ed9a35a8-out_2_1_15.png with siz

In [5]:
# Load labels from text file
label_files = os.listdir(label_folder_path)
labels = []
for file in label_files:
    with open(f"./{label_folder_path}/{file}", "r") as f:
        for line in f.read().split("\n")[:-1]:
            item = [file.replace(".txt", ".png")]
            item.extend(line.split(" "))
            labels.append(item)
            

In [6]:
# Check if the labels list is populated correctly
if labels:
    print(f"First few label entries: {labels[:5]}")
else:
    print("No labels found.")


First few label entries: [['f46fd5e4-out_2_3_230.png', '0', '0.06639748766262897', '0.038358008075370126', '0.08434275459847465', '0.0740242261103634'], ['f46fd5e4-out_2_3_230.png', '0', '0.10094212651413191', '0.07267833109017496', '0.11215791834903543', '0.08344549125168235'], ['f46fd5e4-out_2_3_230.png', '0', '0.063257065948856', '0.2913862718707941', '0.08524001794526694', '0.0847913862718708'], ['f46fd5e4-out_2_3_230.png', '0', '0.07133243606998654', '0.6386271870794079', '0.07985643786451323', '0.12516823687752357'], ['f46fd5e4-out_2_3_230.png', '0', '0.3965903992821893', '0.40376850605652764', '0.0897263346792284', '0.07537012113055191']]


In [7]:
# Create the DataFrame
df = pd.DataFrame(labels, columns=["file", "class", "x_center", "y_center", "width", "height"])

In [8]:
df[['x_center', 'y_center', 'width', 'height']] = df[['x_center', 'y_center', 'width', 'height']].astype(float)

df

Unnamed: 0,file,class,x_center,y_center,width,height
0,f46fd5e4-out_2_3_230.png,0,0.066397,0.038358,0.084343,0.074024
1,f46fd5e4-out_2_3_230.png,0,0.100942,0.072678,0.112158,0.083445
2,f46fd5e4-out_2_3_230.png,0,0.063257,0.291386,0.085240,0.084791
3,f46fd5e4-out_2_3_230.png,0,0.071332,0.638627,0.079856,0.125168
4,f46fd5e4-out_2_3_230.png,0,0.396590,0.403769,0.089726,0.075370
...,...,...,...,...,...,...
1171,b23d330b-out_2_2_21.png,0,0.322118,0.871467,0.107672,0.122476
1172,56ef93a7-09152008flight2tape3_940.png,0,0.591297,0.928668,0.118439,0.061911
1173,56ef93a7-09152008flight2tape3_940.png,0,0.580081,0.964334,0.112158,0.055182
1174,56ef93a7-09152008flight2tape3_940.png,0,0.845671,0.885599,0.070884,0.080754


In [9]:
df["class"].unique()

array(['0'], dtype=object)

In [10]:
# Draw bounding boxes with RGB colors
def draw_bounding_boxes(image, labels):
    draw = ImageDraw.Draw(image)
    img_width, img_height = image.size
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # RGB colors: Red, Green, Blue
    vehicle_count = 0
    for _, row in labels.iterrows():
        x_center = float(row['x_center']) * img_width
        y_center = float(row['y_center']) * img_height
        width = float(row['width']) * img_width
        height = float(row['height']) * img_height
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        color = random.choice(colors)  # Choose a random color for each bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
        vehicle_count += 1

    # Display the number of vehicles
    font = ImageFont.load_default()
    text = f"Vehicles: {vehicle_count}"
    text_bbox = draw.textbbox((0, 0), text, font=font)
    text_width, text_height = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
    draw.rectangle([(0, 0), (text_width + 10, text_height + 10)], fill="black")
    draw.text((5, 5), text, fill="white", font=font)
    
    return image, vehicle_count

In [11]:
def pick_random_image(folder):
    image_files = [file for file in os.listdir(folder) if file.endswith(('.jpg', '.png'))]
    random_image = random.choice(image_files)
    return random_image

# Pick a random image
random_image_file = pick_random_image(image_folder_path)
print(f"Randomly selected image: {random_image_file}")

Randomly selected image: 75e5b04d-out_2_2_216.png


In [12]:
# Select an image to display
image_to_display = random_image_file
image_labels = df[df['file'] == image_to_display]
image_path = os.path.join(image_folder_path, image_to_display)

image = Image.open(image_path)
image_with_boxes, vehicle_count = draw_bounding_boxes(image, image_labels)
print(f"Number of vehicles: {vehicle_count}")
image_with_boxes.show()

Number of vehicles: 1


In [13]:
# Normalize the bounding box coordinates
df[['x_center', 'y_center', 'width', 'height']] = df[['x_center', 'y_center', 'width', 'height']].astype(float)

In [14]:
# Resize images to a consistent size and preprocess the labels
image_size = (320, 320)
preprocessed_images = []
preprocessed_labels = []

output_image_folder = './data/PreprocessedImages'
os.makedirs(output_image_folder, exist_ok=True)

In [15]:
for name, img in images:
    
    # Resize image
    img_resized = img.resize(image_size)
    img_array = np.array(img_resized)
    
    # Save the resized image
    img_resized.save(os.path.join(output_image_folder, name))
    
    # Get corresponding labels
    img_labels = df[df['file'] == name]
    
    for _, row in img_labels.iterrows():
        x_center = row['x_center'] * image_size[0]
        y_center = row['y_center'] * image_size[1]
        width = row['width'] * image_size[0]
        height = row['height'] * image_size[1]
        
        # Normalize bounding box coordinates relative to the resized image dimensions
        norm_x_center = x_center / image_size[0]
        norm_y_center = y_center / image_size[1]
        norm_width = width / image_size[0]
        norm_height = height / image_size[1]
        
        preprocessed_labels.append([name, row['class'], norm_x_center, norm_y_center, norm_width, norm_height])



In [16]:
# Convert labels to DataFrame
preprocessed_labels_df = pd.DataFrame(preprocessed_labels, columns=["file", "class", "x_center", "y_center", "width", "height"])

In [17]:
# Save the preprocessed labels to a CSV file
preprocessed_labels_df.to_csv('preprocessed_labels.csv', index=False)

In [18]:
# Display sample preprocessed image with bounding boxes
def draw_bounding_boxes(image_array, labels):
    img = Image.fromarray(image_array)
    draw = ImageDraw.Draw(img)
    img_width, img_height = img.size
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # RGB colors: Red, Green, Blue
    vehicle_count = 0
    for _, row in labels.iterrows():
        x_center = float(row['x_center']) * img_width
        y_center = float(row['y_center']) * img_height
        width = float(row['width']) * img_width
        height = float(row['height']) * img_height
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        color = random.choice(colors)  # Choose a random color for each bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
        vehicle_count += 1

    # Display the number of vehicles
    font = ImageFont.load_default()
    text = f"Vehicles: {vehicle_count}"
    text_bbox = draw.textbbox((5, 5), text, font=font)
    draw.rectangle([text_bbox[0], text_bbox[1], text_bbox[2], text_bbox[3]], fill="black")
    draw.text((5, 5), text, fill="white", font=font)
    
    return img, vehicle_count

In [19]:
# Example of displaying a preprocessed image with bounding boxes
example_image_name = random_image_file
example_image_path = os.path.join(output_image_folder, example_image_name)
example_image_array = np.array(Image.open(example_image_path))
example_labels = preprocessed_labels_df[preprocessed_labels_df['file'] == example_image_name]
image_with_boxes, vehicle_count = draw_bounding_boxes(example_image_array, example_labels)
print(f"Number of vehicles in preprocessed image: {vehicle_count}")
image_with_boxes.show()

Number of vehicles in preprocessed image: 1


In [20]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label = int(self.img_labels.iloc[idx, 1])
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Split dataset into training and validation sets
train_df, val_df = train_test_split(preprocessed_labels_df, test_size=0.2, random_state=42)
train_df.to_csv('train_labels.csv', index=False)
val_df.to_csv('val_labels.csv', index=False)

# Create datasets
train_dataset = CustomImageDataset(annotations_file='train_labels.csv', img_dir=output_image_folder, transform=transform)
val_dataset = CustomImageDataset(annotations_file='val_labels.csv', img_dir=output_image_folder, transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [21]:
num_classes = len(preprocessed_labels_df['class'].unique())
model = models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/vinithshetty/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████████████████████████████████| 97.8M/97.8M [00:01<00:00, 66.9MB/s]


In [22]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')
    
    # Validation step
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    print(f'Validation Loss: {val_loss/len(val_loader):.4f}')


Epoch [1/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [2/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [3/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [4/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [5/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [6/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [7/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [8/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [9/10], Loss: 0.0000
Validation Loss: 0.0000
Epoch [10/10], Loss: 0.0000
Validation Loss: 0.0000


In [23]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the model on the validation set
model.eval()
all_labels = []
all_preds = []

with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

# Calculate performance metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

print(f'Validation Accuracy: {accuracy:.4f}')
print(f'Validation Precision: {precision:.4f}')
print(f'Validation Recall: {recall:.4f}')
print(f'Validation F1 Score: {f1:.4f}')


Validation Accuracy: 1.0000
Validation Precision: 1.0000
Validation Recall: 1.0000
Validation F1 Score: 1.0000


In [24]:
# Save the model
torch.save(model.state_dict(), 'model.pth')

In [32]:
# Define the model and load pre-trained weights
model = models.resnet50(pretrained=True)
num_classes = 1  # Replace with the actual number of classes in your dataset
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
model.load_state_dict(torch.load('model.pth'))
model.eval()

# Define the transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [33]:
# Define the predict function
def predict(image_path, model, transform):
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        outputs = model(image)
        _, preds = torch.max(outputs, 1)
    return preds.item()


In [34]:
def pick_random_image(folder):
    image_files = [file for file in os.listdir(folder) if file.endswith(('.jpg', '.png'))]
    return random.choice(image_files) if image_files else None

# Function to draw bounding boxes
def draw_bounding_boxes(image_path, labels_df):
    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)
    img_width, img_height = image.size
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # RGB colors: Red, Green, Blue
    vehicle_count = 0
    for _, row in labels_df.iterrows():
        x_center = float(row['x_center']) * img_width
        y_center = float(row['y_center']) * img_height
        width = float(row['width']) * img_width
        height = float(row['height']) * img_height
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        color = random.choice(colors)  # Choose a random color for each bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
        vehicle_count += 1

    # Display the number of vehicles
    font = ImageFont.load_default()
    text = f"Vehicles: {vehicle_count}"
    text_bbox = draw.textbbox((0, 0), text, font=font)
    text_width, text_height = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
    draw.rectangle([(0, 0), (text_width + 10, text_height + 10)], fill="black")
    draw.text((5, 5), text, fill="white", font=font)
    
    return image, vehicle_count

In [44]:
# Load preprocessed labels
preprocessed_labels_df = pd.read_csv('preprocessed_labels.csv')

# Example of inference with a randomly selected images
image_folder_path = './data/PreprocessedImages'
random_image_file = pick_random_image(image_folder_path)

if random_image_file:
    example_image_path = os.path.join(image_folder_path, random_image_file)
    predicted_class = predict(example_image_path, model, transform)
    print(f'Randomly selected image: {random_image_file}')
    print(f'Predicted Class: {predicted_class}')
    
    # Draw bounding boxes on the selected image
    image_labels = preprocessed_labels_df[preprocessed_labels_df['file'] == random_image_file]
    image_with_boxes, vehicle_count = draw_bounding_boxes(example_image_path, image_labels)
    print(f'Number of vehicles: {vehicle_count}')
    image_with_boxes.show()
else:
    print('No images found in the specified directory.')

Randomly selected image: 31b1c125-out_2_2_914.png
Predicted Class: 0
Number of vehicles: 1
