In [8]:
#import required libraries
import os
from PIL import Image
import random
import numpy as np

import zipfile

import torch
from torch import nn
from torchvision import transforms
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader, random_split
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler


In [2]:
#connect to drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
# Class to preprocess images
class ConstructionSiteDataset(Dataset):
    def __init__(self, base_dir, transform=None, device='cpu'):
        self.base_dir = base_dir
        self.transform = transform
        self.device = device
        self.classes = ['underdeveloped', 'ground_broken', 'concrete_pad', 'framing_up', 'near_completion']
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}
        self.images = []
        self.class_counts = {cls_name: 0 for cls_name in self.classes}

        for _class in self.classes:
            class_dir = os.path.join(base_dir, _class)
            class_images = [img_name for img_name in os.listdir(class_dir) if img_name.lower().endswith(('png', 'jpg', 'jpeg'))]
            self.images.extend([(os.path.join(class_dir, img), _class) for img in class_images])
            self.class_counts[_class] += len(class_images)

    def __getitem__(self, idx):
        path, _class = self.images[idx]
        image = Image.open(path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = self.class_to_idx[_class]
        return image, label

    def __len__(self):
        return len(self.images)

    def get_class_counts(self):
        return self.class_counts

# Define transformations
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor()  # Make sure this is the last step
])

test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()  # Make sure this is the last step
])

# Paths to dataset directories
train_dir = '/content/drive/My Drive/NYUDatathon/Train_data'
test_dir = '/content/drive/My Drive/NYUDatathon/test_data'

# Creating dataset instances with transformations
train_dataset = ConstructionSiteDataset(train_dir, transform=train_transforms, device='cpu')
test_dataset = ConstructionSiteDataset(test_dir, transform=test_transforms, device='cpu')

# Print class counts from the train dataset
print("Train dataset class counts:", train_dataset.get_class_counts())

# Calculate weights for each sample in the dataset
class_weights = {cls_name: 1.0 / count for cls_name, count in train_dataset.get_class_counts().items()}
sample_weights = [class_weights[_class] for _, _class in train_dataset.images]

# Create the WeightedRandomSampler
sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

# DataLoader using the sampler
train_loader = DataLoader(train_dataset, batch_size=10, sampler=sampler, shuffle=False)  # shuffle is False when using a sampler

# DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

# Verifying one sample batch
for images, labels in train_loader:
    print("Batch images shape:", images.shape)
    print("Batch labels:", labels)
    break  # Only print the first batch

Train dataset class counts: {'underdeveloped': 16, 'ground_broken': 120, 'concrete_pad': 15, 'framing_up': 17, 'near_completion': 33}
Batch images shape: torch.Size([10, 3, 224, 224])
Batch labels: tensor([0, 3, 3, 3, 0, 1, 1, 2, 3, 0])


In [14]:
import torch.optim as optim
import torch.nn as nn

# Define a simple CNN model
model = nn.Sequential(
    nn.Conv2d(3, 32, 3, padding=1),  # Example layer
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(32 * 112 * 112, 5)  # Assuming the output size from Conv2d and pooling
)
model.to('cpu')  # Or 'cuda' if you have GPU support

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # Print every 10 mini-batches
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

print('Finished Training')

[1, 10] loss: 17.815
[1, 20] loss: 7.329
[2, 10] loss: 4.551
[2, 20] loss: 1.851
[3, 10] loss: 1.866
[3, 20] loss: 1.432
[4, 10] loss: 1.351
[4, 20] loss: 1.141
[5, 10] loss: 1.019
[5, 20] loss: 0.942
[6, 10] loss: 0.941
[6, 20] loss: 0.934
[7, 10] loss: 0.945
[7, 20] loss: 0.763
[8, 10] loss: 0.805
[8, 20] loss: 0.715
[9, 10] loss: 0.611
[9, 20] loss: 0.666
[10, 10] loss: 0.604
[10, 20] loss: 0.563
Finished Training


In [15]:
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # No gradients needed
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy of the model on the test images: {accuracy:.2f}%')
    return accuracy


In [16]:
model_accuracy = evaluate_model(model, test_loader)

Accuracy of the model on the test images: 67.27%


In [17]:
from sklearn.metrics import classification_report

def detailed_evaluation(model, test_loader):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_predictions.extend(predicted.tolist())
            all_labels.extend(labels.tolist())

    # Print detailed classification report
    print(classification_report(all_labels, all_predictions, target_names=['underdeveloped', 'ground_broken', 'concrete_pad', 'framing_up', 'near_completion']))

# Example call to this function
detailed_evaluation(model, test_loader)

                 precision    recall  f1-score   support

 underdeveloped       0.48      1.00      0.65        11
  ground_broken       1.00      0.38      0.56        26
   concrete_pad       0.89      1.00      0.94         8
     framing_up       0.33      0.50      0.40         2
near_completion       0.70      0.88      0.78         8

       accuracy                           0.67        55
      macro avg       0.68      0.75      0.66        55
   weighted avg       0.81      0.67      0.66        55



In [22]:
##Final Model

In [18]:
#taking into consideration the limitations of the older model, we enhance the cnn
class ComplexCNN(nn.Module):
    def __init__(self):
        super(ComplexCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(256 * 28 * 28, 1024)  # Adjust the size according to your final feature map dimensions
        self.fc2 = nn.Linear(1024, 5)  # Output layer for 5 classes

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)  # Flatten the features into a vector
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# Model instantiation
model = ComplexCNN()
model.to('cpu')  # Use 'cuda' if GPU is available

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # Print every 10 mini-batches
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

print('Finished Training')

[1, 10] loss: 150.109
[1, 20] loss: 127.345
[2, 10] loss: 120.672
[2, 20] loss: 88.520
[3, 10] loss: 29.168
[3, 20] loss: 26.536
[4, 10] loss: 22.217
[4, 20] loss: 15.994
[5, 10] loss: 10.265
[5, 20] loss: 11.023
[6, 10] loss: 11.267
[6, 20] loss: 10.409
[7, 10] loss: 7.727
[7, 20] loss: 9.665
[8, 10] loss: 7.311
[8, 20] loss: 5.311
[9, 10] loss: 9.902
[9, 20] loss: 10.038
[10, 10] loss: 10.960
[10, 20] loss: 10.620
Finished Training


In [19]:
model_accuracy = evaluate_model(model, test_loader)

Accuracy of the model on the test images: 61.82%


In [20]:
from sklearn.metrics import classification_report

def detailed_evaluation(model, test_loader):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_predictions.extend(predicted.tolist())
            all_labels.extend(labels.tolist())

    # Print detailed classification report
    print(classification_report(all_labels, all_predictions, target_names=['underdeveloped', 'ground_broken', 'concrete_pad', 'framing_up', 'near_completion']))

# Example call to this function
detailed_evaluation(model, test_loader)

                 precision    recall  f1-score   support

 underdeveloped       0.57      0.36      0.44        11
  ground_broken       0.89      0.65      0.76        26
   concrete_pad       0.36      0.62      0.45         8
     framing_up       0.14      0.50      0.22         2
near_completion       0.88      0.88      0.88         8

       accuracy                           0.62        55
      macro avg       0.57      0.60      0.55        55
   weighted avg       0.72      0.62      0.65        55

