In [12]:
# Import necessary libraries
import os
import torch
import torch.nn as nn
from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
import torch.nn.functional as F
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split

# Import custom dataset class
from utils.dataset import SolarPanelDataset

In [13]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
dataset_name = "Kasmi_2023"

In [15]:
# Define the transformations
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# Define the root directory
root_dir = f"data/{dataset_name}"

# Create the full dataset
full_dataset = SolarPanelDataset(root_dir, transform)

# Define the sizes of your splits
train_size = int(0.7 * len(full_dataset))
val_size = int(0.15 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

# Create the datasets
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset, [train_size, val_size, test_size]
)

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [16]:
# Print the first 5 items
for i in range(5):
    sample = train_dataset[i]
    print(sample)

(Image([[[111, 107,  99,  ...,  45,  36,  36],
        [111, 107,  99,  ...,  45,  45,  45],
        [ 99, 103, 103,  ...,  45,  45,  45],
        ...,
        [ 67,  78,  75,  ..., 139, 139, 139],
        [ 67,  75,  74,  ..., 151, 167, 147],
        [ 67,  74,  75,  ..., 159, 159, 139]],

       [[111, 107, 107,  ...,  36,  36,  36],
        [111, 107, 107,  ...,  45,  45,  45],
        [107, 111, 111,  ...,  36,  36,  45],
        ...,
        [ 71,  74,  75,  ..., 123, 123, 123],
        [ 71,  75,  74,  ..., 147, 139, 131],
        [ 67,  74,  75,  ..., 159, 159, 139]],

       [[ 99,  93,  99,  ...,  36,  32,  32],
        [ 99,  93,  99,  ...,  40,  40,  40],
        [ 99, 107, 107,  ...,  36,  36,  40],
        ...,
        [ 55,  62,  63,  ...,  99,  99,  99],
        [ 55,  63,  58,  ..., 131, 119, 115],
        [ 55,  58,  63,  ..., 139, 139, 123]]], dtype=torch.uint8, ), tensor([[[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],

In [17]:
full_dataset

<utils.dataset.SolarPanelDataset at 0x1581363d0>

In [18]:
print(
    f"The trainset has {train_size} images. The validation set has {val_size} images. The testset has {test_size} images."
)

The trainset has 9312 images. The validation set has 1995 images. The testset has 1996 images.


In [19]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x14feb6750>

In [25]:
# Define a Model
class SolarPanelDetector(nn.Module):
    def __init__(self):
        super(SolarPanelDetector, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 97 * 97, 120)  # Adjusted to match the actual size
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 3 * 400 * 400)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 97 * 97)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x.view(-1, 3, 400, 400)


# Instantiate the model
model = SolarPanelDetector()

# Define a loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 2
for epoch in range(epochs):  # loop over the dataset multiple times
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels, _ = data

        # Convert inputs and labels to float tensors
        inputs = inputs.float()
        labels = labels.float()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print(f"Epoch {epoch + 1}/{epochs}, Batch {i + 1}/{len(train_loader)}")
            print(f"Loss: {loss.item()}")

print("Finished Training")

Epoch 1/10, Batch 1/582
Loss: 0.7551432251930237
Epoch 1/10, Batch 101/582
Loss: -67767066624.0
Epoch 1/10, Batch 201/582
Loss: -7825545756672.0
Epoch 1/10, Batch 301/582
Loss: -95595880513536.0
Epoch 1/10, Batch 401/582
Loss: -665795025698816.0
Epoch 1/10, Batch 501/582
Loss: -1616000504037376.0
Epoch 2/10, Batch 1/582
Loss: -4361068721733632.0
Epoch 2/10, Batch 101/582
Loss: -1.0567780990451712e+16
Epoch 2/10, Batch 201/582
Loss: -2.330561138707661e+16
Epoch 2/10, Batch 301/582
Loss: -3.401253535927501e+16
Epoch 2/10, Batch 401/582
Loss: -5.505738763088691e+16
Epoch 2/10, Batch 501/582
Loss: -8.561976072889958e+16
Finished Training


In [27]:
# Create the directory if it doesn't exist
if not os.path.exists("model"):
    os.makedirs("model")

version = 0
filename = f"model/model_{dataset_name}.pth"

# Check if the file exists and increment version number until a unique filename is found
while os.path.exists(filename):
    version += 1
    filename = f"model_{dataset_name}_v{version}.pth"

# Save the model
torch.save(model.state_dict(), filename)

In [29]:
model.eval()
with torch.no_grad():
    for data in val_loader:
        print(data)  # Temporarily print data to inspect its structure
        break  # Break after the first batch to avoid printing too much data

[tensor([[[[ 84,  91,  91,  ..., 195, 195, 195],
          [ 89,  92,  91,  ..., 195, 195, 195],
          [ 91,  92,  91,  ..., 195, 195, 195],
          ...,
          [128, 186, 215,  ..., 166, 166, 166],
          [174, 215, 243,  ..., 174, 166, 158],
          [226, 251, 251,  ..., 166, 166, 158]],

         [[ 84,  91,  91,  ..., 195, 195, 195],
          [ 89,  96,  91,  ..., 195, 195, 195],
          [ 91,  96,  91,  ..., 195, 195, 195],
          ...,
          [139, 187, 215,  ..., 166, 166, 166],
          [174, 215, 243,  ..., 174, 166, 158],
          [226, 251, 251,  ..., 166, 166, 158]],

         [[ 74,  84,  84,  ..., 179, 179, 179],
          [ 74,  80,  84,  ..., 179, 179, 179],
          [ 84,  80,  84,  ..., 179, 179, 179],
          ...,
          [124, 179, 203,  ..., 159, 159, 159],
          [163, 203, 235,  ..., 163, 159, 154],
          [214, 235, 235,  ..., 159, 159, 154]]],


        [[[215, 251, 255,  ...,  28,  36,  56],
          [227, 215, 215,  ...,  4

In [34]:
from sklearn.metrics import precision_score, recall_score, jaccard_score


def evaluate(predictions, targets):
    # Flatten the tensors and convert them to binary
    predictions = (predictions > 0.5).flatten().numpy()
    targets = targets.flatten().numpy()

    # Calculate metrics
    precision = precision_score(targets, predictions, average="weighted")
    recall = recall_score(targets, predictions, average="weighted")
    IoU = jaccard_score(targets, predictions, average="weighted")

    return precision, recall, IoU

In [35]:
# Evaluate the model
model.eval()
with torch.no_grad():
    for images, masks, targets in val_loader:
        images = images.float()  # Convert images to float

        predictions = model(images)

        # Evaluate the model's performance on the validation set
        precision, recall, IoU = evaluate(predictions, masks)
        print(f"Precision: {precision.item()}")
        print(f"Recall: {recall.item()}")
        print(f"IoU: {IoU.item()}")

        # Make predictions on new images
        new_image = torch.randn((1, 3, 224, 224))
        prediction = model(new_image)

        # Apply post-processing steps like non-maximal suppression (NMS) to refine the predictions
        refined_prediction = refine_prediction(prediction)

        # Visualize the predicted mask
        visualize_mask(refined_prediction)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Precision: 0.9861208081899278
Recall: 0.2881854166666667
IoU: 0.28813978590398054


RuntimeError: shape '[-1, 150544]' is invalid for input of size 44944