### To do list for implementation from scratch

*Resources*

* https://brsoff.github.io/tutorials/beginner/finetuning_torchvision_models_tutorial.html
* https://rumn.medium.com/part-1-ultimate-guide-to-fine-tuning-in-pytorch-pre-trained-model-and-its-configuration-8990194b71e
* https://pytorch.org/vision/stable/models/generated/torchvision.models.vgg16.html#torchvision.models.vgg16

*Stappenplan*
* Look at details of how SMILIES implemented VGG.
* Prepare and transform data (according to needs of pretrained model)
* Look into what pytorch calls 'feature extraction' (only changing classification head), not 'fine tuning' 

### Imports

In [1]:
import torch
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import rasterio
import numpy as np
from torchvision import models, transforms

  Referenced from: <E03EDA44-89AE-3115-9796-62BA9E0E2EDE> /Users/nadja/miniconda3/envs/torchvision/lib/python3.11/site-packages/torchvision/image.so
  warn(


### Dataset class definition

The pretrained VGG "Accepts PIL.Image, batched (B, C, H, W) and single (C, H, W) image torch.Tensor objects"

* must convert the TIF into a format that it can be read by PIL, or use single Tensor objects
* But if using batch i assume PIL must be used?

In [66]:

class ImageDataset(Dataset):
    def __init__(self, image_dir, labels_df):
        self.image_dir = image_dir
        self.labels_df = labels_df

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = self.labels_df.index[idx]
        img_path = os.path.join(self.image_dir, f"{img_name}.tif")

        # Open the TIF image using rasterio
        with rasterio.open(img_path) as src:
            # Read the image data
            image_data = src.read()
        image_array = np.array(image_data)
        image_tensor = torch.from_numpy(image_array)
        image_tensor = image_tensor.float()

        label = self.labels_df.iloc[idx, 0]

        return image_tensor, label



### Data preparation 

In [67]:
# image_dir = "/home/nadjaflechner/Palsa_data/dataset_100m/"
# labels_file = "/home/nadjaflechner/Palsa_data/binary_palsa_labels_100m.csv"

image_dir = "/Users/nadja/Documents/UU/Thesis/Data/100m"
labels_file = "/Users/nadja/Documents/UU/Thesis/Data/100m_palsa_labels.csv"

# Load the labels from the CSV file
labels_df = pd.read_csv(labels_file, index_col=0).head(100)

# Split the dataset into training and validation sets
train_df = labels_df.head(800)
val_df = labels_df.drop(train_df.index)

# Create the datasets and data loaders
train_dataset = ImageDataset(image_dir, train_df )
val_dataset = ImageDataset(image_dir, val_df )

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=20, shuffle=False)

### Loading VGG model 

In [77]:
# VGG = models.vgg16_bn(pretrained = True)
# VGG.eval()
weights = models.VGG16_BN_Weights.DEFAULT
transforms = weights.transforms()

for imgs, labels in train_loader:
    first_batch = imgs
    first_labels = labels
    break

transformed_batch = transforms(first_batch)

prediction = VGG(transformed_batch).softmax(1)
class_id = torch.argmax(prediction, dim = 1)

# score = prediction[class_id]
# category_name = weights.meta["categories"][class_id]
# print(f"{category_name}: {100 * score:.1f}%")



In [83]:
prediction = VGG(transformed_batch).softmax(1)
class_scores, class_indices = torch.max(prediction, dim=1)
# category_name = weights.meta["categories"][class_scores]
# print(f"{category_name}: {100 * score:.1f}%")

In [87]:
highest_scores, highest_indices = torch.max(prediction, dim=1)

In [88]:
highest_scores

tensor([1.0000, 1.0000, 0.9999, 1.0000, 0.9919, 1.0000, 1.0000, 0.9233, 0.9900,
        0.5277, 1.0000, 1.0000, 1.0000, 0.9991, 0.9987, 1.0000, 1.0000, 1.0000,
        0.9747, 1.0000], grad_fn=<MaxBackward0>)

In [90]:
category_name = weights.meta["categories"][highest_scores]


TypeError: only integer tensors of a single element can be converted to an index

In [89]:
highest_indices

tensor([470, 470, 470, 470, 470, 470, 470, 470, 470, 920, 470, 470, 470, 920,
        470, 470, 470, 470, 920, 470])

In [85]:
prediction.shape

torch.Size([20, 1000])

In [86]:
class_id.shape

torch.Size([20])

I have a tensor of shape torch.Size([20, 1000]). This is the softmax output of a batch of 20 images, for 1000 categories. I want to obtain two tensors of torch.Size([20]): one with the argmax highest score per image and another with the index of the category with the highest score. Write code which does this. 

In [12]:
VGG = models.vgg16_bn(pretrained = True)

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /Users/nadja/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth
100.0%


In [45]:
weights = models.VGG16_BN_Weights.DEFAULT
VGG.eval()

# Step 4: Use the model and print the predicted category
prediction = VGG(transformed_img.unsqueeze(0)).squeeze(0).softmax(0)
class_id = prediction.argmax().item()
score = prediction[class_id].item()
category_name = weights.meta["categories"][class_id]
print(f"{category_name}: {100 * score:.1f}%")

candle: 100.0%


In [63]:
weights = models.VGG16_BN_Weights.DEFAULT
VGG.eval()

# Step 4: Use the model and print the predicted category
prediction = VGG(transformed_batch).softmax(1)


# class_id = prediction.argmax().item()
# score = prediction[class_id].item()
# category_name = weights.meta["categories"][class_id]
# print(f"{category_name}: {100 * score:.1f}%")

In [64]:
prediction

tensor([[2.1019e-44, 5.5625e-30, 1.4013e-45,  ..., 2.1644e-39, 4.9325e-20,
         6.5723e-20],
        [0.0000e+00, 1.8766e-36, 0.0000e+00,  ..., 1.4013e-45, 2.4856e-23,
         9.5522e-19]], grad_fn=<SoftmaxBackward0>)

In [65]:
prediction.shape

torch.Size([2, 1000])

In [26]:
print(VGG)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [9]:
VGG.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [10]:
VGG.classifier = nn.Sequential(
    nn.Linear(25088, 256),  # Additional linear layer with 256 output features
    nn.ReLU(inplace=True),         # Activation function (you can choose other activation functions too)
    nn.Dropout(0.5),               # Dropout layer with 50% probability
    nn.Linear(256, 2)    # Final prediction fc layer
)

In [11]:
VGG

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256