#### meta

In [None]:
# tutorial: object detection on an image

In [None]:
# jedenfalls

In [None]:
# 25 March 2024

In [None]:
# https://www.youtube.com/watch?v=z3kB3ISIPAg&list=PL3Dh_99BJkCEhE7Ri8W6aijiEqm3ZoGRq&index=4

#### setup

In [None]:
import os

In [None]:
import torch

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
import torchvision

In [None]:
import torchvision.transforms as transforms

In [None]:
from torchvision.utils import save_image

In [None]:
import ultralytics

In [None]:
from ultralytics import YOLO

In [None]:
import matplotlib.pyplot as plt

In [None]:
import numpy as np

In [None]:
import PIL.Image as Image

In [None]:
import natsort

In [None]:
# Directory with the training images, relative to the notebook's working directory.
training_dataset_path = './images/train'

In [None]:
# Directory with the validation images, relative to the notebook's working directory.
test_dataset_path = './images/val'

In [112]:
# quick check

In [None]:
# Quick sanity check: list the entries of the training directory. The shared
# path constant is used so this check cannot drift from what the dataset reads.
os.listdir(training_dataset_path)

#### train loader

In [None]:
# https://discuss.pytorch.org/t/how-to-load-images-without-using-imagefolder/59999/7

#### transform images (to increase volume and variation of training dataset)

##### reduce size to speed up model training

##### images need to be same size

##### random horizontal flip to increase number of images

##### load and preprocess data using PyTorch’s DataLoader class

In [None]:
# Plain, deterministic preprocessing (no augmentation): scale every image to
# 224x224 so all samples share one size, then convert it to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

In [None]:
# Augmentation pipeline for the training images; steps run in the listed order.
# The input is the PIL image opened by CustomDataSet, so there is no ToPILImage
# step (it only accepts tensors/arrays), and ToTensor comes before Normalize
# because Normalize operates on tensors, not on PIL images.
training_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # all images must share one size
    transforms.RandomCrop((224, 224)),  # same size as the resize, so the full image is kept
    transforms.RandomHorizontalFlip(p=0.5),  # p is the flip probability
    transforms.RandomVerticalFlip(p=0.5),  # p is the flip probability
    transforms.RandomRotation(10),  # rotate by up to 10 degrees either way
    transforms.ColorJitter(hue=0.3),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),  # PIL image -> tensor
    # Per-channel normalisation: image = (image - mean) / std
    transforms.Normalize(
        torch.Tensor([0.5352, 0.5258, 0.4332]),
        torch.Tensor([0.2119, 0.1732, 0.1835]),
    ),
])

In [None]:
class CustomDataSet(Dataset):
    """Unlabelled image dataset backed by a single flat directory.

    Every entry of ``main_dir`` is treated as an image file. Entries are put
    in natural sort order once, at construction time, so an index always
    refers to the same file for the lifetime of the dataset.
    """

    def __init__(self, main_dir, transform):
        """Record the directory and transform, and index the directory.

        Args:
            main_dir: Directory containing the image files.
            transform: Callable applied to each RGB PIL image in
                ``__getitem__``; its result is what the dataset yields.
        """
        self.main_dir = main_dir
        self.transform = transform
        self.total_imgs = natsort.natsorted(os.listdir(main_dir))

    def __len__(self):
        """Return the number of directory entries indexed at construction."""
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Load entry ``idx``, convert it to RGB and return it transformed."""
        # Imported locally under a private alias: this file later rebinds the
        # global name ``Image`` to IPython.display.Image, which has no open().
        from PIL import Image as pil_image

        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        # Image.open is lazy and keeps the file open; convert() reads the
        # pixel data, so the file can be closed as soon as it returns.
        with pil_image.open(img_loc) as raw:
            image = raw.convert("RGB")
        return self.transform(image)

In [None]:
# Training dataset: images from the training directory with the augmentation pipeline applied.
my_dataset = CustomDataSet(training_dataset_path, transform=training_transforms)

In [None]:
# Same construction as the previous cell, with the transform passed positionally; rebinds my_dataset.
my_dataset = CustomDataSet(training_dataset_path, training_transforms)

In [None]:
# Second dataset over the same training directory, using the plain resize + ToTensor transform.
my_dataset2 = CustomDataSet(training_dataset_path, transform)

In [None]:
# Inspect the class of the dataset object.
type(my_dataset2)

In [None]:
#data = DatasetFolder(my_dataset,transform=transform)

In [None]:
#train_loader = my_dataset.DataLoader(my_dataset , batch_size=5, shuffle=True) #, num_workers=4, drop_last=True)

In [None]:
# NOTE: despite its name, train_dataset is a DataLoader (shuffled, batches of 3) wrapping my_dataset.
train_dataset = DataLoader(my_dataset,shuffle=True,batch_size=3)

In [None]:
#train_dataset = torchvision.datasets.ImageFolder(root='images/train', train=True, transform=transforms.To.Tensor(), download=True)

In [None]:
# Second shuffled loader (batches of 5) over the dataset wrapped by train_dataset, i.e. my_dataset.
train_loader = torch.utils.data.DataLoader(dataset = train_dataset.dataset, batch_size=5, shuffle=True)

#### calculate mean and standard deviation

In [None]:
def get_mean_and_std(loader):
    """Return the per-channel mean and std of the images yielded by ``loader``.

    Each batch must be a tensor whose first dimension is the batch and whose
    second dimension is the channel. Mean and std are computed per image over
    all remaining elements of a channel and then averaged over every image
    seen, so the returned std is the average per-image std, not the std of
    the pooled pixels.

    Args:
        loader: Iterable of image batches, e.g. a ``DataLoader``.

    Returns:
        Tuple ``(mean, std)`` with one entry per channel.

    Raises:
        ValueError: If ``loader`` yields no images.
    """
    mean = 0.
    std = 0.
    nb_samples = 0
    for data in loader:
        batch_samples = data.size(0)
        # Collapse everything after the channel axis: (batch, channels, -1).
        # reshape (unlike view) also accepts non-contiguous batches.
        data = data.reshape(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    if nb_samples == 0:
        # Fail with a clear message instead of a bare division by zero.
        raise ValueError("loader yielded no images")

    mean /= nb_samples
    std /= nb_samples

    return mean, std

In [None]:
def get_mean_and_std2(loader):
    """Print ``loader`` and return the per-channel mean and std of its images.

    Each batch must be a tensor whose first dimension is the batch and whose
    second dimension is the channel. Statistics are computed per image and
    channel, then averaged over all images, so the returned std is the
    average per-image std.

    Args:
        loader: Iterable of image batches, e.g. a ``DataLoader``.

    Returns:
        Tuple ``(mean, std)`` with one entry per channel.

    Raises:
        ValueError: If ``loader`` yields no images.
    """
    mean = 0.
    std = 0.
    total_images_count = 0
    print(loader)

    for images in loader:
        image_count_in_batch = images.size(0)

        # Collapse everything after the channel axis: (batch, channels, -1).
        # reshape (unlike view) also accepts non-contiguous batches.
        images = images.reshape(image_count_in_batch, images.size(1), -1)

        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        total_images_count += image_count_in_batch

    if total_images_count == 0:
        # Fail with a clear message instead of a bare division by zero.
        raise ValueError("loader yielded no images")

    mean /= total_images_count  # average of the per-image channel means
    std /= total_images_count  # average of the per-image channel stds

    return mean, std

In [None]:
# get_mean_and_std expects batches with the batch axis first and the channel
# axis second, so the dataset is wrapped in a DataLoader. Iterating the dataset
# directly yields single images, whose channel axis would be read as the batch.
get_mean_and_std(DataLoader(my_dataset2, batch_size=5))

In [None]:
# Compute the statistics with the second helper, this time over the batches of train_loader.
get_mean_and_std2(train_loader)

#### load model

In [None]:
##### uses yolov8n which is the smallest one

In [None]:
# The model is created from a .yaml definition file rather than from saved weights.
model = YOLO("yolov8n.yaml") # build a new model from scratch (8n is the smallest version)

#### train model

In [None]:
# Train for 100 epochs, passing config.yaml as the data argument.
results = model.train(data="config.yaml", epochs=100)

#### save model

#### load saved model

In [None]:
# Load the trained weights through the Ultralytics API. Forward slashes are
# used because '\r', '\t' and '\b' are escape sequences in a normal string
# literal, which corrupted the original Windows-style path.
loaded_model = YOLO('runs/detect/train112/weights/best.pt')

In [None]:
# Preprocessing for inference: resize, convert to a tensor, then normalise with
# the same per-channel statistics that training_transforms uses. The values
# are spelled out because no `mean`/`std` variables exist at module level.
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        torch.Tensor([0.5352, 0.5258, 0.4332]),
        torch.Tensor([0.2119, 0.1732, 0.1835]),
    ),
])

In [None]:
def classify(loaded_model, image_transforms, image_path, classes):
    """Print the predicted class label for the image at ``image_path``.

    Args:
        loaded_model: Model providing ``eval()`` and callable on a batched
            image tensor. Its output must expose ``.data`` holding one score
            per class along dimension 1.
        image_transforms: Callable turning an RGB PIL image into a tensor.
        image_path: Path of the image file to classify.
        classes: Sequence mapping a class index to its label.
    """
    # Imported locally under a private alias: this file later rebinds the
    # global name ``Image`` to IPython.display.Image, which has no open().
    from PIL import Image as pil_image

    model = loaded_model.eval()
    # Force three channels so the per-channel normalisation always matches,
    # and close the file as soon as the pixel data has been read.
    with pil_image.open(image_path) as raw:
        image = raw.convert("RGB")
    image = image_transforms(image).float()
    image = image.unsqueeze(0)  # add the batch dimension

    output = model(image)
    _, predicted = torch.max(output.data, 1)

    print(classes[predicted.item()])


#### test model on a batch of images

In [None]:
%pwd

In [None]:
# Verify the filenames in the validation folder, using the shared path constant
# so this check cannot drift from the directory the rest of the notebook uses.
os.listdir(test_dataset_path)

In [None]:
#test_image_path = './images/val/DJI_20240204094120_0258_D.JPG'

In [None]:
!yolo task=detect mode=predict model=runs/detect/train112/weights/best.pt conf=0.25 source=images/val

In [None]:
#classify(model, image_transforms, "pygm", classes)

#### visualise

In [None]:
# Alias for train_dataset, which is the shuffled DataLoader with batches of 3.
dataset = train_dataset

In [None]:
# Inspect the class of the object bound to dataset.
type(dataset)

In [None]:
# Running counter used to number the image files written by the save loop.
img_num = 0

In [None]:
# Make 10 passes over `dataset` and write every yielded item to its own PNG
# file, numbered with the running img_num counter.
for _ in range(10):
    for img in dataset:
        save_image(img, f'img{img_num}.png')
        img_num += 1

In [None]:
import glob

In [None]:
from IPython.display import Image, display

In [None]:
# NOTE: the display loop in the next cell reuses image_path as its loop variable,
# so this value is replaced as soon as that loop iterates at least once.
image_path = './images/val'

In [None]:
# Display at most the first 10 .JPG files found in runs/detect/predict6, each
# 600 wide and followed by blank output lines. The run folder name is hard-coded.
for image_path in glob.glob('runs/detect/predict6/*.JPG')[:10]:
    display(Image(filename=image_path, width=600))
    print("\n")

#### classify

#### count

#### measure

#### analyse