# Path changing

In [1]:
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as T

In [2]:
from tqdm import tqdm
from datetime import datetime
import time

import json

from PIL import Image

In [3]:
# Annotation index keyed by class name; each value is a list of records with a
# 'path' and a 'boxes' entry (see the sample displayed below).
# JSON is UTF-8 by specification, so decode it explicitly instead of relying on
# the platform's locale encoding (which is not UTF-8 by default on Windows).
with open("COCO_metadata.json", "r", encoding="utf-8") as file:
    metadata = json.load(file)
metadata['Babi'][0]

{'path': './Flowers/Babi/babi_1.jpg', 'boxes': [[63.0, 31.0, 118.0, 96.0]]}

In [5]:
# Flatten the {class name: [records]} mapping into one list, tagging every
# record copy with the class it came from. The per-class record count is
# printed as a quick look at class balance.
metadata_array = []
for label, records in metadata.items():
    print(len(records))
    metadata_array.extend({**record, "label": label} for record in records)
metadata_array[0]

797
309
678
462
826
331
166
487


{'path': './Flowers/Babi/babi_1.jpg',
 'boxes': [[63.0, 31.0, 118.0, 96.0]],
 'label': 'Babi'}

In [17]:
class CustomDataset(torch.utils.data.Dataset):
    """Single-class detection dataset that caches every image in memory.

    Args:
        data: sequence of dicts, each with the keys ``'path'`` (image file),
            ``'boxes'`` (list of 4-number boxes) and ``'label'`` (class name).

    Every box is emitted with class id 1, i.e. one generic foreground class;
    the per-record class name is only kept on ``self.label``.

    NOTE(review): images are resized to ``IMAGE_SIZE`` but the boxes are passed
    through unchanged. This presumably relies on the annotations already being
    expressed in resized-image pixel coordinates -- confirm against the
    metadata, otherwise the boxes must be rescaled as well.
    """

    # Size handed to T.Resize for every image before it is cached.
    IMAGE_SIZE = (256, 256)

    def __init__(self, data):
        self.data = data

        # Images are decoded once, up front; boxes and labels stay as the raw
        # Python values from the metadata until a sample is requested.
        self.images = self.image_preprocess([record['path'] for record in data])
        self.bounding_box = [record['boxes'] for record in data]
        self.label = [record['label'] for record in data]

    def __len__(self):
        """Return the number of cached images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(image, target)`` for sample ``index``.

        ``image`` is the cached float tensor produced by ``image_preprocess``.
        ``target`` is a dict with ``'boxes'`` (float32, shape ``(N, 4)``) and
        ``'labels'`` (int64 ones, shape ``(N,)``).
        """
        # The cache already holds tensors, so no further conversion is needed.
        image = self.images[index]

        # The metadata stores boxes as nested lists; reshape(-1, 4) keeps an
        # image without annotations at shape (0, 4) rather than (0,).
        boxes = torch.as_tensor(self.bounding_box[index], dtype=torch.float32).reshape(-1, 4)
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        target = {'boxes': boxes, 'labels': labels}
        return image, target

    def image_preprocess(self, image_paths_arr):
        """Load each path as RGB, resize it to ``IMAGE_SIZE`` and convert it to a tensor.

        Returns a list of tensors in the same order as ``image_paths_arr``.
        """
        # Build the transforms once instead of once per image.
        resize = T.Resize(self.IMAGE_SIZE)
        to_tensor = T.ToTensor()

        result = []
        for path in image_paths_arr:
            # convert() yields an independent, fully loaded image, so the file
            # handle can be released as soon as the block exits.
            with Image.open(path) as raw:
                image = raw.convert('RGB')
            result.append(to_tensor(resize(image)))
        return result

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the annotated records for validation; the fixed seed keeps
# the partition identical across kernel restarts.
# NOTE: despite the "_inds" suffix, the split is applied to the metadata
# records themselves, so both names hold records rather than integer indices.
train_inds, val_inds = train_test_split(
    metadata_array,
    test_size=0.2,
    random_state=333,
)

In [19]:
# train_inds.info()
# val_inds.info()

In [20]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    Nothing is stacked, so the items of a field may differ in shape.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [21]:
# CustomDataset decodes and resizes every image in its constructor, so fetching
# a sample is a plain list lookup and worker processes have nothing to
# parallelise. Loading in the main process (num_workers=0) avoids copying the
# whole in-memory dataset into each worker, and sidesteps the worker start-up
# problems that notebook-defined Dataset classes are known to hit on Windows.
train_dl = torch.utils.data.DataLoader(CustomDataset(train_inds),
                                       batch_size=2,
                                       shuffle=True,
                                       collate_fn=collate_fn,
                                       num_workers=0,
                                       pin_memory=torch.cuda.is_available())
train_dl

<torch.utils.data.dataloader.DataLoader at 0x1498f775f00>

In [22]:
# CustomDataset decodes and resizes every image in its constructor, so fetching
# a sample is a plain list lookup and worker processes have nothing to
# parallelise. Loading in the main process (num_workers=0) avoids copying the
# whole in-memory dataset into each worker, and sidesteps the worker start-up
# problems that notebook-defined Dataset classes are known to hit on Windows.
val_dl = torch.utils.data.DataLoader(CustomDataset(val_inds),
                                     batch_size=2,
                                     shuffle=True,
                                     collate_fn=collate_fn,
                                     num_workers=0,
                                     pin_memory=torch.cuda.is_available())
val_dl

<torch.utils.data.dataloader.DataLoader at 0x1498f880eb0>

In [23]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches

def view(images,labels,k,std=1,mean=0):
    """Plot the first ``k`` images with their bounding boxes outlined in white.

    Args:
        images: sequence of image tensors laid out as (channels, height, width).
        labels: sequence of dicts, one per image, each holding a ``'boxes'``
            tensor whose rows are read as ``[x1, y1, x2, y2]``.
        k: number of samples to draw; clamped to the number of samples given.
        std, mean: the image is shown as ``std * image + mean`` clipped to
            [0, 1], which undoes a normalisation applied upstream. Scalars or
            per-channel sequences; the defaults leave the image unchanged.
    """
    figure = plt.figure(figsize=(30,30))
    images = list(images)
    labels = list(labels)

    # Never index past the samples actually supplied.
    k = min(k, len(images), len(labels))

    # Keep the familiar 2x2 layout for up to four samples and add rows beyond.
    n_cols = 2
    n_rows = max(2, -(-k // n_cols))

    for i in range(k):
        # detach() lets tensors that still carry gradient history be converted.
        img = images[i].detach().cpu().numpy().transpose((1,2,0))
        img = np.clip(np.array(std) * img + np.array(mean), 0, 1)

        ax = figure.add_subplot(n_rows, n_cols, i + 1)
        ax.imshow(img)

        # Width and height are derived per box without writing into the array:
        # for CPU tensors the NumPy array shares memory with the caller's
        # tensor, so an in-place conversion would corrupt the caller's boxes.
        boxes = labels[i]['boxes'].detach().cpu().numpy().reshape(-1, 4)
        for x1, y1, x2, y2 in boxes:
            ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                           linewidth=2, edgecolor='w', facecolor='none'))


In [None]:

# Sanity check: pull one batch from the training loader and draw its first two
# samples together with their ground-truth boxes.
images,labels=next(iter(train_dl))

view(images,labels,2)


In [21]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a ``num_classes``-way box head.

    ``num_classes`` is the size of the new box predictor's classification
    output. The detector is created with ``weights=None``, so no detection
    checkpoint is requested.

    NOTE(review): the recorded training output shows a ResNet-50 checkpoint
    being downloaded, so presumably only the backbone starts from pretrained
    weights -- confirm this is intended rather than a fully pretrained detector.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)

    # Swap the stock box predictor for one sized to our classes, reusing the
    # input width of the existing classification layer.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [22]:
import time
from datetime import datetime
# Define the training loop
def train(model, train_loader, optimizer, criterion, device, epoch=None, log_every=10):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: module that, in train mode, is called as ``model(inputs, targets)``
            and returns a dict of scalar loss tensors.
        train_loader: iterable of ``(inputs, targets)`` batches, where ``inputs``
            is a sequence of tensors and ``targets`` a sequence of dicts of tensors.
        optimizer: optimiser stepped once per batch.
        criterion: unused -- the loss comes from ``model`` itself. Kept so
            existing call sites keep working.
        device: device every input and target tensor is moved to.
        epoch: zero-based epoch index, used only in the progress line. When
            omitted, a module-level ``epoch`` is used if one exists (the
            original behaviour), otherwise 0.
        log_every: number of batches between progress lines; a falsy value
            disables logging.

    Returns:
        None. Progress is reported through ``print``.
    """
    if epoch is None:
        # The original body read a global `epoch` and raised NameError when the
        # caller had not defined one; fall back to 0 instead of crashing.
        epoch = globals().get('epoch', 0)

    model.train()
    running_loss = 0.0
    start_time = time.time()

    for i, (inputs, targets) in enumerate(train_loader):
        inputs = [image.to(device) for image in inputs]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(inputs, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
        if log_every and (i + 1) % log_every == 0:
            elapsed_time = time.time() - start_time
            print('[Epoch: %d, Batch: %5d] loss: %.3f, time: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_every, elapsed_time))
            # Both the loss window and the timer restart after each report.
            running_loss = 0.0
            start_time = time.time()
            
            

In [None]:
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EPOCH_COUNT = 1

# Define the model
num_classes = 2 # Number of classes (including background)
model = get_model(num_classes).to(device)


# Define the optimizer.
# `criterion` is not used by the loop below: in training mode the detector
# returns its own dict of losses. The name is kept because `train()` accepts a
# criterion argument and other cells may pass this one.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001, momentum=0.9, weight_decay=0.0005)


print("Start training - - -")
# Be explicit about the mode: the detector only returns losses in train mode.
model.train()
for epoch in range(EPOCH_COUNT):
    running_loss = 0
    timestamp_start = datetime.now()
    for images, targets in tqdm(train_dl):

        # Move the data to the device.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Zero the gradients and compute the total loss. Every entry of
        # loss_dict must already be a scalar (`.item()` below requires it), so
        # the entries are summed directly with no per-entry mean.
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()

        # Update the parameters and accumulate the loss for the epoch average.
        optimizer.step()
        running_loss += losses.item()

    timestamp_end = datetime.now()

    print('Epoch:', epoch, 'Loss:', running_loss/len(train_dl), f'{(timestamp_end - timestamp_start).total_seconds()}s')

    

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\trann/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|█████████████████████████████████████████████████████████████████████████████| 97.8M/97.8M [00:08<00:00, 12.4MB/s]


Start training - - -


  0%|                                                                                         | 0/1622 [00:00<?, ?it/s]

In [None]:
# torch.save(model.state_dict(), "model.pt")

In [None]:
# Pull one validation batch and move it to the model's device.
images,targets=next(iter(val_dl))
images = [image.to(device) for image in images]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

# Run inference without building an autograd graph. Predictions produced
# outside no_grad() carry gradient history and cannot be converted to NumPy
# for plotting.
model.eval()
with torch.no_grad():
    output = model(images)

# Draw every image of the batch; asking for more samples than the batch holds
# would index past the end of `images`.
view(images, output, len(images))