In [1]:
import os
import time
import random
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image
import glob
import cv2
import csv
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision
from torchvision import transforms as T
from torchvision.transforms import functional as F
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

In [2]:
# Root directory that holds the image folders and their annotation files.
DATASET = 'data'

# Recursively collect every *.txt file below the dataset root. The pattern is
# derived from DATASET so that changing the root only needs one edit.
files = glob.glob(f'{DATASET}/**/*.txt', recursive=True)

# Annotation list consumed by the training cells below.
target_txt = "train_dataset.txt"

In [3]:
# Read the comma-separated annotation list without a header row and name the
# six columns explicitly: one row per bounding box.
train_df = pd.read_csv(
    target_txt,
    header=None,
    names=['img_path', 'x0', 'y0', 'x1', 'y1', 'label'],
)
train_df.head()

Unnamed: 0,img_path,x0,y0,x1,y1,label
0,19-12-09_1652_624622812/000_03_04.png,463.0,204.0,560.0,241.0,2
1,19-12-09_1652_624622812/000_03_04.png,64.0,470.0,210.0,540.0,2
2,19-12-09_1652_624622812/000_03_04.png,267.0,464.0,332.0,535.0,1
3,19-12-09_1652_624622812/000_03_04.png,577.0,184.0,610.0,294.0,1
4,19-12-09_1652_624622812/000_03_04.png,346.0,268.0,421.0,366.0,1


In [4]:
# Number of annotation rows and columns.
print(f"Train shape is {train_df.shape}")

# Number of distinct images referenced by the annotations.
print(f"Unique images are {train_df['img_path'].nunique()}")

# Entries of `files` whose path does not contain "mask".
# NOTE(review): `files` is built from a *.txt glob, so this counts text files,
# not image files - confirm the label printed below is what is intended.
folder_images = [f for f in files if "mask" not in f]
print(f"Images in folder are {len(folder_images)}")

Train shape is (33581, 6)
Unique images are 6071
Images in folder are 12148


In [5]:
# Split by image (not by box) so every box of an image lands in the same subset.
images_ids = train_df['img_path'].unique()

# First 80% of the unique images -> train, remaining 20% -> validation.
split_len = round(len(images_ids) * 0.8)
train_ids, valid_ids = images_ids[:split_len], images_ids[split_len:]

train = train_df.loc[train_df['img_path'].isin(train_ids)]
valid = train_df.loc[train_df['img_path'].isin(valid_ids)]

print(f"Train shape is {train.shape} and Validation shape is {valid.shape}")

Train shape is (26861, 6) and Validation shape is (6720, 6)


In [6]:
import math
import numbers
import random
import warnings
from collections.abc import Sequence
from typing import Tuple, List, Optional
from PIL import Image, ImageOps, ImageEnhance

import torch
from torch import Tensor

try:
    import accimage
except ImportError:
    accimage = None

class ToTensor(object):
    """Paired transform: convert the image to a tensor, pass the target through."""

    def __call__(self, image, target):
        tensor_image = F.to_tensor(image)
        return tensor_image, target
#***********************************************

class Compose(object):
    """Apply a sequence of paired ``(image, target)`` transforms in order.

    Each transform is called as ``t(image, target)`` and must return a
    two-element result that becomes the input of the next transform.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for step in self.transforms:
            result = step(image, target)
            image, target = result
        return image, target
#***********************************************

class RandomHorizontalFlip(object):
    """Mirror image and boxes left-to-right with probability ``prob``.

    The ``target["boxes"]`` tensor is updated in place: its x-coordinates
    (columns 0 and 2) are swapped and reflected around the image width.
    """

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        # Guard clause: leave the sample untouched when the draw misses.
        if not random.random() < self.prob:
            return image, target

        _, width = image.shape[-2:]
        mirrored = image.flip(-1)

        boxes = target["boxes"]
        # New x0 is width - old x1 and new x1 is width - old x0, so x0 <= x1 still holds.
        boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
        target["boxes"] = boxes

        return mirrored, target
    
#***********************************************

class ToPILImage(object):
    """Image-only helper that converts its input to a PIL image.

    Unlike the paired transforms in this cell it takes no ``target`` argument.
    """

    def __init__(self, mode=None):
        self.mode = mode

    def __call__(self, pic):
        pil_image = F.to_pil_image(pic, self.mode)
        return pil_image
#***********************************************

class Grayscale(object):
    """Paired transform that desaturates the image via ``to_grayscale``."""

    def __init__(self, num_output_channels):
        self.num_output_channels = num_output_channels

    def __call__(self, img, target):
        # to_grayscale works on PIL images, so convert the incoming image first.
        pil_img = ToPILImage()(img)
        return to_grayscale(pil_img, target, num_output_channels=self.num_output_channels)

#***********************************************
def to_grayscale(img, target, num_output_channels=1):
    """Convert a PIL image to grayscale and return it as ``(tensor, target)``.

    Args:
        img: PIL image to desaturate.
        target: annotation object, passed through unchanged.
        num_output_channels: 1 for a single-channel result, 3 for the gray
            channel replicated into an RGB image.

    Returns:
        The ``(image_tensor, target)`` pair produced by ``ToTensor``. Both
        channel settings now return this pair; previously the 1-channel branch
        returned a bare PIL image, which ``Compose`` could not unpack.

    Raises:
        ValueError: if ``num_output_channels`` is neither 1 nor 3.
    """
    if num_output_channels not in (1, 3):
        raise ValueError('num_output_channels should be either 1 or 3')

    gray = img.convert('L')
    if num_output_channels == 3:
        # Stack the single gray channel three times to get an RGB-shaped image.
        np_img = np.array(gray, dtype=np.uint8)
        gray = Image.fromarray(np.dstack([np_img, np_img, np_img]), 'RGB')

    return ToTensor()(gray, target)

#***********************************************

def adjust_brightness(img, target, brightness_factor):
    """Scale the brightness of a PIL image and return ``ToTensor``'s ``(image, target)`` pair."""
    brightened = ImageEnhance.Brightness(img).enhance(brightness_factor)
    return ToTensor()(brightened, target)

def adjust_contrast(img, target, contrast_factor):
    """Scale the contrast of a PIL image and return ``ToTensor``'s ``(image, target)`` pair."""
    contrasted = ImageEnhance.Contrast(img).enhance(contrast_factor)
    return ToTensor()(contrasted, target)
#***********************************************
    
class ColorBright(object):
    """Paired transform that applies a fixed brightness factor to the image."""

    def __init__(self, brightness=0):
        self.brightness = brightness

    def __call__(self, img, target):
        # adjust_brightness works on PIL images, so convert the incoming image first.
        pil_img = ToPILImage()(img)
        return adjust_brightness(pil_img, target, self.brightness)
    
class ColorContrast(object):
    """Paired transform that applies a fixed contrast factor to the image."""

    def __init__(self, contrast=0):
        self.contrast = contrast

    def __call__(self, img, target):
        # adjust_contrast works on PIL images, so convert the incoming image first.
        pil_img = ToPILImage()(img)
        return adjust_contrast(pil_img, target, self.contrast)

In [8]:
class FallenPeople(Dataset):
    """Object-detection dataset built from a per-box annotation DataFrame.

    ``dataframe`` must provide the columns ``img_path``, ``x0``, ``y0``,
    ``x1``, ``y1`` and ``label``; all rows sharing an ``img_path`` are the
    boxes of one image. One dataset item corresponds to one unique image.

    Args:
        dataframe: annotation table, one row per bounding box.
        img_dir: directory that ``img_path`` values are relative to.
        transforms: callable taking and returning ``(image, target)``, or
            ``None`` to return the raw sample.
    """

    def __init__(self, dataframe, img_dir, transforms):
        super().__init__()
        self.image_ids = dataframe['img_path'].unique()
        self.df = dataframe
        self.img_dir = img_dir
        self.transforms = transforms

    def __getitem__(self, idx: int):
        """Return ``(image, target)`` for the ``idx``-th unique image.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_id = self.image_ids[idx]
        records = self.df[self.df['img_path'] == image_id]

        image_file = f'{self.img_dir}/{image_id}'
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image '{image_file}'")
        # OpenCV loads BGR; convert to RGB floats scaled by 1/255.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        boxes = records[['x0', 'y0', 'x1', 'y1']].values
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Box area = height * width (already a float32 tensor).
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Class ids exactly as stored in the `label` column.
        labels = torch.as_tensor(records['label'].values, dtype=torch.int64)
        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.uint8)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target["image_id"] = torch.tensor([idx])
        # Same index-based identifier as "image_id", kept under its original key.
        target['img_path'] = torch.tensor([idx])
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self) -> int:
        """Number of unique images in the annotation table."""
        return len(self.image_ids)

In [17]:
# All annotation rows whose label is 2.
# NOTE(review): the name `id` shadows the Python builtin of the same name for
# the rest of the notebook session - consider renaming.
id = train_df.loc[train_df['label'] == 2]
id

Unnamed: 0,img_path,x0,y0,x1,y1,label
0,19-12-09_1652_624622812/000_03_04.png,463.0,204.0,560.0,241.0,2
1,19-12-09_1652_624622812/000_03_04.png,64.0,470.0,210.0,540.0,2
5,19-12-09_1652_624622812/000_04_01.png,969.0,695.0,1085.0,719.0,2
6,19-12-09_1652_624622812/000_04_01.png,952.0,369.0,1061.0,415.0,2
9,19-12-09_1652_624622812/000_04_01.png,430.0,169.0,502.0,202.0,2
...,...,...,...,...,...,...
33470,19-12-05_2245_300199546/000_05_00.png,1143.0,699.0,1221.0,719.0,2
33471,19-12-05_2245_300199546/000_05_02.png,666.0,0.0,690.0,16.0,2
33472,19-12-05_2245_300199546/000_04_00.png,371.0,342.0,421.0,355.0,2
33475,19-12-05_2245_300199546/000_04_02.png,1199.0,383.0,1279.0,468.0,2


In [22]:
class FallenPeopleAug(Dataset):
    """Object-detection dataset built from a per-box annotation DataFrame.

    ``dataframe`` must provide the columns ``img_path``, ``x0``, ``y0``,
    ``x1``, ``y1`` and ``label``; all rows sharing an ``img_path`` are the
    boxes of one image. One dataset item corresponds to one unique image.

    NOTE(review): this class has the same behavior as ``FallenPeople``; the
    two could be merged into one.

    Args:
        dataframe: annotation table, one row per bounding box.
        img_dir: directory that ``img_path`` values are relative to.
        transforms: callable taking and returning ``(image, target)``, or
            ``None`` to return the raw sample.
    """

    def __init__(self, dataframe, img_dir, transforms):
        super().__init__()
        self.image_ids = dataframe['img_path'].unique()
        self.df = dataframe
        self.img_dir = img_dir
        self.transforms = transforms

    def __getitem__(self, idx: int):
        """Return ``(image, target)`` for the ``idx``-th unique image.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_id = self.image_ids[idx]
        records = self.df[self.df['img_path'] == image_id]

        image_file = f'{self.img_dir}/{image_id}'
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image '{image_file}'")
        # OpenCV loads BGR; convert to RGB floats scaled by 1/255.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        boxes = records[['x0', 'y0', 'x1', 'y1']].values
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Box area = height * width (already a float32 tensor).
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Class ids exactly as stored in the `label` column.
        labels = torch.as_tensor(records['label'].values, dtype=torch.int64)
        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.uint8)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target["image_id"] = torch.tensor([idx])
        # Same index-based identifier as "image_id", kept under its original key.
        target['img_path'] = torch.tensor([idx])
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self) -> int:
        """Number of unique images in the annotation table."""
        return len(self.image_ids)

In [9]:
def get_transform(train):
    """Build the paired ``(image, target)`` transform pipeline.

    The pipeline always starts by converting the image to a tensor. When
    ``train`` is truthy it additionally flips every sample horizontally
    (probability 1) and converts the image to 3-channel grayscale.
    """
    steps = [ToTensor()]
    if train:
        # prob=1 means the flip is always applied, not randomly sampled.
        steps += [RandomHorizontalFlip(1), Grayscale(num_output_channels=3)]
    return Compose(steps)

In [12]:
def get_transform_balance():
    """Build the pipeline for the duplicated samples: tensor conversion,
    then a brightness factor of 2, then a contrast factor of 2."""
    steps = [
        ToTensor(),
        ColorBright(brightness=2),
        ColorContrast(contrast=2),
    ]
    return Compose(steps)

In [10]:
# Un-augmented views of the train and validation splits.
train_dataset_normal = FallenPeople(
    dataframe=train,
    img_dir=DATASET,
    transforms=get_transform(train=False),
)
valid_dataset = FallenPeople(
    dataframe=valid,
    img_dir=DATASET,
    transforms=get_transform(train=False),
)

In [11]:
# Second view of the training split using the train=True transform pipeline.
train_dataset_aug = FallenPeople(
    dataframe=train,
    img_dir=DATASET,
    transforms=get_transform(train=True),
)

In [24]:
# Extra copies of the label-2 boxes only, with the brightness/contrast pipeline.
train_dataset_dup = FallenPeopleAug(
    dataframe=train.loc[train['label'] == 2],
    img_dir=DATASET,
    transforms=get_transform_balance(),
)

In [25]:
# Sizes of the three training views, one per line.
print(
    len(train_dataset_normal),
    len(train_dataset_aug),
    len(train_dataset_dup),
    sep="\n",
)

4857
4857
2158


In [26]:
# Final training set: the three views laid end to end in this order.
train_dataset = torch.utils.data.ConcatDataset(
    [train_dataset_normal, train_dataset_aug, train_dataset_dup]
)

In [29]:
# Faster R-CNN (ResNet-50 FPN backbone) initialised from pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    pretrained=True,
)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [30]:
# Show the box head before replacing it.
print(model.roi_heads.box_predictor)

num_classes = 3 # fall or no fall or not(background)

# Width of the features feeding the existing classifier layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Swap in a fresh head sized for this dataset's classes.
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

print(model.roi_heads.box_predictor)

FastRCNNPredictor(
  (cls_score): Linear(in_features=1024, out_features=91, bias=True)
  (bbox_pred): Linear(in_features=1024, out_features=364, bias=True)
)
FastRCNNPredictor(
  (cls_score): Linear(in_features=1024, out_features=3, bias=True)
  (bbox_pred): Linear(in_features=1024, out_features=12, bias=True)
)


In [31]:
def collate_fn(batch):
  """Transpose a list of per-sample tuples into a tuple of per-field tuples.

  E.g. ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
  """
  columns = zip(*batch)
  return tuple(columns)

# Training loader. The training set is a ConcatDataset laid out as
# [normal | augmented | duplicated], so it must be shuffled: without shuffling
# every epoch would feed the three segments strictly one after another.
train_data_loader = DataLoader(
  train_dataset,
  batch_size=4,
  shuffle=True,
  collate_fn=collate_fn
)

# Validation loader: deterministic order, no shuffling needed for evaluation.
valid_data_loader = DataLoader(
  valid_dataset,
  batch_size=4,
  shuffle=False,
  collate_fn=collate_fn
)

# test_data_loader = DataLoader(
#   test_dataset,
#   batch_size=4,
#   shuffle=False,
#   collate_fn=collate_fn
# )

In [32]:
# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')

# move model to the right device
model.to(device)

# create an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# create a learning rate scheduler
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

# train it for 10 epochs
num_epochs = 10

In [33]:
# Path of the text file that receives the per-batch training log.
log_txt = 'trainlogs/train_log_17_11_2021.txt'

In [35]:
# Use the %pip magic so the package is installed into this kernel's environment,
# and pin the commit that was resolved when this notebook was last run so the
# install is reproducible.
%pip install -U 'git+https://github.com/cocodataset/cocoapi.git@8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9#subdirectory=PythonAPI'

Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-jhfa3jnn
  Running command git clone --filter=blob:none -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-jhfa3jnn
  Resolved https://github.com/cocodataset/cocoapi.git to commit 8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25ldone
[?25h  Created wheel for pycocotools: filename=pycocotools-2.0-cp36-cp36m-linux_x86_64.whl size=266220 sha256=000f2fe8323a6eb0fe5e764f688a408c4a70f6676ea104800c42765e451eb187
  Stored in directory: /tmp/pip-ephem-wheel-cache-9en3ur6h/wheels/25/c1/63/8bee2969883497d2785c9bdbe4e89cae5efc59521553d528bf
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0
Note: you may need to restart the ke

In [36]:
# `engine` is not imported anywhere else in this notebook.
# NOTE(review): presumably the torchvision detection reference `engine.py`
# placed next to the notebook - confirm where it comes from.
from engine import evaluate
import sys

# Handle on the current stdout stream, kept so it can be restored after any
# temporary redirection.
original_stdout = sys.stdout

In [37]:
import contextlib

from tqdm.notebook import tqdm as tqdm

# Working training loop.
# `itr` is a global batch counter: it keeps increasing across epochs.
itr = 1

total_train_loss = []
total_valid_loss = []

losses_value = 0.0

# Make sure the log directory exists before opening the log file.
os.makedirs(os.path.dirname(log_txt) or ".", exist_ok=True)

# The `with` block guarantees the log file is closed even if training is
# interrupted (e.g. by a KeyboardInterrupt).
with open(log_txt, "w") as f_log:
  for epoch in range(num_epochs):

    start_time = time.time()

    # train ------------------------------
    running_corrects = 0

    # evaluate() leaves the model in eval mode, so switch back every epoch.
    model.train()
    train_loss = []
    pbar = tqdm(train_data_loader, desc='let\'s train')

    for images, targets in pbar:

      images = [image.to(device) for image in images]
      targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

      # In train mode the detection model returns a dict of loss terms.
      loss_dict = model(images, targets)
      losses = sum(loss for loss in loss_dict.values())

      losses_value = losses.item()
      train_loss.append(losses_value)

      optimizer.zero_grad()
      losses.backward()
      optimizer.step()

      f_log.write(f"Epoch: {epoch+1}, Batch: {itr}, Loss: {losses_value}\n")
      pbar.set_description(f"Epoch: {epoch+1}, Batch: {itr}, Loss: {losses_value}")
      itr += 1

    epoch_train_loss = np.mean(train_loss)
    total_train_loss.append(epoch_train_loss)

    # update the learning rate
    if lr_scheduler is not None:
      lr_scheduler.step()

    # Validation: send everything evaluate() prints into the log file.
    # redirect_stdout restores sys.stdout on exit, including when evaluate()
    # raises or the run is interrupted, so the notebook never stays redirected.
    with contextlib.redirect_stdout(f_log):
      evaluate(model, valid_data_loader, device=device)

# NOTE: the commented-out validation-loss and IoU test-phase experiments that
# used to follow the loop were removed as dead code; they referenced a
# `test_data_loader` and a `bb_intersection_over_union` helper that are not
# defined in this notebook.

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

let's train:   0%|          | 0/2968 [00:00<?, ?it/s]

KeyboardInterrupt: 