**Importing Modules**

In [29]:
import pandas as pd
import numpy as np
import cv2
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import os
import glob
import shutil
from pathlib import Path
import torch

**Root directory to dataset**

In [30]:
dataset_root = "/home/simenallum/development/AOF_raw"

**Data Paths**

In [31]:
#Set up our img paths 
part_1 = os.path.join(dataset_root, "PART_1/PART_1/images")
part_2 = os.path.join(dataset_root, "PART_2/PART_2/images")
part_3 = os.path.join(dataset_root, "PART_3/PART_3/images")

# Annotation paths
annotation_path = os.path.join(dataset_root, "PART_1/PART_1/2categories")

In [32]:
root_img_path = Path(os.path.join(dataset_root, "working/images"))
root_ann_path = Path(os.path.join(dataset_root, "working/annotations"))

if not root_img_path.exists():
        print(f"Path {root_img_path} does not exit")
        os.makedirs(root_img_path)
        print(f"Folder at '{root_img_path}' created")
        
if not root_ann_path.exists():
        print(f"Path {root_ann_path} does not exit")
        os.makedirs(root_ann_path)
        print(f"Folder at '{root_ann_path}' created")

In [33]:
# Copy files from our img paths to our image folder in our working directory
def copy_files(src, dest):
    files = os.listdir(src)
    for file in files:
        shutil.copy2(os.path.join(src,file), dest)
        
copy_files(part_1, root_img_path)
copy_files(part_2, root_img_path)
copy_files(part_3, root_img_path)
copy_files(annotation_path, root_ann_path)

In [39]:
images_path = os.path.join(dataset_root, "working/images")
images_list = glob.glob(os.path.join(root_img_path , "*.jpg"))
print(len(images_list))

3641


In [40]:
#Visualise a single annotation
annot_path = os.path.join(dataset_root, "working/annotations")
annot_list = glob.glob(os.path.join(root_ann_path , "*.txt"))
# annot_list[0]
with open(annot_list[0], 'r') as f:
    file = f.read()
    print(file)




**Split Data into Train, Test & Validation**

In [36]:
# Recall: 
# These have been then split into three parts: the training (67,4% of objects), the test (19,12% of objects), 
# and the validation set (13,48% of objects). In order to prevent overfitting of the model to the given data, 
# the test set contains selected frames from nine videos that were not used in either the training or validation sets.

def split_data(file_list, img_path, ann_path, mode):
    #Check if we have our mode folders
    root_file = Path( os.path.join(dataset_root, "working/Dataset/images/")+  mode)
    if not root_file.exists():
        print(f"Path {root_file} does not exit")
        os.makedirs(root_file)

    root_file = Path( os.path.join(dataset_root, "working/Dataset/labels/")+  mode)
    if not root_file.exists():
        print(f"Path {root_file} does not exit")
        os.makedirs(root_file)
        
    #Creates the name of our label file from the img name and creates our source file
    for file in file_list:               
        name = file.replace('.jpg', '')        
        img_src_file = img_path + '/' + name + '.jpg'        
        annot_src_file = ann_path + '/' + name + '.txt'
        
        #move image
        IMG_DIR = os.path.join(dataset_root, "working/Dataset/images/") +  mode
        print(IMG_DIR)
        img_dest_file = IMG_DIR + '/' + name + '.jpg'
        if os.path.isfile(img_src_file):
            shutil.move(img_src_file, img_dest_file)
            
        # Copy annotations
        ANNOT_DIR = os.path.join(dataset_root, "working/Dataset/labels/") +  mode
        annot_dest_file = ANNOT_DIR + '/' + name + '.txt'
        if os.path.isfile(annot_src_file):
            shutil.move(annot_src_file, annot_dest_file)

In [37]:
#Get our images list
train_imgs = os.path.join(dataset_root, "PART_1/PART_1/train.txt")
test_imgs = os.path.join(dataset_root, "PART_1/PART_1/test.txt")
val_imgs = os.path.join(dataset_root, "PART_1/PART_1/validation.txt")
with open(train_imgs, 'r') as f:
    train_img_list = [line.strip() for line in f.readlines()]
    
with open(test_imgs, 'r') as f:
    test_img_list = [line.strip() for line in f.readlines()]
    
with open(val_imgs, 'r') as f:
    val_img_list = [line.strip() for line in f.readlines()]

print(train_img_list[0], test_img_list[0], val_img_list[0])

a_102.jpg k2_38.jpg a_101.jpg


In [41]:
#Split Data
split_data(train_img_list, images_path, annot_path, 'train')
split_data(test_img_list, images_path, annot_path, 'test')
split_data(val_img_list, images_path, annot_path, 'val')

/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/simenallum/development/AOF_raw/working/Dataset/images/train
/home/sime