# In [1]:
import glob
import numpy as np
import cv2
import matplotlib.pyplot as plt
import math
from random import randint
import os
import imageio
import requests, zipfile, io, pickle

def download_traffic_images():
    """Download the FullIJCNN2013 archive and unpack it.

    The archive is fetched fully into memory and then extracted with
    ``ZipFile.extractall()`` called without a target path, i.e. into the
    current working directory.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: if the downloaded payload is not a valid zip.
    """
    url = "https://sid.erda.dk/public/archives/ff17dc924eba88d5d01a807357d6614c/FullIJCNN2013.zip"
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of handing an error page to
    # ZipFile, which would only surface as a confusing BadZipFile.
    response.raise_for_status()

    # The context manager guarantees the archive is closed after extraction.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall()
    
def _pick_crop_offset(start, extent, limit, crop=32):
    """Pick how far before ``start`` a ``crop``-pixel window should begin.

    The returned offset ``r`` satisfies ``start - r >= 0`` and
    ``start - r + crop <= limit``.  When the box fits in the window
    (``extent <= crop``) it also satisfies ``r <= crop - extent`` so the
    whole box stays inside the crop; larger boxes can only get offset 0.

    Sampling directly from the valid range yields the same uniform
    distribution as retrying random draws until one fits, but cannot loop
    forever when no offset fits.

    Raises:
        ValueError: if no offset satisfies the constraints.
    """
    slack = max(crop - extent, 0)
    lowest = max(0, start - (limit - crop))
    highest = min(slack, start)
    if lowest > highest:
        raise ValueError(
            'no valid crop offset for start={}, extent={}, limit={}, crop={}'
            .format(start, extent, limit, crop)
        )
    return randint(lowest, highest)


def generate_dataset(traffic_path, times=1):
    """Build a dataset of random 32x32 crops around annotated boxes.

    For every non-blank line of ``<traffic_path>/gt.txt`` (fields separated
    by ``;``: image file name followed by four box coordinates) the image is
    resized to 340x200, the coordinates are divided by 4, and a 32x32 window
    is cropped at a random position around the box.  The whole annotation
    list is processed ``times`` times, so each box yields ``times`` crops.

    Output goes to ``TrafficDataset/``: crops are saved as ``NNNNNN.ppm``
    and one line per crop is written to ``coordinates.txt`` in the form
    ``NNNNNN.ppm;<x offset>;<y offset>;<w or 31>;<h or 31>``.

    Args:
        traffic_path: directory holding the ``.ppm`` images and ``gt.txt``;
            if it does not exist the images are downloaded first.
        times: number of passes over the annotation file.

    Raises:
        FileNotFoundError: if an image listed in ``gt.txt`` cannot be read.
        ValueError: if a box cannot be enclosed by a 32x32 window lying
            inside the resized image.
    """
    # Download images if not already downloaded
    if not os.path.exists(traffic_path):
        download_traffic_images()

    # Read the metadata, ignoring blank lines (e.g. a trailing newline)
    with open(os.path.join(traffic_path, 'gt.txt')) as meta_file:
        lines = [line for line in meta_file if line.strip()]

    # Create the directory of the dataset
    os.makedirs('TrafficDataset/', exist_ok=True)

    # File where the new coordinates will be saved
    with open('TrafficDataset/coordinates.txt', 'w') as g:
        for i in range(times):
            # Each pass gets its own contiguous range of output indices
            idx = i * len(lines)
            for line in lines:
                # Extract the coordinates
                splits = line.split(';')
                coords = splits[1:5]

                # Read the image; cv2.imread signals failure by returning None
                img_path = os.path.join(traffic_path, splits[0])
                img = cv2.imread(img_path)
                if img is None:
                    raise FileNotFoundError('could not read image: ' + img_path)

                # Resize 4 times smaller.
                # NOTE(review): assumes source images are 1360x800 so that the
                # /4 coordinate scaling matches this resize - confirm.
                img2 = cv2.resize(img, (340, 200))
                x1 = math.floor(int(coords[0]) / 4)
                y1 = math.floor(int(coords[1]) / 4)
                x2 = math.ceil(int(coords[2]) / 4)
                y2 = math.ceil(int(coords[3]) / 4)
                h = abs(y1 - y2)
                w = abs(x1 - x2)

                # Randomly place a 32x32 window around the bounding box
                rnd1 = _pick_crop_offset(y1, h, 200)
                rnd2 = _pick_crop_offset(x1, w, 340)
                top = y1 - rnd1
                left = x1 - rnd2
                crop_img = img2[top:top + 32, left:left + 32]

                # Save the new image
                # NOTE(review): cv2.imread presumably yields BGR while imageio
                # writes arrays as RGB, so saved crops may have R/B swapped -
                # confirm what downstream readers expect before changing.
                file_name = str(idx).zfill(6) + '.ppm'
                imageio.imwrite('TrafficDataset/' + file_name, crop_img)

                # Save the new bounding box
                # NOTE(review): the last two fields are the box size, replaced
                # by 31 whenever offset + size exceeds 31; this looks like a mix
                # of "size" and "far edge" semantics - kept as-is, verify
                # against whatever consumes coordinates.txt.
                box_w = '31' if rnd2 + w > 31 else str(w)
                box_h = '31' if rnd1 + h > 31 else str(h)
                g.write(';'.join((file_name, str(rnd2), str(rnd1), box_w, box_h)))
                g.write('\n')

                idx += 1

# Build the cropped dataset from 'FullIJCNN2013/', making 3 passes over the
# annotation file (each pass draws fresh random crop positions).
generate_dataset('FullIJCNN2013/', times=3)