### **Library Imports**

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET 

### **Utilities and Helpers**

In [2]:
def breaker() -> None:
    """Print a row of 50 asterisks with one blank line above and below it."""
    separator = "*" * 50
    print(f"\n{separator}\n")


def get_size(ele) -> tuple:
    """Convert the text of the first three children of ``ele`` to integers.

    Args:
        ele: Indexable element whose items at positions 0, 1 and 2 expose a
            ``.text`` attribute holding an integer literal.

    Returns:
        A 3-tuple of ints in child order (the caller in this notebook
        unpacks it as ``w, h, c``).
    """
    return tuple(int(ele[position].text) for position in range(3))


def get_bbox(ele, w: int, h: int) -> tuple:
    """Read four integer coordinates from ``ele`` and scale them by ``w``/``h``.

    Args:
        ele: Indexable element whose items at positions 0-3 expose a ``.text``
            attribute holding an integer literal.
        w: Divisor applied to the values at positions 0 and 2.
        h: Divisor applied to the values at positions 1 and 3.

    Returns:
        A 4-tuple of floats in child order (the caller in this notebook
        unpacks it as ``x1, y1, x2, y2``).
    """
    # Children alternate between a horizontal and a vertical coordinate.
    divisors = (w, h, w, h)
    return tuple(
        int(ele[position].text) / divisor
        for position, divisor in enumerate(divisors)
    )

def get_image(path: str, size: int=224) -> np.ndarray:
    """Load an image file, convert it from BGR to RGB and resize it to a square.

    Args:
        path: Location of the image file to read.
        size: Side length, in pixels, of the returned image.

    Returns:
        The image resized to ``(size, size)`` with ``cv2.INTER_AREA``
        interpolation, channels in RGB order.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.cvtColor.
    if image is None:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image file not found: {path!r}")
        raise ValueError(f"OpenCV could not decode image: {path!r}")

    image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    return cv2.resize(src=image, dsize=(size, size), interpolation=cv2.INTER_AREA)


def get_images(base_path: str, filenames: np.ndarray, size: int=224) -> np.ndarray:
    """Load every listed image into one pre-allocated uint8 array.

    Args:
        base_path: Directory that each entry of ``filenames`` is joined onto.
        filenames: Sized iterable of file names to load, in output order.
        size: Side length passed through to ``get_image``.

    Returns:
        Array of shape ``(len(filenames), size, size, 3)`` and dtype
        ``np.uint8``; row ``k`` holds the image for the ``k``-th file name.
    """
    batch = np.zeros((len(filenames), size, size, 3), dtype=np.uint8)

    for row, name in enumerate(filenames):
        batch[row] = get_image(os.path.join(base_path, name), size=size)

    return batch

def get_statistics(images: np.ndarray) -> tuple:
    """Compute per-channel mean and standard deviation, each divided by 255.

    Args:
        images: 4-D array whose last axis has at least three channels; only
            channels 0, 1 and 2 are used.

    Returns:
        ``(means, stds)`` where each is a 3-tuple ordered by channel index.
    """
    channels = [images[:, :, :, index] for index in range(3)]
    means = tuple(channel.mean() / 255 for channel in channels)
    stds = tuple(channel.std() / 255 for channel in channels)
    return means, stds


# Directory whose entries are listed and parsed as per-image XML annotation
# files (relative to the notebook's working directory).
XML_PATH   = "../input/road-sign-detection/annotations"
# Directory from which the matching image files are loaded.
IMAGE_PATH = "../input/road-sign-detection/images"

### **Process XML Files**

In [3]:
# Keep only annotation files so that any stray directory entry is not handed
# to the XML parser.
xml_filenames = sorted(
    filename for filename in os.listdir(XML_PATH)
    if filename.lower().endswith(".xml")
)
# Each annotation is paired with a PNG of the same base name; splitext avoids
# assuming the extension is exactly three characters long.
filenames = [os.path.splitext(filename)[0] + ".png" for filename in xml_filenames]

labels, x1s, y1s, x2s, y2s = [], [], [], [], []

for xml_file in xml_filenames:
    tree = ET.parse(os.path.join(XML_PATH, xml_file))
    root = tree.getroot()

    # The third child of the root is read as the image size; only the first
    # two values (used as w and h) are needed for normalisation.
    w, h = get_size(root[2])[:2]

    # Only the LAST child of the root is used: its first child supplies the
    # label and its last child the box coordinates.
    # NOTE(review): if a file lists several objects, the earlier ones are
    # ignored here — confirm this is intended.
    (x1, y1, x2, y2) = get_bbox(root[-1][-1], w, h)
    labels.append(root[-1][0].text)
    x1s.append(x1)
    y1s.append(y1)
    x2s.append(x2)
    y2s.append(y2)

### **Put Data in a DataFrame**

In [4]:
# Assemble the parsed annotation lists into one table, one row per file,
# keeping the column order used by the saved CSV.
df = pd.DataFrame(
    {
        "filenames": filenames,
        "labels": labels,
        "x1": x1s,
        "y1": y1s,
        "x2": x2s,
        "y2": y2s,
    }
)

# Persist the table (without the index column) and preview the first rows.
df.to_csv("data.csv", index=False)
df.head(5)

Unnamed: 0,filenames,labels,x1,y1,x2,y2
0,road0.png,trafficlight,0.367041,0.155,0.779026,0.58
1,road1.png,trafficlight,0.385,0.222615,0.645,0.992933
2,road10.png,trafficlight,0.265,0.011236,0.61,0.985019
3,road100.png,speedlimit,0.0875,0.012987,0.9075,0.846753
4,road101.png,speedlimit,0.4875,0.035,0.98,0.97


### **Get Images**

In [5]:
# Resolutions at which the whole image set is loaded, measured and saved.
sizes = [224, 256, 384, 512]
means, stds = {}, {}

for size in sizes:
    images = get_images(IMAGE_PATH, df.filenames, size)
    mean, std = get_statistics(images)
    # Statistics are keyed by the resolution rendered as a string.
    means[str(size)] = mean
    stds[str(size)] = std
    np.save(f"images_{size}.npy", images)

# Bundle both tables so the display cells can look them up by name.
stats = {"means": means, "stds": stds}

### **Show Statistics**

**Mean**

In [6]:
# Print the per-channel means for every resolution, one framed section each.
breaker()
for size_key, channel_means in stats["means"].items():
    print(f"{size_key} x {size_key} Means\n")
    # zip stops at the shorter input, so at most the first three values print.
    for channel_name, channel_value in zip(("Red", "Green", "Blue"), channel_means):
        print(f"{channel_name:<6}: {channel_value:.5f}")
    breaker()


**************************************************

224 x 224 Means

Red   : 0.46738
Green : 0.53833
Blue  : 0.56577

**************************************************

256 x 256 Means

Red   : 0.46737
Green : 0.53832
Blue  : 0.56576

**************************************************

384 x 384 Means

Red   : 0.46697
Green : 0.53793
Blue  : 0.56536

**************************************************

512 x 512 Means

Red   : 0.46708
Green : 0.53804
Blue  : 0.56547

**************************************************



**Standard Deviation**

In [7]:
# Print the per-channel standard deviations for every resolution, one framed
# section each.
breaker()
for size_key, channel_stds in stats["stds"].items():
    print(f"{size_key} x {size_key} Stds\n")
    # zip stops at the shorter input, so at most the first three values print.
    for channel_name, channel_value in zip(("Red", "Green", "Blue"), channel_stds):
        print(f"{channel_name:<6}: {channel_value:.5f}")
    breaker()


**************************************************

224 x 224 Stds

Red   : 0.22459
Green : 0.24051
Blue  : 0.25858

**************************************************

256 x 256 Stds

Red   : 0.22514
Green : 0.24107
Blue  : 0.25905

**************************************************

384 x 384 Stds

Red   : 0.22641
Green : 0.24237
Blue  : 0.26017

**************************************************

512 x 512 Stds

Red   : 0.22706
Green : 0.24302
Blue  : 0.26072

**************************************************

