# item 1 - Get data

In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

%matplotlib inline

import numpy as np
import os, sys
import imageio
import pandas as pd
import matplotlib.pyplot as plt

# Folders with the ground-truth annotation files and the mask images.
path_txt = os.path.join('dataset', 'train', 'gt')
path_mask = os.path.join('dataset', 'train', 'mask')

dirs_txt = os.listdir(path_txt)
dirs_mask = os.listdir(path_mask)

# Maps every ground-truth file name to a list holding one feature dict
# per annotated bounding box found in that file.
data = dict()

for gt in dirs_txt:
    # Only the text is needed from the annotation file, so the handle is
    # released before the (slower) image processing starts.
    with open(os.path.join(path_txt, gt)) as f:
        lines = f.readlines()

    # 'gt.<id>.txt' -> 'mask.<id>.png'
    im_name = gt.replace('gt', 'mask').replace('txt', 'png')
    im_open = imageio.imread(os.path.join(path_mask, im_name))

    lista = list()
    for line in lines:
        # split() without arguments tolerates repeated blanks and the
        # trailing newline; blank lines are skipped instead of crashing.
        fields = line.split()
        if not fields:
            continue

        # Each annotation line is: tly tlx bry brx type
        tly, tlx, bry, brx, tipo = fields
        tly, tlx, bry, brx = map(float, [tly, tlx, bry, brx])

        w = brx - tlx
        h = bry - tly
        bbox_area = w * h

        d = dict()
        d['type'] = tipo.strip()
        d['width'] = w
        d['height'] = h
        d['bbox_area'] = bbox_area
        # A degenerate (zero-height) box has no defined aspect ratio.
        d['form_factor'] = w / h if h else np.nan

        # Integer pixel coordinates, used to slice the images.
        d['tly'] = round(tly)
        d['tlx'] = round(tlx)
        d['bry'] = round(bry)
        d['brx'] = round(brx)

        # Number of non-zero mask pixels inside the bounding box.
        sub_mask = im_open[d['tly']:d['bry'], d['tlx']:d['brx']]
        mask_area = np.count_nonzero(sub_mask)
        d['mask_area'] = mask_area
        # A zero-area box has no defined filling ratio.
        d['filling_ratio'] = mask_area / bbox_area if bbox_area else np.nan

        lista.append(d)

    data[gt] = lista
    

In [None]:
# Write table: one row per annotated box, indexed by (ground-truth file, box number).
columns = ['type','width','height','form_factor','bbox_area','mask_area','filling_ratio']
df = pd.DataFrame.from_dict(
    {(gt_name, box_idx): box
     for gt_name, boxes in data.items()
     for box_idx, box in enumerate(boxes)},
    columns=columns,
    orient='index',
).sort_values(['type'])
print(df)
#df['form_factor'].plot(figsize=(10, 7))

# Count signals per type; reindex fixes the order of the rows/bars.
sign_types = ['A','B','C','D','E','F']
print(df['type'].value_counts(sort=False).reindex(sign_types))
# `sort_columns` is not passed: pandas deprecated it in 1.5 and removed it
# in 2.0, and the bar order is already fixed by the reindex above.
df['type'].value_counts(sort=False, normalize=True).reindex(sign_types).plot(figsize=(10, 7), kind='bar')




In [None]:
# Aggregate every remaining column with np.std, one row per sign type.
# NOTE: despite its name, `type_counts` holds that aggregate, not counts.
boxes_by_type = df.groupby('type')
type_counts = boxes_by_type.agg(np.std)
type_counts

#type_counts = df.groupby('type').aggregate(np.median)
#type_counts = df.groupby('type').aggregate(np.average)
#type_counts = df.groupby('type').aggregate(np.std)


In [None]:
# Group data by "type" (letter) and get statistics:

# Function = get_stats (only shape, aspect ratio, etc, NOT COLOUR)

# We want size and shape statistics for each letter (sign type)

In [None]:
# Plot and compare results

MOSTRAR IMÁGENES REPRESENTATIVAS DE CADA MUESTRA QUE MUESTREN LOS COLORES TÍPICOS QUE ESPERAMOS ENCONTRAR - 
ARGUMENTAR POR QUÉ ESPERAMOS VER 4

# Item 2 - Split training dataset

Al separar por tipo, tenemos que armar un algoritmo que nos separe el set de training en dos carpetas, 
separando mismos tipos en 70% / 30%.

# Item 3 - Separation by colour


Para cada imagen de prueba, nos basamos en el diccionario y usamos el bounding box que ya calculamos.

Recortamos la imagen, le calculamos el histograma. Sumamos los histogramas de todas las imágenes y obtenemos la suma total.

Luego hacemos lo mismo y al recortar la roi de cada imagen convertimos la roi de rgb a hsv, calculamos el histograma
y obtenemos la suma de todos los histogramas. 

Con estas cosas deberíamos poder obtener los thresholds a aplicar luego.


Hacer gráficos chetos de los histogramas. Ajustar con múltiples gaussianas, calcular promedio, mediana, std, etc.
Compararlos y elegir el mejor.

Luego aplicar las máscaras con los thresholds calculados

In [None]:
# Convert RGB images to HUE images

# Get stats from HUE images

# Plot and compare

# Select thresholds (RGB y HUE)

# Create masks using previous thresholds



# Calculate histograms:

In [None]:
from skimage import color

# Folder with the training JPEG images, and the listing of its entries.
path_jpg = os.path.join('dataset', 'train', 'jpg')
dirs_jpg = os.listdir(path_jpg)

def _roi_to_color_space(rgb_roi, color_space):
    """Convert an RGB crop to `color_space`; 'rgb' returns the crop unchanged."""
    if color_space == 'rgb':
        return rgb_roi
    if color_space == 'hsv':
        return color.rgb2hsv(rgb_roi)
    if color_space == 'ycbcr':
        return color.rgb2ycbcr(rgb_roi)
    return color.rgb2xyz(rgb_roi)


def color_histogram(gt_dictionary, path_jpg, color_space):
    """Accumulate per-channel histograms of the masked sign pixels.

    For every bounding box in `gt_dictionary` the matching JPEG and mask
    images are cropped to the box, the crop is converted to `color_space`
    and the pixels whose mask value is non-zero are added to three running
    histograms (one per channel).

    Parameters
    ----------
    gt_dictionary : dict
        Maps a ground-truth file name to a list of box dicts with the
        integer keys 'tly', 'bry', 'tlx' and 'brx'.
    path_jpg : str
        Folder containing the JPEG images. The mask folder is taken from
        the module-level `path_mask`.
    color_space : str
        One of 'rgb', 'hsv', 'ycbcr' or 'xyz'.

    Returns
    -------
    tuple
        (bins, c0_hist, c1_hist, c2_hist, r0, rf): the 256 bin edges, the
        three 255-bin histograms and the value range used for the bins.

    Raises
    ------
    ValueError
        If `color_space` is not one of the supported names.
    """
    # Histogram value range used for each supported colour space.
    value_ranges = {'rgb': (0, 255), 'hsv': (0, 1), 'ycbcr': (0, 255), 'xyz': (0, 1)}
    if color_space not in value_ranges:
        raise ValueError('Unsupported color_space: %r' % (color_space,))
    r0, rf = value_ranges[color_space]

    # 255 bins are kept so the returned arrays keep their original length.
    n_bins = 255
    # Same edges np.histogram builds for bins=n_bins, range=(r0, rf);
    # computed up front so they exist even when there is nothing to read.
    bins = np.linspace(r0, rf, n_bins + 1)
    channel_hists = [np.zeros(n_bins) for _ in range(3)]

    for gt, values in gt_dictionary.items():
        if not values:
            continue

        jpg_name = gt.replace('gt.', '').replace('txt', 'jpg')
        mask_name = gt.replace('gt', 'mask').replace('txt', 'png')

        # Both images depend only on the file name, so they are read once
        # per file instead of once per bounding box.
        image = imageio.imread(os.path.join(path_jpg, jpg_name))
        mask = imageio.imread(os.path.join(path_mask, mask_name))

        for v in values:
            rows = slice(v['tly'], v['bry'])
            cols = slice(v['tlx'], v['brx'])

            final_roi = _roi_to_color_space(image[rows, cols], color_space)
            # Select the sign pixels instead of multiplying by the mask:
            # multiplying turns every background pixel into a 0 that is
            # then counted in the first bin.
            sign_px = mask[rows, cols] != 0

            for channel, hist in enumerate(channel_hists):
                hist += np.histogram(final_roi[:, :, channel][sign_px],
                                     bins=n_bins, range=(r0, rf))[0]

    c0_hist, c1_hist, c2_hist = channel_hists
    return bins, c0_hist, c1_hist, c2_hist, r0, rf


################################################
# Calculate histograms with traffic signal data:
################################################

# Every call below returns, in order: the bin edges, the histograms of
# channels 0, 1 and 2, and the lower/upper bound of the histogram range.

# RGB histograms:

bins, r_hist, g_hist, b_hist, rgb0, rgbf = color_histogram(data, path_jpg, 'rgb')

# HSV histograms:

hbins, h_hist, s_hist, v_hist, hsv0, hsvf = color_histogram(data, path_jpg, 'hsv')

# yCbCr histograms:

ybins, y_hist, cb_hist, cr_hist, ycbcr0, ycbcrf = color_histogram(data, path_jpg, 'ycbcr')

# X,Y,Z histograms:

xbins, xx_hist, yy_hist, zz_hist, xyz0, xyzf = color_histogram(data, path_jpg, 'xyz')


# Plot histograms:

In [None]:
# Questions: why can't I use the variable 'bins' to plot the histograms?
# Why do I get a divergence at 255?
# Why am I dividing by zero when converting to HSV?

def plot_histogram(hist, r0, rf, color_name, color_plot, ylim=(0, 60000), out_dir='figures'):
    """Draw one channel histogram as a bar chart, save it and show it.

    Parameters
    ----------
    hist : sequence of numbers
        Bin counts; its length defines the number of bars.
    r0, rf : number
        Lower and upper bound of the value range covered by the bins.
    color_name : str
        Used in the plot title and as prefix of the saved file name.
    color_plot : str
        Matplotlib colour of the bars.
    ylim : tuple, optional
        Limits of the y axis (defaults to the previously hard-coded range).
    out_dir : str, optional
        Folder where '<color_name>_hist.png' is written; created if missing.

    Raises
    ------
    ValueError
        If `hist` is empty.
    """
    n_bins = len(hist)
    if n_bins == 0:
        raise ValueError('hist must contain at least one bin')

    # Bars are centred on the middle of each bin, derived from the length
    # of `hist` rather than a hard-coded count.
    bin_width = float(rf - r0) / n_bins
    x = r0 + bin_width * (np.arange(n_bins) + 0.5)

    # savefig does not create missing folders.
    os.makedirs(out_dir, exist_ok=True)

    plt.figure(figsize=(7,7))
    plt.bar(x, hist, color=color_plot, width=0.8*bin_width)
    plt.ylim(ylim)
    plt.title(color_name + ' histogram')
    plt.xlabel('8bit quantification')
    plt.ylabel('Total number of px')
    plt.savefig(os.path.join(out_dir, str(color_name) + '_hist.png'))
    plt.show()
    
# One row per figure: (histogram, range min, range max, title / file-name
# prefix, bar colour). Rows keep the plotting order: RGB, HSV, yCbCr, XYZ.
# NOTE(review): 'y' and 'Y' produce 'y_hist.png' and 'Y_hist.png', which
# differ only by case; on a case-insensitive filesystem the second file
# would overwrite the first.
_histogram_panels = [
    # RGB histograms
    (r_hist, rgb0, rgbf, 'Red', 'r'),
    (g_hist, rgb0, rgbf, 'Green', 'g'),
    (b_hist, rgb0, rgbf, 'Blue', 'b'),
    # HSV histograms
    (h_hist, hsv0, hsvf, 'H', 'r'),
    (s_hist, hsv0, hsvf, 'S', 'g'),
    (v_hist, hsv0, hsvf, 'V', 'b'),
    # yCbCr histograms
    (y_hist, ycbcr0, ycbcrf, 'y', 'r'),
    (cb_hist, ycbcr0, ycbcrf, 'Cb', 'g'),
    (cr_hist, ycbcr0, ycbcrf, 'Cr', 'b'),
    # xyz histograms
    (xx_hist, xyz0, xyzf, 'X', 'r'),
    (yy_hist, xyz0, xyzf, 'Y', 'g'),
    (zz_hist, xyz0, xyzf, 'Z', 'b'),
]

for _panel in _histogram_panels:
    plot_histogram(*_panel)


# Normalized RGB histogram:

In [None]:
from skimage import color

# Folder with the training JPEG images, and the listing of its entries.
path_jpg = os.path.join('dataset', 'train', 'jpg')
dirs_jpg = os.listdir(path_jpg)

def norm_histogram(gt_dictionary, path_jpg, color_space):
    """Accumulate max-normalised RGB histograms of the masked sign pixels.

    For every bounding box in `gt_dictionary` the JPEG and mask images are
    cropped to the box, each channel of the crop is divided by that
    channel's maximum inside the crop, and the pixels whose mask value is
    non-zero are added to three running histograms over [0, 1].

    The normalisation deliberately uses the crop rather than the full
    image: normalising by the full image was tried and left the images
    unchanged because every image already contains saturated pixels. The
    original authors note that normalising per crop trains the algorithm
    poorly.

    Parameters
    ----------
    gt_dictionary : dict
        Maps a ground-truth file name to a list of box dicts with the
        integer keys 'tly', 'bry', 'tlx' and 'brx'.
    path_jpg : str
        Folder containing the JPEG images. The mask folder is taken from
        the module-level `path_mask`.
    color_space : str
        Only 'rgb' is supported.

    Returns
    -------
    tuple
        (bins, c0_hist, c1_hist, c2_hist): the 256 bin edges over [0, 1]
        and the three 255-bin histograms.

    Raises
    ------
    ValueError
        If `color_space` is not 'rgb'.
    """
    if color_space != 'rgb':
        raise ValueError("norm_histogram only supports color_space='rgb', got %r"
                         % (color_space,))

    # 255 bins are kept so the returned arrays keep their original length.
    n_bins = 255
    # Same edges np.histogram builds for bins=n_bins, range=(0, 1);
    # computed up front so they exist even when there is nothing to read.
    bins = np.linspace(0, 1, n_bins + 1)
    channel_hists = [np.zeros(n_bins) for _ in range(3)]

    for gt, values in gt_dictionary.items():
        if not values:
            continue

        jpg_name = gt.replace('gt.', '').replace('txt', 'jpg')
        mask_name = gt.replace('gt', 'mask').replace('txt', 'png')

        # Both images depend only on the file name, so they are read once
        # per file instead of once per bounding box.
        image = imageio.imread(os.path.join(path_jpg, jpg_name))
        mask = imageio.imread(os.path.join(path_mask, mask_name))

        for v in values:
            rows = slice(v['tly'], v['bry'])
            cols = slice(v['tlx'], v['brx'])

            jpg_roi = image[rows, cols]
            if jpg_roi.size == 0:
                continue  # empty crop: nothing to normalise or count

            # Select the sign pixels instead of multiplying by the mask:
            # multiplying turns every background pixel into a 0 that is
            # then counted in the first bin.
            sign_px = mask[rows, cols] != 0

            for channel, hist in enumerate(channel_hists):
                plane = jpg_roi[:, :, channel]
                peak = np.max(plane)
                # An all-zero channel stays at 0 instead of becoming 0/0.
                scale = peak if peak > 0 else 1
                hist += np.histogram(plane[sign_px] / scale,
                                     bins=n_bins, range=(0, 1))[0]

    c0_hist, c1_hist, c2_hist = channel_hists
    return bins, c0_hist, c1_hist, c2_hist


################################################
# Calculate histograms with traffic signal data:
################################################

# RGB histograms:
# norm_histogram returns, in order: the bin edges and the histograms of
# channels 0, 1 and 2.

bins_norm, r_hist_norm, g_hist_norm, b_hist_norm = norm_histogram(data, path_jpg, 'rgb')



# Plot normalized histogram:

In [None]:
# Plot the three normalised RGB channel histograms over the [0, 1] range.
for _norm_hist, _title, _colour in (
    (r_hist_norm, 'Red_norm', 'r'),
    (g_hist_norm, 'Green_norm', 'g'),
    (b_hist_norm, 'Blue_norm', 'b'),
):
    plot_histogram(_norm_hist, 0, 1, _title, _colour)

# Item 4 - Evaluate colour masks 

Comparar las máscaras obtenidas con el ground truth.

Podemos ver si sirven las funciones que ya nos dieron hechas.

# Lo que sigue a partir de aquí fueron pruebas hechas el martes:

In [None]:
# Show the first two annotated boxes of one sample ground-truth file.
sample_boxes = data['gt.00.005025.txt']
for box_idx in (0, 1):
    print(sample_boxes[box_idx])

# Filling ratio of the first box of every ground-truth file.
for gt in dirs_txt:
    first_box = data[gt][0]
    print(first_box['filling_ratio'])

In [None]:
# Recompute the mask area and filling ratio of every annotated box.
for mask in dirs_mask:
    # 'mask.<id>.png' -> 'gt.<id>.txt'
    gt = mask.replace('mask', 'gt').replace('png', 'txt')
    m = imageio.imread(os.path.join(path_mask, mask))

    # data[gt] is a list with one dict per bounding box, so it has to be
    # iterated; indexing the list with a string key raises TypeError.
    for d in data[gt]:
        # Count the non-zero mask pixels inside this box (same measure used
        # when `data` is built) instead of summing the raw pixel values of
        # the whole image.
        sub_mask = m[d['tly']:d['bry'], d['tlx']:d['brx']]
        mask_area = np.count_nonzero(sub_mask)
        d['mask_area'] = mask_area
        # A zero-area box has no defined filling ratio.
        d['filling_ratio'] = mask_area / d['bbox_area'] if d['bbox_area'] else np.nan

        print(d['mask_area'], d['filling_ratio'], mask)
    

In [None]:
# Compare the ground-truth name derived from the first mask file with the
# first entry of the ground-truth listing.
derived_gt_name = dirs_mask[0].replace('mask', 'gt').replace('png', 'txt')
print(derived_gt_name)
print(dirs_txt[0])


In [None]:
import matplotlib.pyplot as plt

% matplotlib inline

m = imageio.imread(os.path.join(path_mask,'mask.00.005025.png')).astype(np.int8)

plt.imshow(m[146:201, 1324:1375])
