In [None]:
import xml.etree.ElementTree as ET
import cv2
from PIL import Image
from os import listdir as ld
from os.path import join as pj
import numpy as np
from scipy import ndimage
import h5py
from scipy import sparse
from tqdm import tqdm

from IO.loader import parse_annotations, file_id
from evaluation.classification.statistics import compute_anno_stats, compute_average_size, compute_size_correction
from evaluation.classification.visualize import plot_size_of_anno, plot_size_by_class_of_anno
from utils.crop import *
from utils.annotate import *
from IO.build_ds import build_classification_ds

import matplotlib.pyplot as plt
plt.style.use("dark_background")
%matplotlib inline

# Load data

In [None]:
# Dataset root and the annotation sub-folders to scan.
root = "/home/tanida/workspace/Insect_Phenology_Detector/data"
anno_folders = ["annotations_20200806"]

# Full path of every entry found inside each annotation folder.
annos = [
    pj(root, folder, entry)
    for folder in anno_folders
    for entry in ld(pj(root, folder))
]

# Full path of every entry in refined_images, skipping Jupyter's checkpoint directory.
imgs = [
    pj(root, "refined_images", entry)
    for entry in ld(pj(root, "refined_images"))
    if entry != ".ipynb_checkpoints"
]

In [None]:
# Load every image into memory as an array, keyed by the id file_id() derives from its path.
images = {}
for img_path in imgs:
    img_id = file_id(img_path)
    images[img_id] = np.array(Image.open(img_path))

# For each image id, collect the annotation paths that contain the id as a substring;
# ids without any matching path are left out.
# NOTE(review): substring matching would also pick up paths of any id that merely
# contains this id — confirm the ids produced by file_id() are unambiguous.
annotations = {}
for img_id in images:
    matching = [path for path in annos if img_id in path]
    if len(matching) > 0:
        annotations[img_id] = matching

# Parse only the ".xml" files and concatenate their parsed entries per image id.
anno = {
    img_id: [
        parsed
        for path in paths
        if path.endswith(".xml")
        for parsed in parse_annotations(path)
    ]
    for img_id, paths in annotations.items()
}

# Make datasets

In [None]:
# Build the classification samples X and labels Y from the parsed annotations and the
# loaded images, cropping with crop_adjusted_std.
# NOTE(review): later cells index X as X[i, :] and iterate Y element-wise, so X is
# presumably an array with one row per sample and Y a 1-D label sequence — confirm.
X,Y = build_classification_ds(anno, images, crop=crop_adjusted_std)

# Save classification dataset

In [None]:
def get_lbl_map(save_lbl):
    """Map each label in ``save_lbl`` to its position within ``save_lbl``.

    Args:
        save_lbl: Indexable sequence of hashable labels to keep.

    Returns:
        dict: ``{save_lbl[i]: i}`` for every index ``i``, where the values are
        the NumPy integers produced by ``np.arange``. When a label appears more
        than once, its last position wins.
    """
    return {
        save_lbl[pos]: new_id
        for pos, new_id in enumerate(np.arange(len(save_lbl)))
    }

In [None]:
# Original label ids to keep as separate classes; the cells below remap them to
# consecutive ids via get_lbl_map().
# NOTE(review): presumably these are the aquatic-insect classes — confirm which taxa
# the ids 1, 2, 3, 6, 8, 9 stand for.
save_lbl = [1, 2, 3, 6, 8, 9]

In [None]:
# Original label id -> consecutive id (its position in save_lbl).
lbl_map = get_lbl_map(save_lbl)

In [None]:
# Display the mapping so the remapped class ids can be checked by eye.
lbl_map

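- aquatic only: keep just the samples whose label is in `save_lbl` (alternative to the "aquatic + others" cell below; both cells write `X2`/`Y2`, so run only the one you want to save)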

In [None]:
# Positions of the samples whose label is one of the kept labels.
kept = [i for i, y in enumerate(Y) if y in save_lbl]

# Keep only those samples and translate their labels to the remapped ids.
X2 = np.asarray([X[i, :] for i in kept])
Y2 = np.asarray([lbl_map[Y[i]] for i in kept])

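- aquatic + others: keep every sample; labels outside `save_lbl` are merged into one extra class with id `len(save_lbl)` (this overwrites the `X2`/`Y2` produced by the "aquatic only" cell above)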

In [None]:
# Every sample is kept, in its original order.
X2 = np.asarray([X[i, :] for i, _ in enumerate(Y)])

# Labels listed in save_lbl get their remapped id; every other label collapses
# into one extra class whose id is len(save_lbl).
Y2 = np.asarray([
    lbl_map[y] if y in save_lbl else len(save_lbl)
    for y in Y
])

In [None]:
# Number of samples per remapped class: idx holds the sorted unique labels found in Y2
# and count the number of occurrences of each one.
idx, count = np.unique(Y2, return_counts=True)
# Bare expression so the notebook displays the per-class counts.
count

In [None]:
# Store the filtered samples (X2) and remapped labels (Y2) as the HDF5 datasets "X" and "Y".
# The mode is given explicitly because h5py >= 3.0 opens files read-only by default,
# which makes create_dataset fail. "a" reproduces the older default: the file is
# created when missing and kept when it already exists, so create_dataset raises if
# "X"/"Y" are already stored there instead of silently overwriting them (delete the
# file first to regenerate it).
with h5py.File("/home/tanida/workspace/Insect_Phenology_Detector/data/all_classification_data/classify_insect_std_only_20200806", "a") as f:
    f.create_dataset("X", data=X2)
    f.create_dataset("Y", data=Y2)

# Visualize Image

In [None]:
# Read the stored samples and labels fully into memory; this rebinds X and Y.
# Opened with an explicit "r" so the read never creates or modifies the file: without
# a mode, older h5py versions create an empty file when the path is missing and the
# lookup of "X" then fails with a confusing KeyError.
# NOTE(review): this opens "classify_insect_std_20200806", not the
# "classify_insect_std_only_20200806" file written by the save cell — confirm this is
# the file meant to be inspected.
with h5py.File("/home/tanida/workspace/Insect_Phenology_Detector/data/all_classification_data/classify_insect_std_20200806", "r") as f:
    X = f["X"][:]
    Y = f["Y"][:]

In [None]:
# Number of samples per class in the labels just loaded from disk: idx holds the
# sorted unique labels found in Y and count the number of occurrences of each one.
idx, count = np.unique(Y, return_counts=True)
# Bare expression so the notebook displays the per-class counts.
count