In [None]:
import xml.etree.ElementTree as ET
import cv2
from PIL import Image
from os import getcwd as cwd
from os import listdir as ld
from os.path import join as pj
import numpy as np
from scipy import ndimage
import h5py
from scipy import sparse
from tqdm import tqdm

from IO.loader import parse_annotations, file_id
from IO.build_ds import create_annotation, adopt_DBSCAN, load_label_dic, build_classification_ds, load_anno, create_annotation
from IO.create_bbox2size_ds import divide_target_and_body, get_new_anno_with_size
from evaluation.classification.statistics import compute_anno_stats, compute_average_size, compute_size_correction
from evaluation.classification.visualize import plot_size_of_anno, plot_size_by_class_of_anno
from utils.crop import *
from utils.annotate import *

import matplotlib.pyplot as plt
%matplotlib inline

# Load data

In [None]:
# NOTE(review): machine-specific absolute path; the image2size section further
# down derives its data directory from the working directory instead.
root = "/home/tanida/workspace/Insect_Phenology_Detector/data"
anno_folders = ["annotations_0", "annotations_2", "annotations_3", "annotations_4", "annotations_20200806"]

# Full path of every entry found in each annotation folder, folder by folder.
annos = [
    pj(root, folder, entry)
    for folder in anno_folders
    for entry in ld(pj(root, folder))
]

# Full path of every image, skipping Jupyter's checkpoint directory.
image_dir = pj(root, "refined_images")
imgs = [pj(image_dir, name) for name in ld(image_dir) if name != ".ipynb_checkpoints"]

In [None]:
# Decode every image once, keyed by the id that file_id derives from its path.
images = {file_id(path): np.array(Image.open(path)) for path in imgs}

# Attach to each image id the annotation paths that contain the id as a
# substring; ids without any matching path are left out.
# NOTE(review): substring matching could also pick up paths of another image
# whose id merely contains this one — confirm the ids are unambiguous.
annotations = {}
for image_id in images:
    matched = [path for path in annos if image_id in path]
    if matched:
        annotations[image_id] = matched

# Parse only the .xml files and concatenate their parsed entries per image id.
anno = {
    image_id: [
        entry
        for path in paths
        if path.endswith(".xml")
        for entry in parse_annotations(path)
    ]
    for image_id, paths in annotations.items()
}

# Adopt DBSCAN

In [None]:
# Build two annotation sets from the same input that differ only in the
# `percent` flag; both skip the ']' label.
# NOTE(review): `percent` presumably switches between relative and absolute
# coordinates — confirm in IO.build_ds.create_annotation.
new_anno = create_annotation(images, anno, unused_labels=[']'], centering=False, percent=True)
new_anno_not_percent = create_annotation(images, anno, unused_labels=[']'], centering=False, percent=False)

# Label dictionary derived from the percent=True annotations.
label_dic = load_label_dic(new_anno, each_flag=True, plus_other=False, target_with_other=False)

# adopt_DBSCAN receives both annotation variants and its result replaces new_anno.
new_anno = adopt_DBSCAN(label_dic, new_anno, new_anno_not_percent)

# NOTE(review): rebinding `anno` makes this cell non-idempotent — running it a
# second time feeds the DBSCAN output back into create_annotation. Re-run the
# loading cell first if this cell has to be executed again.
anno = new_anno

# Make datasets

In [None]:
# Build the classification samples X and labels Y from the annotations and images.
# `crop_adjusted` is not imported by name; presumably it comes from one of the
# star imports (utils.crop / utils.annotate) — confirm.
X,Y = build_classification_ds(anno, images, crop=crop_adjusted)

# Save classification dataset

In [None]:
def get_lbl_map(save_lbl):
    """Map each label in ``save_lbl`` to its position in that sequence.

    Args:
        save_lbl: indexable sequence of hashable labels.

    Returns:
        dict whose keys are the labels and whose values are their indices
        (NumPy integers taken from ``np.arange``). A label that occurs more
        than once ends up mapped to its last index.
    """
    positions = np.arange(len(save_lbl))
    return {save_lbl[pos]: positions[pos] for pos in range(len(save_lbl))}

In [None]:
# Original label ids to keep; get_lbl_map renumbers them 0..5 in this order.
# NOTE(review): presumably the aquatic-insect classes (see the "aquatic only"
# cell below) — confirm against label_dic.
save_lbl = [1, 2, 3, 6, 8, 9]

In [None]:
# Original label id -> contiguous class id used in the saved dataset.
lbl_map = get_lbl_map(save_lbl)

In [None]:
# Display the mapping for a visual check.
lbl_map

- aquatic only: keep only the samples whose label is listed in `save_lbl`

In [None]:
# Positions of the samples whose label is listed in save_lbl.
kept = [i for i, y in enumerate(Y) if y in save_lbl]

# Gather those samples and renumber their labels through lbl_map.
X2 = np.asarray([X[i, :] for i in kept])
Y2 = np.asarray([lbl_map[Y[i]] for i in kept])

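- aquatic + others: keep every sample; labels outside `save_lbl` are merged into one extra class with id `len(save_lbl)`. This cell assigns the same `X2`/`Y2` as the "aquatic only" cell, so run only the variant you want to save.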

In [None]:
# Every sample is kept; a label outside save_lbl is replaced by one shared
# "other" class id placed right after the remapped ids.
other_lbl = len(save_lbl)

X2 = np.asarray([X[i, :] for i, _ in enumerate(Y)])
Y2 = np.asarray([lbl_map[y] if y in save_lbl else other_lbl for y in Y])

In [None]:
# Number of samples per class in Y2; `idx` holds the sorted unique class ids
# and `count` the matching counts (displayed as the cell output).
idx, count = np.unique(Y2, return_counts=True)
count

In [None]:
# Persist the classification arrays (X2: samples, Y2: remapped labels) to HDF5.
# The mode is passed explicitly: h5py >= 3.0 opens files read-only when no mode
# is given, which makes create_dataset fail. "a" is the pre-3.0 default
# (read/write, create the file if it is missing).
# Re-running against an existing file raises because the datasets "X" and "Y"
# already exist; delete the file first to regenerate it.
with h5py.File("/home/tanida/workspace/Insect_Phenology_Detector/data/all_classification_data/classify_insect_20200806", "a") as f:
    f.create_dataset("X", data=X2)
    f.create_dataset("Y", data=Y2)

### Create image2size dataset

In [None]:
# Data directory resolved relative to the current working directory (the
# notebook must be started from the project root for this to resolve).
data_root = pj(cwd(), "data")
img_folder = "refined_images"
anno_folders = ["annotations_0", "annotations_2", "annotations_3", "annotations_4", "annotations_20200806"]

# Labels handed to create_annotation in the next cell as the labels to leave out.
unused_labels = [']', 'Coleoptera', 'Hemiptera', 
                 'Hymenoptera', 'Megaloptera', 'Unknown', 
                 'unknown', 'medium insect', 'small insect', 
                 'snail', 'spider']

In [None]:
# Reload images and annotations (this rebinds `images` and `anno` from the
# classification section), then derive size-annotated entries step by step.
images, anno = load_anno(data_root, img_folder, anno_folders, return_body=True)
# NOTE(review): the two positional False values presumably correspond to
# `centering` and `percent` (named that way in the earlier call) — confirm the
# parameter order in IO.build_ds.create_annotation.
new_anno = create_annotation(images, anno, unused_labels, False, False)
new_anno_div_body = divide_target_and_body(new_anno)
new_anno_with_size = get_new_anno_with_size(new_anno_div_body)

# With return_sizes=True the builder returns the sizes alongside images and labels.
imgs, lbls, sizes = build_classification_ds(new_anno_with_size, images, crop=crop_adjusted_std, return_sizes=True)

In [None]:
# Number of samples per class in lbls; `count` follows the sorted order of the
# unique labels returned in `idx`.
idx, count = np.unique(lbls, return_counts=True)
count

In [None]:
# Persist images, labels and sizes to HDF5.
# The mode is passed explicitly: h5py >= 3.0 opens files read-only when no mode
# is given, which makes create_dataset fail. "a" is the pre-3.0 default
# (read/write, create the file if it is missing).
# Re-running against an existing file raises because the datasets already
# exist; delete the file first to regenerate it.
with h5py.File("/home/tanida/workspace/Insect_Phenology_Detector/data/all_classification_data/classify_insect_std_20200806_with_size", "a") as f:
    f.create_dataset("X", data=imgs)
    f.create_dataset("Y", data=lbls)
    f.create_dataset("size", data=sizes)

# Visualize Image

In [None]:
# Display names for the integer class ids stored in Y.
# NOTE(review): assumed to match the label order of the dataset file loaded
# below — confirm against the label dictionary used when it was written.
lbl_to_name = {
    0: 'Diptera', 
    1: 'Ephemeridae', 
    2: 'Ephemeroptera', 
    3: 'Lepidoptera', 
    4: 'Plecoptera', 
    5: 'Trichoptera', 
}

In [None]:
# Load the stored samples and labels fully into memory.
# NOTE(review): this file name matches neither of the two files written above
# ("classify_insect_20200806", "..._std_20200806_with_size") — confirm it exists.
all_data_path = pj(cwd(), "data/all_classification_data/classify_insect_std_20200806")
# Open read-only on purpose: with no mode, h5py < 3.0 falls back to append mode
# and silently creates an empty file when the path is wrong.
with h5py.File(all_data_path, "r") as f:
    X = f["X"][:]
    Y = f["Y"][:]
Y.shape

In [None]:
# One boolean mask per class id, named after the entries of lbl_to_name.
diptera_filter = Y == 0
ephemeridae_filter = Y == 1
ephemeroptera_filter = Y == 2
lepidoptera_filter = Y == 3
plecoptera_filter = Y == 4
trichoptera_filter = Y == 5

In [None]:
# Class to browse; swap in any of the masks defined above.
insect_filter = trichoptera_filter

In [None]:
# Restrict samples and labels to the selected class; the displayed shape gives
# the number of samples available for browsing.
filtered_X = X[insect_filter]
filtered_Y = Y[insect_filter]
filtered_Y.shape

In [None]:
# Cursor into filtered_X / filtered_Y for the viewer cell below; re-run this
# cell to start again from the first sample.
idx = 0

In [None]:
# Show the sample at the cursor together with its class name.
img = filtered_X[idx]
label = lbl_to_name[filtered_Y[idx]]
plt.imshow(img)
print(label)
# Deliberately stateful: each re-run of this cell advances to the next sample
# (an IndexError is raised once idx runs past the last filtered sample).
idx += 1