In [1]:
import xml.etree.ElementTree as ET
import cv2
from PIL import Image
import os
from os import listdir as ld
from os.path import join as pj
from os import getcwd as cwd
import numpy as np
import pandas as pd
from scipy import ndimage
import h5py
from scipy import sparse
from tqdm import tqdm

from IO.loader import parse_annotations, file_id
from evaluation.classification.statistics import compute_anno_stats, compute_average_size, compute_size_correction
from evaluation.classification.visualize import plot_size_of_anno, plot_size_by_class_of_anno
from utils.crop import *
from utils.annotate import *
from IO.build_ds import build_classification_ds

import matplotlib.pyplot as plt
plt.style.use("dark_background")
%matplotlib inline

# Load data

In [2]:
# Dataset root and the annotation sub-folders read below.
root = "/home/tanida/workspace/Insect_Phenology_Detector/data"
anno_folders = ["annotations_0", "annotations_2", "annotations_3", "annotations_4"]

# Gather the full path of every entry found in each annotation folder.
annos = []
for anno_folder in anno_folders:
    annos_name = ld(pj(root, anno_folder))
    annos += [pj(root, anno_folder, x) for x in annos_name]

# Full paths of everything in "refined_images" except the ".ipynb_checkpoints" entry.
imgs = [
    pj(root, "refined_images", x)
    for x in ld(pj(root, "refined_images"))
    if x != ".ipynb_checkpoints"
]

In [3]:
# Decode every image into an array, keyed by the id that file_id derives from its path.
images = {file_id(path): np.array(Image.open(path)) for path in imgs}

# For each image id, keep the annotation paths whose string contains that id;
# ids with no matching path are left out.
annotations = {}
for idx in images:
    matched = [path for path in annos if idx in path]
    if len(matched) > 0:
        annotations[idx] = matched

# Parse only the ".xml" annotation files and concatenate their results per image id.
anno = {}
for k, v in annotations.items():
    anno[k] = [
        parsed
        for x in v
        if x.endswith(".xml")
        for parsed in parse_annotations(x)
    ]

# Make datasets

In [4]:
# Build X, Y and sizes from the parsed annotations and decoded images, using
# crop_adjusted_std_resize as the crop function (return_sizes=True adds `sizes`).
# NOTE: slow -- the recorded run below took about 18.5 minutes for 65 items.
X,Y,sizes = build_classification_ds(anno, images, crop=crop_adjusted_std_resize, return_sizes=True)

100%|██████████| 65/65 [18:38<00:00, 18.99s/it]


# Visualize intraclass size variance

In [51]:
def get_statistics_all_insects(Y, sizes):
    """Compute the mean and standard deviation of `sizes` for each distinct label in `Y`.

    Parameters
    ----------
    Y : array-like
        Label of each sample.
    sizes : array-like
        Size value of each sample, aligned element-wise with `Y`.

    Returns
    -------
    size_mean : list
        Mean of `sizes` per label, ordered by the sorted unique labels of `Y`.
    size_std : list
        Standard deviation of `sizes` per label (np.std default, i.e. ddof=0),
        in the same order as `size_mean`.
    """
    # Coerce to arrays so boolean-mask indexing also works for plain lists.
    Y = np.asarray(Y)
    sizes = np.asarray(sizes)

    size_mean = []
    size_std = []
    # np.unique yields the labels in sorted order; that fixes the output order.
    for label in np.unique(Y):
        class_sizes = sizes[Y == label]
        size_mean.append(class_sizes.mean())
        size_std.append(np.std(class_sizes))
    return size_mean, size_std

In [52]:
# Per-label mean and standard deviation of `sizes`, one entry per unique label in Y.
size_mean, size_std = get_statistics_all_insects(Y, sizes)

In [53]:
# Row labels used as the index of the per-class summary table.
cls_index = [
    'Coleoptera',
    'Diptera',
    'Ephemeridae',
    'Ephemeroptera',
    'Hemiptera',
    'Lepidoptera',
    'Plecoptera',
    'Trichoptera',
    'medium insect',
    'small insect',
    'snail',
    'spider',
]

In [54]:
# Per-label sample counts, computed here from Y: the `count` inside
# get_statistics_all_insects is local to that function and is not returned,
# so it is not available at notebook scope on a fresh kernel.
# np.unique returns labels in sorted order, matching size_mean / size_std.
count = np.unique(Y, return_counts=True)[1]
# NOTE(review): cls_index must hold exactly one name per unique label in Y, in
# sorted label order. It lists 12 names while the recorded table below has 10
# rows -- confirm the two agree for the current data.
cls_df = pd.DataFrame({"count": count, "size_mean": size_mean, "size_std": size_std}, index=cls_index)

In [55]:
# Show the per-class summary table (count, size_mean, size_std).
cls_df

Unnamed: 0,count,size_mean,size_std
Coleoptera,30,2036.5,1952.114354
Diptera,408,6350.215686,5171.425803
Ephemeridae,51,13069.333333,5057.934743
Ephemeroptera,178,3043.348315,5069.216379
Hemiptera,19,1005.526316,498.162663
Lepidoptera,267,9980.932584,10216.298719
Plecoptera,130,4068.161538,6544.920359
Trichoptera,248,3722.778226,2082.264777
medium insect,505,1031.544554,671.450066
small insect,1681,388.804878,211.339354


In [64]:
# Output directory for this notebook's results, under the current working directory.
figure_root = pj(cwd(), "figure/classification", "visualize_intraclass_size_variance")
# exist_ok=True makes this a no-op when the directory already exists, so no
# separate os.path.exists() check (and no check-then-create race) is needed.
os.makedirs(figure_root, exist_ok=True)

In [66]:
# Save the per-class summary table as a CSV file inside figure_root.
cls_df.to_csv(pj(figure_root, "intraclass_size_variance.csv"))