In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import json
from PIL import Image
from collections import Counter
import pickle

In [None]:
# Root folder holding the competition files; every path below is derived from it.
base_path = '..'
# True  -> work on every annotated tile (the "all_dataset" folders).
# False -> restrict the run to dataset 1 tiles (the "dataset1" folders).
generate_all_datset_annots = True
# Annotation types that get exported; a type's position in this list is used as its category id.
tile_categories = ['blood_vessel']
input_imgs_path = f'{base_path}/dataset1_files/all_dataset1_imgs' if not generate_all_datset_annots else f'{base_path}/all_dataset_files/all_dataset_imgs'
output_annotations_path = f'{base_path}/dataset1_files/all_dataset1_annotations' if not generate_all_datset_annots else f'{base_path}/all_dataset_files/all_dataset_annotations'
# makedirs also creates missing parent folders and tolerates an already existing target,
# so the cell can be re-run safely and works on a fresh checkout.
os.makedirs(output_annotations_path, exist_ok=True)

In [None]:
def get_count_type(tiles_dict):
  """Tally the annotation types found on each tile.

  Args:
    tiles_dict: iterable of tile records; each record provides an 'id' and an
      'annotations' list whose items carry a 'type' key.

  Returns:
    A list with one dict per tile, in input order, holding the tile 'id' and
    the number of 'blood_vessel', 'glomerulus' and 'unsure' annotations.

  Raises:
    KeyError: if an annotation has a type other than the three counted ones.
  """
  def _tally(tile):
    # Start every known type at zero so tiles without annotations still report all columns.
    counts = {'id': tile['id'], 'blood_vessel': 0, 'glomerulus': 0, 'unsure': 0}
    for annotation in tile['annotations']:
      counts[annotation['type']] += 1
    return counts

  return [_tally(tile) for tile in tiles_dict]

In [None]:
# polygons.jsonl is read as JSON Lines: one JSON document per line.
with open(f'{base_path}/polygons.jsonl', 'r') as json_file:
    # Skip whitespace-only lines so a blank or trailing empty line cannot make json.loads raise.
    json_list = [line for line in json_file if line.strip()]

# One parsed record per non-empty line, in file order.
tiles_dicts = [json.loads(json_str) for json_str in json_list]

In [None]:
# Tile metadata table.
tile_df = pd.read_csv(f'{base_path}/tile_meta.csv')
# Per-tile annotation counts for the tiles that have polygon records.
annotated_ids_with_info = get_count_type(tiles_dicts)
annotated_ids_with_info_df = pd.DataFrame.from_dict(annotated_ids_with_info)
# Inner join on 'id': only tiles present in both the metadata and the polygon file are kept.
tile_df_annotated = tile_df.merge(annotated_ids_with_info_df, how='inner', on='id')
tile_df_annotated.head()

In [None]:
# Preview the annotated tiles that contain at least one glomerulus annotation.
has_glomerulus = tile_df_annotated['glomerulus'] > 0
tile_df_annotated.loc[has_glomerulus].head()

In [None]:
# Sorted ids of the annotated tiles whose 'dataset' column equals 1.
is_dataset_1 = tile_df_annotated['dataset'] == 1
dataset_1_tile_ids = sorted(tile_df_annotated.loc[is_dataset_1, 'id'].values)
# dataset_2_tile_ids = sorted(tile_df_annotated.loc[tile_df_annotated['dataset']==2, 'id'].values)

In [None]:
# Choose the tile records to convert: everything, or only the dataset 1 tiles.
if generate_all_datset_annots:
    tgt_tile_dicts = tiles_dicts
else:
    # Membership tests against a set are constant time; testing against the sorted
    # list would rescan it for every tile record.
    dataset_1_id_set = set(dataset_1_tile_ids)
    tgt_tile_dicts = [tile for tile in tiles_dicts if tile['id'] in dataset_1_id_set]
print(len(tgt_tile_dicts))

In [None]:
# Display the first selected tile record to inspect its structure.
tgt_tile_dicts[0]

In [None]:
def calculate_area(coordinates):
    """Return the area enclosed by a polygon, using the shoelace formula.

    Args:
        coordinates: indexable sequence of (x, y) vertex pairs in drawing order.
            The edge from the last vertex back to the first is added implicitly.

    Returns:
        0 when fewer than three vertices are given; otherwise the absolute
        area, so the vertex orientation does not affect the result.
    """
    vertex_count = len(coordinates)
    if vertex_count < 3:
        return 0

    twice_signed_area = 0
    for idx in range(vertex_count):
        x_cur, y_cur = coordinates[idx]
        # The modulo wraps the final vertex around to the first one, closing the polygon.
        x_next, y_next = coordinates[(idx + 1) % vertex_count]
        twice_signed_area += (x_cur * y_next) - (x_next * y_cur)

    return abs(twice_signed_area / 2)

In [None]:
# Convert every selected tile's polygons into COCO-style records and pickle them,
# one '<tile id>.pkl' file per tile (tiles without matching annotations get an empty list).
for tgt_tile_dict in tgt_tile_dicts:
  img_id = tgt_tile_dict['id']
  cur_tile_coco_annots = []
  # Only annotation types listed in tile_categories are exported.
  annotations = [annot for annot in tgt_tile_dict['annotations'] if annot['type'] in tile_categories]
  for annot in annotations:
    # Only the first coordinate list of the annotation is used.
    coords = annot['coordinates'][0]
    xs = [pt[0] for pt in coords]
    ys = [pt[1] for pt in coords]
    min_x, min_y = min(xs), min(ys)
    # The category id is the type's position in tile_categories.
    category_id = tile_categories.index(annot['type'])
    cur_tile_coco_annots.append({
      # Flattened [x0, y0, x1, y1, ...] list wrapped in an outer list.
      'segmentation': [[value for pt in coords for value in pt]],
      'area': calculate_area(coords),
      # [x, y, width, height] of the axis-aligned bounding box.
      'bbox': [min_x, min_y, max(xs) - min_x, max(ys) - min_y],
      'category_id': category_id,
    })
  with open(f'{output_annotations_path}/{img_id}.pkl', 'wb') as f:
    pickle.dump(cur_tile_coco_annots, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes the preview slice below
# (and anything indexing into this list) reproducible across runs and machines.
img_annot_files = sorted(os.listdir(output_annotations_path))
img_annot_files[67:72]

In [None]:
import random
# Pick one annotation file at random. No seed is set in the visible cells,
# so the chosen file may differ between runs.
img_annot_file = random.choice(img_annot_files)

In [None]:
# Display the name of the currently selected annotation file.
img_annot_file

In [None]:
# Plotting helper for comparing images side by side.
def visualize(**images):
    """Show the given images next to each other in a single row.

    Each keyword argument becomes one panel: the keyword (underscores turned
    into spaces, title-cased) is the panel title and the value is passed to
    imshow. Axis ticks are hidden on every panel.
    """
    panel_count = len(images)
    fig = plt.figure(figsize=(16, 5))
    for position, name in enumerate(images, start=1):
        ax = fig.add_subplot(1, panel_count, position)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(name.replace('_', ' ').title())
        ax.imshow(images[name])
    plt.show()

In [None]:
import copy
# Annotation files to preview: the base image next to the original polygons and
# the COCO-style records that were read back from disk.
tgt_annot_files = ['5ea525251225.pkl']
# To preview random tiles instead, build this list with random.choice(img_annot_files).
for img_annot_file in tgt_annot_files:
  img_id = img_annot_file.split('.')[0]
  # NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
  with open(f'{output_annotations_path}/{img_annot_file}', 'rb') as f:
    tgt_annots = pickle.load(f)
  tgt_annots = [annot for annot in tgt_annots if annot['category_id']==0]

  img_path = f'{input_imgs_path}/{img_id}.png'
  base_image = cv2.imread(img_path)
  # cv2.imread signals an unreadable or missing file by returning None instead of raising.
  if base_image is None:
    raise FileNotFoundError(f'Could not read image: {img_path}')
  # OpenCV loads images as BGR; matplotlib expects RGB.
  base_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2RGB)
  annotated_image_orig = copy.deepcopy(base_image)
  annotated_image_coco = copy.deepcopy(base_image)

  # Draw the records read back from the pickle: filled polygon plus bounding box.
  for tgt_annot in tgt_annots:
    flat_points = tgt_annot['segmentation'][0]
    # Rebuild (x, y) pairs from the flattened [x0, y0, x1, y1, ...] list.
    coords = list(zip(flat_points[::2], flat_points[1::2]))
    cv2.fillPoly(annotated_image_coco, pts=[np.array(coords, dtype=np.int32)], color=(0,255,0))
    min_x, min_y, width, height = tgt_annot['bbox']
    top_left = (int(min_x), int(min_y))
    bottom_right = (int(min_x + width), int(min_y + height))
    annotated_image_coco = cv2.rectangle(annotated_image_coco, top_left, bottom_right, (255,0,0), 2)

  # Draw the original polygons of the same tile for comparison.
  orig_tile = next((tile for tile in tgt_tile_dicts if tile['id'] == img_id), None)
  if orig_tile is None:
    raise LookupError(f'No tile with id {img_id!r} found in tgt_tile_dicts')
  for tgt_orig_annot in orig_tile['annotations']:
    if tgt_orig_annot['type'] != 'blood_vessel':
      continue
    # One point array per coordinate list of the annotation.
    rings = [np.array(ring, dtype=np.int32) for ring in tgt_orig_annot['coordinates']]
    cv2.fillPoly(annotated_image_orig, pts=rings, color=(0,255,0))

  imgs_dict = dict()
  imgs_dict['base_image'] = base_image
  imgs_dict['annotated_image_orig'] = annotated_image_orig
  imgs_dict['annotated_image_coco'] = annotated_image_coco
  visualize(**imgs_dict)

In [None]:
import os
import pickle
# NOTE(review): absolute, machine-specific path; presumably the same project tree that
# base_path points to elsewhere in the notebook - confirm before making it relative.
train_annots_path = '/home/ec2-user/hubmap-hacking-the-human-vasculature/dataset1_files/all_dataset1_annotations_train_0'
train_annot_files = os.listdir(train_annots_path)
# Gather every distinct category id that occurs in the pickled annotation files.
category_ids = set()
for train_annot_file in train_annot_files:
    with open(f'{train_annots_path}/{train_annot_file}', 'rb') as f:
        annots = pickle.load(f)
    category_ids |= {annot['category_id'] for annot in annots}

In [None]:
# Display the set of category ids found in the scanned annotation files.
category_ids

In [None]:
# Pool COCO-style records from every selected tile into one list per annotation type.
# A type's position in this list is its category id.
# NOTE(review): 'glomerulus' -> 1 is an assumption; confirm the intended id.
split_categories = ['blood_vessel', 'glomerulus']
glomerulus_annots = []
blood_vessel_annots = []
for tgt_tile_dict in tgt_tile_dicts:
  # Walk this tile's own annotations; relying on a list left over from another
  # cell would process the same tile's polygons once per tile.
  for annot in tgt_tile_dict['annotations']:
    annot_type = annot['type']
    if annot_type not in split_categories:
      continue  # other types are not collected here
    # Only the first coordinate list of the annotation is used.
    coords = annot['coordinates'][0]
    xs = [pt[0] for pt in coords]
    ys = [pt[1] for pt in coords]
    min_x, min_y = min(xs), min(ys)
    cur_annot = {
      # Flattened [x0, y0, x1, y1, ...] list wrapped in an outer list.
      'segmentation': [[value for pt in coords for value in pt]],
      'area': calculate_area(coords),
      # [x, y, width, height] of the axis-aligned bounding box.
      'bbox': [min_x, min_y, max(xs) - min_x, max(ys) - min_y],
      'category_id': split_categories.index(annot_type)
    }
    if annot_type == 'glomerulus':
        glomerulus_annots.append(cur_annot)
    elif annot_type == 'blood_vessel':
        blood_vessel_annots.append(cur_annot)