In [8]:
import glob, sys, os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import warnings
warnings.filterwarnings(action = 'ignore')
import pandas as pd
import json
import tensorflow as tf
import tensorflow_io as tfio
import cv2
from tensorflow import image

In [9]:
# Collect every training TIFF in a deterministic (sorted) order.
img_paths = sorted(glob.glob('../input/hubmap-organ-segmentation/train_images/*tiff'))
print(len(img_paths))

# Preview up to the first three training images side by side.
# Slicing (instead of indexing 0..2) keeps the cell from raising IndexError
# when fewer than three files were found.
plt.figure(figsize = (18,10))
for i, path in enumerate(img_paths[:3]) :
    plt.subplot(1,3,i+1)
    # The context manager closes the underlying file handle once the image
    # has been handed to matplotlib.
    with Image.open(path) as img :
        plt.imshow(img)
plt.show()

In [11]:
# Load the training metadata table; `df` is reused by the cells below.
df = pd.read_csv('../input/hubmap-organ-segmentation/train.csv')
# (rows, columns) of the table.
print(df.shape)
# Last expression of the cell: shows the first three rows as the cell output.
df.head(3)

In [12]:
# Print pandas' summary of `df` (columns, non-null counts, dtypes).
df.info()

In [13]:
# Distinct organ labels present in the training metadata.
print(df['organ'].unique())

# One count plot per categorical column, laid out in a single row.
columns = ['organ', 'sex', 'pixel_size']
plt.figure(figsize = (15,6))
for i,col in enumerate(columns) :
    plt.subplot(1,len(columns),i+1)
    # Pass the column by keyword: recent seaborn releases treat the first
    # positional argument as `data`, not `x`, so a positional Series no longer
    # produces a per-category count.
    sns.countplot(data = df, x = col)
    plt.title(col, size = 20)
plt.show()

In [14]:
# Per-sex organ counts, drawn as grouped bars.
#
# All three sequences (male counts, female counts, tick labels) are built from
# the same `organs` order. Counting each subset in its own unique() order and
# padding the female list by hand only lines up by coincidence and breaks as
# soon as the subsets list organs in a different order or miss a different
# organ.
organs = df['organ'].unique()
m = df[df['sex'] == 'Male']
f = df[df['sex'] == 'Female']
# reindex(..., fill_value=0) inserts an explicit 0 for organs absent in a subset.
m_count = m['organ'].value_counts().reindex(organs, fill_value = 0).tolist()
f_count = f['organ'].value_counts().reindex(organs, fill_value = 0).tolist()
print(m.shape,f.shape)

fig = plt.figure(figsize=(12, 8))
# One slot every 2 x-units per organ; for five organs this is [1, 3, 5, 7, 9].
idx = np.arange(1, 2 * len(organs), 2)
plt.bar(idx+1.7, m_count, color = 'blue', width = 0.5, label = 'Male')
plt.bar(idx+2.2, f_count, color = 'red', width = 0.5, label = 'Female')
plt.xticks(idx+2, organs, size = 15)
plt.legend()
plt.xlabel('Organ', size = 20)
plt.ylabel('Counts', size = 20)
plt.title('Organ distribution according to sex', size = 25)
plt.show()

In [34]:
# Scatter of image width against image height.
# x/y are passed by keyword because recent seaborn releases accept only `data`
# positionally; two positional Series raise a TypeError there.
sns.scatterplot(data = df, x = 'img_width', y = 'img_height')

In [43]:
# Distinct (width, height) pairs that occur in the training metadata.
# Pair the two columns element-wise, then let a set drop the duplicates.
shape = list(zip(df['img_width'].to_numpy(), df['img_height'].to_numpy()))
print(set(shape))

In [46]:
# Print the distinct values of the `tissue_thickness` and `data_source` columns.
print(df['tissue_thickness'].unique(), df['data_source'].unique())

In [60]:
# Locate the test TIFF files and show the first one.
test_path = glob.glob('../input/hubmap-organ-segmentation/test_images/*tiff')
print(test_path)
# Read the pixels inside a context manager so the file handle is released as
# soon as the data has been copied into a NumPy array.
with Image.open(test_path[0]) as pil_img :
    test_img = np.array(pil_img)
print(test_img.shape)
plt.figure(figsize = (15,5))
plt.imshow(test_img)

In [63]:
# Load the test metadata table and print it as plain text.
df_test = pd.read_csv('../input/hubmap-organ-segmentation/test.csv')
print(df_test)
# Load the sample submission file; as the last expression it becomes the
# cell's displayed output.
df_sub = pd.read_csv('../input/hubmap-organ-segmentation/sample_submission.csv')
df_sub

In [86]:
# Load every training annotation JSON file.
# (`json` is already imported in the notebook's first cell.)

# Sorted so the order is deterministic, like `img_paths`.
ann_paths = sorted(glob.glob('../input/hubmap-organ-segmentation/train_annotations/*json'))
ann_list = []
for path in ann_paths :
    # `fp` rather than `f`, so the Female DataFrame named `f` is not overwritten.
    with open(path) as fp :
        ann = json.load(fp)
    ann_list.append(ann)

# Keep one Python object per file in a 1-D object array. Calling np.array()
# directly on nested lists of differing lengths raises ValueError on current
# NumPy releases (ragged input), so the slots are filled one by one instead.
anns = np.empty(len(ann_list), dtype = object)
for k, ann in enumerate(ann_list) :
    anns[k] = ann
print(len(anns))
anns.shape

In [88]:
# Load the first annotation file on its own and show how many top-level
# entries it contains.
# NOTE: `f` here rebinds the notebook-level name `f`, which an earlier cell
# uses for the Female subset of `df`.
with open(ann_paths[0]) as f :
    a = json.load(f)
len(a)

In [89]:
# Helper that returns the start and end offsets of every run in an RLE string.
def Rle_split(rle) :
  """Split a run-length-encoded string into run start and end offsets.

  Parameters
  ----------
  rle : str
      Whitespace-separated integers laid out as
      ``start length start length ...``. Leading, trailing or repeated
      whitespace is ignored; an empty string yields empty arrays.

  Returns
  -------
  start, end : np.ndarray
      Integer arrays of equal length, with ``end = start + length``. The
      start values are returned exactly as written in the string.
      NOTE(review): if the encoding is 1-based, callers need to subtract 1
      before using these as array offsets - confirm against the data spec.

  Raises
  ------
  ValueError
      If a token is not an integer, or if the number of tokens is odd
      (a start without a matching length).
  """
  # split() with no argument tolerates extra whitespace and returns [] for an
  # empty string, whereas split(' ') would yield '' tokens that fail int parsing.
  n = np.array(rle.split(), dtype = int)
  if n.size % 2 :
    # Without this check a 3-token input would silently broadcast.
    raise ValueError('RLE string must contain an even number of integers')
  start = n[::2]
  lengths = n[1::2]
  end = start+lengths
  return start,end

# Helper that creates a blank canvas and paints the runs belonging to the mask.
def Draw(shape, start, end, color) :
  """Paint flat index ranges onto a zero-filled canvas and return it transposed.

  Parameters
  ----------
  shape : tuple
      Either ``(h, w)`` or ``(h, w, c)``; any other length fails on unpacking.
  start, end : iterable of int
      Paired offsets into the flattened ``h*w`` canvas; each pair fills the
      half-open range ``[start, end)``.
  color : scalar or array-like
      Value written into every painted position (broadcast over the channel
      axis when ``shape`` has three entries).

  Returns
  -------
  np.ndarray
      float32 array obtained by reshaping the flat canvas to ``shape`` and
      reversing all of its axes (``.T``): ``(w, h)`` for a 2-entry shape,
      ``(c, w, h)`` for a 3-entry shape.
  """
  if len(shape) == 3 :
    h, w, c = shape
    flat_shape = (h * w, c)
  else :
    h, w = shape
    flat_shape = (h * w,)

  canvas = np.zeros(flat_shape, dtype = np.float32)
  for lo, hi in zip(start, end) :
    canvas[lo:hi] = color

  canvas = canvas.reshape(shape)
  return canvas.T

In [91]:
# Decode and display the masks of the first six training rows (fewer if the
# table is shorter).
plt.figure(figsize=(15,10))
for i in range(min(6, len(df))) :
  rle = df['rle'][i]
  start, end = Rle_split(rle)
  # Size the canvas from this row's own dimensions instead of assuming every
  # image is 3000x3000; a wrong canvas size scrambles the decoded mask.
  # Draw reshapes to the given shape and then transposes, so passing
  # (width, height) yields a (height, width) array for display.
  mask_shape = (df['img_width'][i], df['img_height'][i])
  a = Draw(mask_shape, start, end, 1)
  plt.subplot(2,3,i+1)
  plt.axis("off")
  plt.imshow(a)
plt.show()