In [None]:
import os
# Pick the dataset root: the Kaggle input mount when the Kaggle run-type variable is set
# to a non-empty value, otherwise a local ./data/ directory.
if os.environ.get("KAGGLE_KERNEL_RUN_TYPE", ""):
    data_folder = "../input/uw-madison-gi-tract-image-segmentation/"
else:
    data_folder = "./data/"

# List all imports below
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scv_utility import *
import torch

# Notebook-wide configuration: a single seed shared by NumPy and PyTorch so that
# stochastic steps are repeatable, plus a wider pandas console width.
SEED = 0
DISPLAY_WIDTH = 120

torch.manual_seed(SEED)
np.random.seed(SEED)
pd.set_option("display.width", DISPLAY_WIDTH)

In [None]:
# Read the annotation table, parsing the id / class / segmentation columns as plain strings
csv_converters = {column: str for column in ("id", "class", "segmentation")}
data = pd.read_csv(data_folder + "train.csv", converters=csv_converters)
print(f"Classes in train set: {data['class'].unique()}")

# Case prefixes that make up the small train and test subsets. The trailing underscore
# is part of each prefix, so e.g. "case2_" does not also select ids of "case20_".
train_cases = [f"case{number}_" for number in (2, 7, 15, 20, 22, 24, 29, 30, 32, 123)]
test_cases = [f"case{number}_" for number in (156, 154, 149)]

# Keep the rows whose id contains any of the listed prefixes (alternation pattern)
train_pattern = "|".join(train_cases)
test_pattern = "|".join(test_cases)
train_data = data[data["id"].str.contains(train_pattern)]
test_data = data[data["id"].str.contains(test_pattern)]

n_train_images = len(train_data["id"].unique())
n_test_images = len(test_data["id"].unique())
print(f"Number of train images: {n_train_images}, test images: {n_test_images}")

In [None]:
# Check that every train and test image reports the same resolution and pixel size.
# Problems are printed per image and collected, so the final summary only claims
# success when no image failed to load or failed the check.
expected_image_res = (266, 266)
expected_pixel_size = 1.50
failed_ids = []

for sample_id in np.concatenate((train_data["id"].unique(), test_data["id"].unique())):
    try:
        sample_image, sample_image_res, sample_pixel_size = get_image_data_from_id(sample_id, data_folder)
        # Explicit check instead of `assert`, which is stripped when Python runs with -O
        if not (sample_image_res == expected_image_res and sample_pixel_size == expected_pixel_size):
            raise ValueError("Incorrect resolution or pixel size")
    except Exception as e:
        # Best effort: keep scanning the remaining images, but remember the failure
        print(f"Exception {e} while reading image {sample_id}")
        failed_ids.append(sample_id)

if failed_ids:
    print(f"Dataset analysis failed for {len(failed_ids)} image(s)")
else:
    print("Dataset analysis successfull")

In [None]:
# Show one example slice twice: first on its own, then with the shape decoded from its
# run-length encoding drawn on top.
example_id = "case123_day20_slice_0067"

# Load the slice and convert the "stomach" run-length encoding into x/y coordinates
sample_image, sample_image_res, _ = get_image_data_from_id(example_id, data_folder)
rle = extract_rle(data, example_id, "stomach")
res_first, res_second = sample_image_res[0], sample_image_res[1]
x, y = rle_to_xy(rle, res_first, res_second)

# First pass: plain image. Second pass: same image with a semi-transparent red fill.
for with_overlay in (False, True):
    imgplot = plt.imshow(sample_image)
    if with_overlay:
        plt.fill(x, y, color="r", alpha=0.3)
    plt.show()