# HuBMAP image conversion

> Convert TIFF images to Zarr arrays, downscaled by a configurable scale factor

***

- V10: Added test set

In [None]:
# Install zarr and load packages
!pip install -qq zarr
import cv2, zarr, tifffile
import matplotlib.pyplot as plt, numpy as np, pandas as pd
from pathlib import Path

In [None]:
def read_image(image_id, path, scale=None, verbose=1):
    """Load the TIFF image `image_id` from `path`, optionally downscaled.

    The image is read from ``train/{image_id}.tiff`` under `path` when that
    file exists, otherwise from ``test/{image_id}.tiff``.

    Args:
        image_id: Image identifier, i.e. the file name without ``.tiff``.
        path: Dataset root directory as a ``pathlib.Path``-like object
            (must support the ``/`` operator and ``.exists()``).
        scale: Integer divisor applied to both height and width. Falsy
            values (``None``, ``0``) disable resizing.
        verbose: When truthy, print the image shape before and after
            resizing.

    Returns:
        The image array produced by ``tifffile.imread`` after the axis
        reordering and optional resize below.

    Raises:
        Whatever ``tifffile.imread`` raises for the selected file. Errors
        reading an existing train file are no longer masked by a silent
        retry against the test directory.
    """
    train_file = path/f"train/{image_id}.tiff"
    test_file = path/f"test/{image_id}.tiff"

    # Choose the file explicitly instead of wrapping the read in a bare
    # `except:`; that would also swallow corrupt-file errors, MemoryError and
    # KeyboardInterrupt and then report a misleading failure for the test path.
    image_file = train_file if train_file.exists() else test_file
    image = tifffile.imread(image_file)

    if image.ndim == 5:
        # Drop singleton axes, then move the first remaining axis last.
        # NOTE(review): assumes a (1, 1, C, H, W) layout - confirm against data.
        image = image.squeeze().transpose(1, 2, 0)
    elif image.shape[0] == 3:
        # Presumably channel-first (3, H, W); convert to (H, W, 3).
        image = image.transpose(1, 2, 0)

    if verbose:
        print(f"[{image_id}] Image shape: {image.shape}")

    if scale:
        # cv2.resize expects the target size as (width, height).
        new_size = (image.shape[1] // scale, image.shape[0] // scale)
        image = cv2.resize(image, new_size)

        if verbose:
            print(f"[{image_id}] Resized Image shape: {image.shape}")

    return image

# Settings

In [None]:
# Integer factor by which every image is downscaled before being stored.
scale = 2

# Root directory of the competition input data.
path = Path('/kaggle/input/hubmap-kidney-segmentation')

# Load both id tables (train first, then sample submission).
df_train, df_sample = (
    pd.read_csv(path.joinpath(csv_name))
    for csv_name in ("train.csv", "sample_submission.csv")
)

# Zarr group receiving one array per image; the scale is part of its name.
output_store = f'/kaggle/working/images_scale{scale}'
g_out = zarr.group(output_store)

## Loop over images

In [None]:
# Ids from the sample submission come first, followed by the training ids.
image_ids = df_sample['id'].tolist() + df_train['id'].tolist()

for idx in image_ids:
    # Read (and downscale) the image, then store it in the zarr group
    # under its id.
    img = read_image(idx, path, scale=scale)
    g_out[idx] = img

    stored = g_out[idx]
    print(stored.info)
    shape = stored.shape

    # Show a 512-pixel-wide preview whose height keeps the aspect ratio;
    # cv2.resize takes dsize as (width, height).
    preview_width = 512
    preview_height = preview_width * shape[0] // shape[1]
    preview = cv2.resize(img, dsize=(preview_width, preview_height))
    plt.imshow(preview)
    plt.show()