# Install and Import modules

In [47]:
# !tar xfvz /kaggle/input/ultralytics-for-offline-install/archive.tar.gz
# !pip install --no-index --find-links=./packages ultralytics
# !rm -rf ./packages

In [48]:
# !cp -r '/kaggle/input/hengck-czii-cryo-et-01/wheel_file' '/kaggle/working/'
# !pip install /kaggle/working/wheel_file/asciitree-0.3.3/asciitree-0.3.3
# !pip install --no-index --find-links=/kaggle/working/wheel_file zarr

In [49]:
import zarr
from ultralytics import YOLO
from tqdm import tqdm
import glob, os
import torch

In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

In [51]:
# Root of the raw data, relative to the notebook's working directory.
DATA_KAGGLE_DIR = "../../raw"

# Experiment name; it is embedded in the checkpoint paths and in the output CSV name.
EXP_NAME = 'baseline_cv_syn_data_transfer_mod'

# Experiment runs to predict, one per trained checkpoint (paired in order further down).
LIST_CV = [
    "TS_5_4", "TS_69_2", "TS_6_4", "TS_6_6",
    "TS_73_6", "TS_86_3", "TS_99_9",
]

In [52]:
# WANDB = True
# WANDB_EXP_NAME = f"{EXP_NAME}"
# # EXP_NAME = "try"

# if WANDB:
#     # !pip install wandb
#     import wandb
#     import os
#     from dotenv import load_dotenv
#     load_dotenv()
#     wandb.login(key=os.environ.get("WANDB_API_KEY"))

We raise Python's recursion limit so that recursive graph traversal in this notebook can explore the graph deeply enough.

In [53]:
import sys

# Permit up to 10,000 nested Python calls (the interpreter default is much lower).
sys.setrecursionlimit(10_000)

In [54]:
import warnings

# Silence every warning for the rest of the session.
warnings.simplefilter(action="ignore")
# Expose the stdlib module as ``np.warnings``.
# NOTE(review): presumably a shim for a dependency that still looks up this attribute - confirm it is still needed.
setattr(np, "warnings", warnings)

# Prepare trained YOLO model

In [55]:
#add by @minfuka
# Inference below always targets device "0"; this notebook expects exactly one visible GPU.
n_cuda_devices = torch.cuda.device_count()
assert n_cuda_devices == 1, f"expected exactly one CUDA device, found {n_cuda_devices}"

In [56]:
# Particle types handled by this notebook.
particle_names = [
    "apo-ferritin",
    "beta-amylase",
    "beta-galactosidase",
    "ribosome",
    "thyroglobulin",
    "virus-like-particle",
]

In [57]:
# Particle type name -> integer class index.
p2i_dict = dict(
    [
        ("apo-ferritin", 0),
        ("beta-amylase", 1),
        ("beta-galactosidase", 2),
        ("ribosome", 3),
        ("thyroglobulin", 4),
        ("virus-like-particle", 5),
    ]
)

# Reverse lookup: integer class index -> particle type name.
i2p = dict(zip(p2i_dict.values(), p2i_dict.keys()))

In [58]:
# Radius associated with each particle type.
# NOTE(review): units are not stated in this notebook - confirm against the data source.
particle_radius = {
    "apo-ferritin": 60,
    "beta-amylase": 65,
    "beta-galactosidase": 90,
    "ribosome": 150,
    "thyroglobulin": 130,
    "virus-like-particle": 135,
}

# Define the main processing class
Many variables and functions are shared between the processing steps. Grouping them in a class makes that state easier to manage.

In [59]:
class PredAggForYOLO:
    """Run a YOLO detector over every z-slice of a tomogram and collect candidate centres.

    ``make_predict_yolo`` is the entry point used by the notebook. ``dfs`` is a graph
    helper that works on state the caller must attach to the instance beforehand
    (``pdf``, ``adjacency_list``, ``passed``, ``conf_sum``, ``cx``, ``cy``, ``cz``,
    ``nv``); nothing in this class initialises those attributes.
    """

    # Side length of the square image handed to the model (must be a multiple of 32).
    MODEL_INPUT_SIZE = 640
    # Multiplier from voxel index to output coordinate. Keep in sync with the
    # ``VoxelSpacing10.000`` directory that make_predict_yolo reads.
    VOXEL_SPACING = 10

    def __init__(self, first_conf=0.2, final_conf=0.3, conf_coef=0.75):
        """Store the thresholds.

        Args:
            first_conf: confidence threshold passed to ``model.predict``.
            final_conf: final score threshold (noted by the author as unused since version 14).
            conf_coef: bonus coefficient applied when many points are found
                (per the original comment; not used inside this class).
        """
        self.first_conf = first_conf
        self.final_conf = final_conf
        self.conf_coef = conf_coef
        # Per-class thresholds, stricter for the easier labels (per the original comment).
        # NOTE(review): presumably ordered by class index - confirm; not used inside this class.
        self.particle_confs = [0.5, 0.0, 0.2, 0.5, 0.2, 0.5]

    def convert_to_8bit(self, x):
        """Clip ``x`` to its 0.5-99.5 percentile range and rescale it to uint8 in [0, 255]."""
        lower, upper = np.percentile(x, (0.5, 99.5))
        x = np.clip(x, lower, upper)
        # The small epsilon keeps a constant input from dividing by zero.
        x = (x - x.min()) / (x.max() - x.min() + 1e-12) * 255
        return x.round().astype("uint8")

    def _accumulate(self, v):
        """Mark row ``v`` as visited and add its confidence and coordinates to the running sums."""
        self.passed[v] = True
        row = self.pdf.iloc[v]  # fetch the row once instead of once per field
        self.conf_sum += row.confidence
        self.cx += row.x
        self.cy += row.y
        self.cz += row.z
        self.nv += 1

    def dfs(self, v):
        """Depth-first search from ``v`` over ``self.adjacency_list``.

        Every unvisited node reachable from ``v`` is marked in ``self.passed`` and its
        confidence / x / y / z are added to ``self.conf_sum`` / ``self.cx`` / ``self.cy`` /
        ``self.cz``; ``self.nv`` counts the visited nodes.

        The traversal uses an explicit stack of neighbour iterators, so nodes are visited
        in the same pre-order as a recursive DFS but the component size is not bounded by
        the interpreter's recursion limit.
        """
        self._accumulate(v)
        pending = [iter(self.adjacency_list[v])]
        while pending:
            for next_v in pending[-1]:
                if self.passed[next_v]:
                    continue
                self._accumulate(next_v)
                # Descend into the neighbour; the parent iterator resumes once it is exhausted.
                pending.append(iter(self.adjacency_list[next_v]))
                break
            else:
                pending.pop()

    # main routine.
    # change by @minfuka
    def make_predict_yolo(self, r, model, device_no, data_dir=None):
        """Predict on every z-slice of run ``r`` and return all detections as a DataFrame.

        Args:
            r: experiment run name (directory under ``static/ExperimentRuns``).
            model: object exposing ``predict(...)`` that returns results with a ``boxes``
                attribute carrying ``cls``, ``conf`` and ``xyxy`` tensors.
            device_no: device identifier forwarded to ``model.predict``.
            data_dir: dataset root; when ``None`` the notebook-level ``valid_dir`` is used.

        Returns:
            DataFrame with columns ``experiment``, ``particle_type``, ``confidence``,
            ``x``, ``y``, ``z``, sorted by ``particle_type`` then ``z``. ``x`` and ``y``
            are float32 columns holding the box centres scaled back to the slice size and
            multiplied by ``VOXEL_SPACING``; ``z`` is the slice centre.
        """
        root = valid_dir if data_dir is None else data_dir
        vol = zarr.open(f'{root}/static/ExperimentRuns/{r}/VoxelSpacing10.000/denoised.zarr', mode='r')
        vol2 = self.convert_to_8bit(vol[0])

        size = self.MODEL_INPUT_SIZE
        spacing = self.VOXEL_SPACING

        pts = []
        confs = []
        xs = []
        ys = []
        zs = []

        for i in range(vol2.shape[0]):
            # The slice is resized to the model input size; remember its own size so the
            # predicted centres can be mapped back (630 -> 640 gives the factor 63/64).
            slice_img = vol2[i].astype(np.float64)
            height, width = slice_img.shape

            inp_arr = np.stack([slice_img] * 3, axis=-1)
            inp_arr = cv2.resize(inp_arr, (size, size))

            res = model.predict(inp_arr, save=False, imgsz=size, conf=self.first_conf, device=device_no, batch=1, verbose=False)
            zc = i * spacing + spacing // 2
            for result in res:
                boxes = result.boxes  # Boxes object for bounding box outputs
                # Copy each tensor to the host once per result, not once per detection.
                cls_ids = boxes.cls.cpu().numpy()
                scores = boxes.conf.cpu().numpy()
                xyxy = boxes.xyxy.cpu().numpy()

                # Box centres in model-input pixels -> slice pixels -> output coordinates.
                x_centers = (xyxy[:, 0] + xyxy[:, 2]) / 2.0 * spacing * (width / size)
                y_centers = (xyxy[:, 1] + xyxy[:, 3]) / 2.0 * spacing * (height / size)

                pts.extend(i2p[int(c)] for c in cls_ids)
                confs.extend(scores)
                xs.extend(x_centers)
                ys.extend(y_centers)
                zs.extend([zc] * len(cls_ids))

        df = pd.DataFrame({
            'experiment': [r] * len(pts),
            'particle_type': pts,
            'confidence': confs,
            'x': np.asarray(xs, dtype=np.float32),
            'y': np.asarray(ys, dtype=np.float32),
            'z': zs,
        })

        # df holds every candidate detection for this run.
        df = df.sort_values(['particle_type', 'z'], ascending=[True, True])

        return df

In [60]:
# Build the single predictor instance shared by the inference cells.
agent = PredAggForYOLO(
    first_conf=0.01,
    final_conf=0.2,  # final_conf is not used after version 14
    conf_coef=0.5,
)
# subs = []

In [61]:
# subs = []

In [62]:
import time
#add by @minfuka
from concurrent.futures import ProcessPoolExecutor #add by @minfuka

# Main inference loop

In [63]:
# Tomograms are read from the train split under DATA_KAGGLE_DIR.
valid_dir = f'{DATA_KAGGLE_DIR}/train'

# One checkpoint per entry of LIST_CV, in the same order. The first run directory is
# named EXP_NAME; later ones carry a numeric suffix starting at 2 (EXP_NAME2, EXP_NAME3, ...).
# Deriving the list from LIST_CV keeps the two from drifting apart.
list_model_path = [
    f"../../runs/detect/{EXP_NAME}{'' if fold == 0 else fold + 1}/weights/best.pt"
    for fold in range(len(LIST_CV))
]

In [64]:
#add by @minfuka
def inference(runs, model, device_no):
    """Predict every run in ``runs`` with ``model`` on ``device_no``.

    Uses the notebook-level ``agent`` and returns the per-run DataFrames as a list,
    in the same order as ``runs``.
    """
    progress = tqdm(runs, total=len(runs))
    return [agent.make_predict_yolo(run_name, model, device_no) for run_name in progress]

In [65]:

# tick = time.time()
#change by @minfuka
# Each run in LIST_CV is predicted with the checkpoint at the same position in
# list_model_path. zip() would silently drop unmatched entries, so check first.
if len(LIST_CV) != len(list_model_path):
    raise ValueError(
        f"LIST_CV has {len(LIST_CV)} runs but list_model_path has {len(list_model_path)} checkpoints"
    )

subs = []
for r, model_path in tqdm(zip(LIST_CV, list_model_path), total=len(LIST_CV)):
    model = YOLO(model_path)
    # Everything runs on device "0".
    df = agent.make_predict_yolo(r, model, "0")
    subs.append(df)
# with ProcessPoolExecutor(max_workers=2) as executor:
#     results = list(executor.map(inference, (runs1, runs2), (model, model), ("0", "1")))
# tock = time.time()

100%|██████████| 7/7 [02:20<00:00, 20.06s/it]


In [66]:
# `df` is the frame left over from the last iteration of the inference loop,
# so this previews only the final run in LIST_CV.
df.head()

Unnamed: 0,experiment,particle_type,confidence,x,y,z
0,TS_99_9,apo-ferritin,0.056937,334.58252,770.4638,5
1,TS_99_9,apo-ferritin,0.039369,1748.5933,1216.0859,5
2,TS_99_9,apo-ferritin,0.023246,658.88586,839.3009,5
3,TS_99_9,apo-ferritin,0.01857,440.79077,618.9655,5
6,TS_99_9,apo-ferritin,0.051313,411.9216,65.310455,25


In [67]:
#submission = pd.concat(subs).reset_index(drop=True)
#change by @minfuka
# submission1 = pd.concat(results[1])
# if len(valid_id) == 1:
#     submission = submission1.copy()
# else:
#     submission0 = pd.concat(results[0])
#     submission = pd.concat([submission0, submission1]).reset_index(drop=True)
# Stack the per-run frames into one table with a fresh 0..n-1 index.
submission = pd.concat(subs, ignore_index=True)
# submission.insert(0, 'id', range(len(submission)))

In [68]:
# Write the combined predictions; create the output directory first so a fresh
# checkout does not fail on a missing folder.
pred_csv_path = f"../../proc/pred_yolo/pred_yolo_{EXP_NAME}.csv"
os.makedirs(os.path.dirname(pred_csv_path), exist_ok=True)
submission.to_csv(pred_csv_path, index=False)
submission.head()

Unnamed: 0,experiment,particle_type,confidence,x,y,z
0,TS_5_4,apo-ferritin,0.150403,5464.939,1967.273,5
1,TS_5_4,apo-ferritin,0.127206,4143.2324,278.86343,5
2,TS_5_4,apo-ferritin,0.114715,5384.1123,5010.9272,5
3,TS_5_4,apo-ferritin,0.092003,5524.3286,6233.701,5
4,TS_5_4,apo-ferritin,0.043141,4973.415,4839.5684,5


In [69]:
# (rows, columns) of the combined prediction table.
submission.shape

(104732, 6)