# Install and Import modules

In [1]:
# !tar xfvz /kaggle/input/ultralytics-for-offline-install/archive.tar.gz
# !pip install --no-index --find-links=./packages ultralytics
# !rm -rf ./packages

In [2]:
# !cp -r '/kaggle/input/hengck-czii-cryo-et-01/wheel_file' '/kaggle/working/'
# !pip install /kaggle/working/wheel_file/asciitree-0.3.3/asciitree-0.3.3
# !pip install --no-index --find-links=/kaggle/working/wheel_file zarr

In [3]:
import zarr
from ultralytics import YOLO
from tqdm import tqdm
import glob, os
import torch

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import cv2

In [5]:
# Root folder of the raw competition data; tomograms are read from `<DATA_KAGGLE_DIR>/train`.
DATA_KAGGLE_DIR = '../../raw'
# Name of the YOLO training run; used to build the checkpoint paths under ../../runs/detect/.
EXP_NAME = "baseline_cv_syn_data_transfer_mod_last"
# Which checkpoint file to load from each run's weights folder ("<BEST_OR_LAST>.pt").
BEST_OR_LAST = "last"
# Experiment runs that are predicted and scored. They are paired, in order, with the
# checkpoint paths listed later in the notebook (one checkpoint per run).
LIST_CV = [
    'TS_5_4',
    'TS_69_2',
    'TS_6_4',
    'TS_6_6',
    'TS_73_6',
    'TS_86_3',
    'TS_99_9',
]

In [6]:
# Switch for Weights & Biases logging; when False, wandb is never imported or called.
WANDB = False
# Run name shown in wandb (only used when WANDB is True).
WANDB_EXP_NAME = f"{EXP_NAME}_optuna_best"
# EXP_NAME = "try"

if WANDB:
    # !pip install wandb
    import wandb
    import os
    from dotenv import load_dotenv
    # Load variables from a local .env file, then log in with the key found in
    # the WANDB_API_KEY environment variable (no key is stored in the notebook).
    load_dotenv()
    wandb.login(key=os.environ.get("WANDB_API_KEY"))

We use a recursive function in this notebook, and we change the settings to explore the graph deep enough.

In [7]:
import sys
# Raise the interpreter's maximum recursion depth to 10000 frames.
# NOTE(review): no recursive function is visible in this notebook itself —
# confirm whether the imported helper modules still need this.
sys.setrecursionlimit(10000)

In [8]:
import warnings
# Silence every warning for the rest of the session.
warnings.simplefilter('ignore')
# Expose the stdlib `warnings` module as `np.warnings`.
# NOTE(review): presumably a shim for a dependency that still accesses
# `np.warnings`, which recent NumPy releases no longer provide — confirm.
np.warnings = warnings

# Prepare trained YOLO model

In [9]:
# add by @minfuka
# Fail fast unless exactly one CUDA device is visible; the inference loop below
# sends every prediction to device "0".
assert torch.cuda.device_count() == 1

In [10]:
# Particle classes handled by the pipeline; the order matches the class indices in `p2i_dict`.
particle_names = ['apo-ferritin', 'beta-amylase', 'beta-galactosidase', 'ribosome', 'thyroglobulin', 'virus-like-particle']

In [11]:
# Particle name -> YOLO class index; the index is the position in the tuple below.
p2i_dict = {
    name: index
    for index, name in enumerate(
        (
            'apo-ferritin',
            'beta-amylase',
            'beta-galactosidase',
            'ribosome',
            'thyroglobulin',
            'virus-like-particle',
        )
    )
}

# Inverse lookup: YOLO class index -> particle name.
i2p = dict(zip(p2i_dict.values(), p2i_dict.keys()))

In [12]:
# Per-particle post-processing parameters, read by PredAggForYOLO.make_predict_yolo:
#   threshold_conf       - detections whose confidence is not above this value are dropped
#                          before clustering.
#   particle_radius      - passed to DBSCAN as `eps`. Written as "base * factor";
#                          NOTE(review): presumably a nominal particle radius times a tuned
#                          scale factor — confirm.
#   particle_min_sample  - passed to DBSCAN as `min_samples`.
#   particle_metric      - passed to DBSCAN as `metric`.
#   weighted_mean        - 1: cluster centre is the confidence-weighted mean of its members;
#                          any other value: plain mean.
#   threshold_total_conf - clusters whose summed confidence is below this value are dropped.
dict_params = {
    'apo-ferritin': {
        'threshold_conf': 0.2866886276864816,
        'particle_radius': 65 * 0.3330259963871552,
        'particle_min_sample': 1,
        'particle_metric': "euclidean",
        'weighted_mean': 1,
        'threshold_total_conf': 0.29662649021298226,
    },
    # 'beta-amylase' is skipped by make_predict_yolo, so the 999 values below are
    # placeholders that are never read.
    'beta-amylase': {
        'threshold_conf': 999,
        'particle_radius': 65,
        'particle_min_sample': 999,
        'particle_metric': "euclidean",
        'weighted_mean': 999,
        'threshold_total_conf': 999,
    },
    'beta-galactosidase': {
        'threshold_conf': 0.03804188733411555,
        'particle_radius': 95 * 0.6125882701766436,
        'particle_min_sample': 2,
        'particle_metric': "manhattan",
        'weighted_mean': 0,
        'threshold_total_conf': 0.5481277130533789,
    },
    'ribosome': {
        'threshold_conf': 0.18266253469712418,
        'particle_radius': 150 * 0.36301986884673526,
        'particle_min_sample': 8,
        'particle_metric': "manhattan",
        'weighted_mean': 1,
        'threshold_total_conf': 1.5194529712970373,
    },
    'thyroglobulin': {
        'threshold_conf': 0.04784271636173772,
        'particle_radius': 135 * 0.5034473145901497,
        'particle_min_sample': 6,
        'particle_metric': "manhattan",
        'weighted_mean': 1,
        'threshold_total_conf': 0.5774365781570538,
    },
    'virus-like-particle': {
        'threshold_conf': 0.10075937641227564,
        'particle_radius': 145 * 1.4179581067925828,
        'particle_min_sample': 13,
        'particle_metric': "euclidean",
        'weighted_mean': 0,
        'threshold_total_conf': 0.37854066112353174,
    },
}

# Define the main process class
Inference passes many variables back and forth between several steps. Wrapping the whole pipeline in a class keeps that state in one place and makes it easier to manage.

In [13]:
class PredAggForYOLO:
    """Slice-wise YOLO detection on one tomogram, merged into 3D particle centres.

    `make_predict_yolo` relies on notebook-level names that must exist when it is
    called: `valid_dir`, `i2p`, `particle_names` and `dict_params`, plus the
    imported `zarr`, `cv2`, `DBSCAN`, `np` and `pd`.
    """

    # Columns of a prediction frame that carries no clustering statistics.
    _BASE_COLUMNS = ['experiment', 'particle_type', 'x', 'y', 'z']

    def __init__(self, first_conf=0.2):
        """Store `first_conf`, the confidence threshold handed to `model.predict`."""
        self.first_conf = first_conf

    def convert_to_8bit(self, x):
        """Clip `x` to its 0.5-99.5 percentile range and rescale it to uint8 in [0, 255]."""
        lower, upper = np.percentile(x, (0.5, 99.5))
        x = np.clip(x, lower, upper)
        # The small epsilon keeps a constant volume from dividing by zero.
        x = (x - x.min()) / (x.max() - x.min() + 1e-12) * 255
        return x.round().astype("uint8")

    def _detect_slices(self, r, model, device_no):
        """Run YOLO on every z-slice of run `r` and return one row per detected box.

        Returns a DataFrame with the columns experiment, particle_type,
        confidence, x, y, z (empty, but with those columns, when nothing is found).
        """
        vol = zarr.open(f'{valid_dir}/static/ExperimentRuns/{r}/VoxelSpacing10.000/denoised.zarr', mode='r')
        vol2 = self.convert_to_8bit(vol[0])

        pts, confs, xs, ys, zs = [], [], [], [], []
        for i in range(vol2.shape[0]):
            # The model input must be a multiple of 32, so each slice is resized
            # to 640x640. Slices are expected to be 630x630: the assignment
            # below fails for any shape that does not broadcast to that.
            tmp_img = np.zeros((630, 630))
            tmp_img[:] = vol2[i]
            inp_arr = np.stack([tmp_img] * 3, axis=-1)
            inp_arr = cv2.resize(inp_arr, (640, 640))

            # `device_no` parameter added by @minfuka (was hard-coded to "0").
            res = model.predict(inp_arr, save=False, imgsz=640, conf=self.first_conf, device=device_no, batch=1, verbose=False)

            # Centre of slice i along z, using the same factor of 10 per voxel as x and y.
            zc = float(i * 10 + 5)
            for result in res:
                boxes = result.boxes
                # Move each tensor to the host once per result instead of once per detection.
                cls_ids = boxes.cls.cpu().numpy()
                if len(cls_ids) == 0:
                    continue
                scores = boxes.conf.cpu().numpy()
                xyxy = boxes.xyxy.cpu().numpy()  # columns: x_min, y_min, x_max, y_max

                # Box centre in the 640-px frame, scaled back to the 630-px slice
                # (630 / 640 = 63 / 64) and multiplied by 10 per pixel to match the
                # VoxelSpacing10.000 volume.
                xc = (xyxy[:, 0] + xyxy[:, 2]) / 2.0 * 10 * (63 / 64)
                yc = (xyxy[:, 1] + xyxy[:, 3]) / 2.0 * 10 * (63 / 64)

                pts.extend(i2p[int(c)] for c in cls_ids)
                confs.extend(scores)
                xs.extend(xc.tolist())
                ys.extend(yc.tolist())
                zs.extend([zc] * len(cls_ids))

        return pd.DataFrame({
            'experiment': [r] * len(xs),
            'particle_type': pts,
            'confidence': confs,
            'x': xs,
            'y': ys,
            'z': zs,
        })

    def _aggregate_particle(self, df, p, params):
        """Merge the detections of particle type `p` into cluster centres.

        Returns the predictions for `p`, or None when no detection of that type
        exceeds `params["threshold_conf"]`.
        """
        pdf = df[df['particle_type'] == p].reset_index(drop=True)
        pdf = pdf[pdf['confidence'] > params["threshold_conf"]].reset_index(drop=True)
        if pdf.empty:
            return None

        list_groups = []
        for _, group in pdf.groupby(['experiment']):
            group = group.reset_index(drop=True)
            if group.empty:
                continue

            # A single detection is passed through without clustering.
            # NOTE(review): this bypasses both `particle_min_sample` and
            # `threshold_total_conf` — confirm that this is intended.
            if len(group) < 2:
                list_groups.append(group[self._BASE_COLUMNS])
                continue

            labels = DBSCAN(
                eps=params["particle_radius"],
                min_samples=params["particle_min_sample"],
                metric=params["particle_metric"],
            ).fit(group[['x', 'y', 'z']].values).labels_
            group['cluster'] = labels

            # Collapse every cluster into one row located at its centre.
            cluster_list = []
            for cluster_id in np.unique(labels):
                if cluster_id == -1:  # DBSCAN noise label
                    continue
                cluster_points = group[group['cluster'] == cluster_id]
                if cluster_points.empty:
                    continue

                total_conf = cluster_points['confidence'].sum()
                if params["weighted_mean"] == 1:
                    avg_x = (cluster_points['x'] * cluster_points['confidence']).sum() / total_conf
                    avg_y = (cluster_points['y'] * cluster_points['confidence']).sum() / total_conf
                    avg_z = (cluster_points['z'] * cluster_points['confidence']).sum() / total_conf
                else:
                    avg_x = cluster_points['x'].mean()
                    avg_y = cluster_points['y'].mean()
                    avg_z = cluster_points['z'].mean()

                # Overwrite every member with the cluster centre, then keep one row.
                cluster_points = cluster_points.copy()  # avoid writing into a view of `group`
                cluster_points.loc[:, ['x', 'y', 'z', "total_conf"]] = avg_x, avg_y, avg_z, total_conf
                cluster_points = cluster_points.drop_duplicates(subset=['x', 'y', 'z'])
                cluster_list.append(cluster_points)

            if cluster_list:
                group_processed = pd.concat(cluster_list, ignore_index=True)
            else:
                # DBSCAN labelled everything as noise: the raw detections are emitted as-is.
                # NOTE(review): these rows skip the `threshold_total_conf` filter — confirm.
                group_processed = group[self._BASE_COLUMNS]
            list_groups.append(group_processed)

        if not list_groups:
            return pd.DataFrame(columns=self._BASE_COLUMNS)

        tmp = pd.concat(list_groups, ignore_index=True)
        # Final filter on the summed cluster confidence (only present for clustered rows).
        if "total_conf" in tmp.columns:
            tmp = tmp[tmp["total_conf"] >= params["threshold_total_conf"]]
            tmp = tmp.drop(columns=["confidence"], errors='ignore')
        return tmp

    # main routine.
    def make_predict_yolo(self, r, model, device_no):
        """Predict the particle centres of experiment run `r`.

        Args:
            r: name of the experiment run (sub-folder of
                `{valid_dir}/static/ExperimentRuns`).
            model: object with a `predict` method, called with the same keyword
                arguments as an Ultralytics `YOLO` model.
            device_no: value forwarded to `model.predict(device=...)`.

        Returns:
            DataFrame sorted by experiment and particle_type with the columns
            experiment, particle_type, x, y, z and, when at least one cluster
            was formed, total_conf. 'beta-amylase' is never predicted. The
            frame is empty (base columns only) when no detection survives the
            per-particle confidence thresholds.
        """
        # All candidate detections of the tomogram.
        df = self._detect_slices(r, model, device_no)
        df = df.sort_values(['particle_type', 'z'], ascending=[True, True])

        final = []
        for p in particle_names:
            if p == 'beta-amylase':
                continue
            predictions = self._aggregate_particle(df, p, dict_params[p])
            if predictions is not None:
                final.append(predictions)

        # pd.concat raises ValueError on an empty list, which used to crash the
        # run whenever no particle type had a detection above its threshold.
        if not final:
            return pd.DataFrame(columns=self._BASE_COLUMNS)

        submission = pd.concat(final, ignore_index=True)
        submission = submission.drop(columns=['cluster'], errors='ignore')
        submission = submission.sort_values(by=['experiment', 'particle_type']).reset_index(drop=True)

        return submission

In [14]:
# Instantiate the main class. 0.03 is the confidence threshold handed to YOLO's
# predict(); the per-particle thresholds in dict_params are applied afterwards.
# (A separate `final_conf` setting is no longer used since version 14.)
agent = PredAggForYOLO(first_conf=0.03)

In [15]:
# subs = []

In [16]:
import time
#add by @minfuka
from concurrent.futures import ProcessPoolExecutor #add by @minfuka

# main loop of inference

In [17]:
# Folder holding the tomograms that are predicted and scored.
valid_dir = f'{DATA_KAGGLE_DIR}/train'

# One checkpoint per entry of LIST_CV, in the same order. The training runs are
# named EXP_NAME, EXP_NAME2, EXP_NAME3, ... so the list is derived from LIST_CV
# and cannot get out of step with it.
list_model_path = [
    f"../../runs/detect/{EXP_NAME}{'' if fold == 1 else fold}/weights/{BEST_OR_LAST}.pt"
    for fold in range(1, len(LIST_CV) + 1)
]

In [18]:
#add by @minfuka
def inference(runs, model, device_no):
    """Predict every run in `runs` with `model` on `device_no`.

    Returns the per-run prediction frames as a list, in the order of `runs`.
    """
    return [
        agent.make_predict_yolo(run_name, model, device_no)
        for run_name in tqdm(runs, total=len(runs))
    ]

In [19]:

# change by @minfuka
# Predict every run in LIST_CV with the checkpoint at the same position in
# list_model_path. zip() silently stops at the shorter sequence, so a length
# mismatch would drop runs without any error; check it explicitly.
if len(LIST_CV) != len(list_model_path):
    raise ValueError(
        f"LIST_CV has {len(LIST_CV)} runs but list_model_path has {len(list_model_path)} checkpoints"
    )

subs = []
for r, model_path in tqdm(zip(LIST_CV, list_model_path), total=len(LIST_CV)):
    model = YOLO(model_path)
    df = agent.make_predict_yolo(r, model, "0")
    subs.append(df)
# Two-GPU variant kept for reference:
# with ProcessPoolExecutor(max_workers=2) as executor:
#     results = list(executor.map(inference, (runs1, runs2), (model, model), ("0", "1")))

100%|██████████| 7/7 [02:02<00:00, 17.51s/it]


In [20]:
df

Unnamed: 0,experiment,particle_type,x,y,z,total_conf
0,TS_99_9,apo-ferritin,3017.554678,335.316231,169.885488,0.667459
1,TS_99_9,apo-ferritin,3835.365039,204.700128,189.343651,0.690654
2,TS_99_9,apo-ferritin,1287.048894,1751.565213,250.156308,7.679961
3,TS_99_9,apo-ferritin,1238.642352,2053.683927,261.222193,7.197177
4,TS_99_9,apo-ferritin,4093.019487,853.142745,265.410399,5.642582
...,...,...,...,...,...,...
421,TS_99_9,virus-like-particle,4205.798359,5535.412578,865.000000,19.915178
422,TS_99_9,virus-like-particle,3018.216779,2469.716336,885.000000,20.789574
423,TS_99_9,virus-like-particle,2250.981122,4311.205566,965.000000,22.332033
424,TS_99_9,virus-like-particle,2017.602473,4754.325834,1070.000000,19.736984


In [21]:
#submission = pd.concat(subs).reset_index(drop=True)
#change by @minfuka
# submission1 = pd.concat(results[1])
# if len(valid_id) == 1:
#     submission = submission1.copy()
# else:
#     submission0 = pd.concat(results[0])
#     submission = pd.concat([submission0, submission1]).reset_index(drop=True)
# Stack the per-run prediction frames into one frame with a fresh 0..N-1 index.
submission = pd.concat(subs).reset_index(drop=True)
# submission.insert(0, 'id', range(len(submission)))

In [22]:
submission.head()

Unnamed: 0,experiment,particle_type,x,y,z,total_conf
0,TS_5_4,apo-ferritin,5873.855213,5130.346958,80.622236,4.601416
1,TS_5_4,apo-ferritin,5698.472007,5004.100515,54.761439,1.028822
2,TS_5_4,apo-ferritin,5745.529136,5106.552843,94.00384,4.899431
3,TS_5_4,apo-ferritin,5468.525229,1523.358845,84.735469,2.406607
4,TS_5_4,apo-ferritin,5711.706678,5000.332761,118.042708,4.933176


# Scoring

https://www.kaggle.com/code/hengck23/3d-unet-using-2d-image-encoder/notebook

In [23]:
import sys
sys.path.append('hengck')

from czii_helper import *
from dataset import *
from model2 import *
import numpy as np
from scipy.optimize import linear_sum_assignment

In [24]:
def do_one_eval(truth, predict, threshold):
    """Match predicted points to ground-truth points one-to-one and count the outcome.

    Points are paired with a minimum-total-distance assignment; a pair counts
    as a hit only if its Euclidean distance is at most `threshold`.

    Args:
        truth: array of T ground-truth points, reshaped to (T, 3).
        predict: array of P predicted points, reshaped to (P, 3).
        threshold: maximum distance for a pair to count as a hit.

    Returns:
        hit: [matched prediction indices, matched truth indices].
        fp: indices of predictions without a valid match.
        miss: indices of truth points without a valid match.
        metric: [P, T, number of hits, number of misses, number of false positives].
    """
    n_pred = len(predict)
    n_truth = len(truth)

    # Degenerate cases: nothing predicted, or nothing to find.
    if n_pred == 0 or n_truth == 0:
        hit = [[], []]
        miss = list(range(n_truth)) if n_pred == 0 else []
        fp = [] if n_pred == 0 else list(range(n_pred))
        return hit, fp, miss, [n_pred, n_truth, len(hit[0]), len(miss), len(fp)]

    # Pairwise Euclidean distances, shape (P, T).
    delta = predict.reshape(n_pred, 1, 3) - truth.reshape(1, n_truth, 3)
    distance = np.sqrt((delta ** 2).sum(axis=2))

    pred_idx, truth_idx = linear_sum_assignment(distance)
    close_enough = distance[pred_idx, truth_idx] <= threshold
    pred_idx, truth_idx = pred_idx[close_enough], truth_idx[close_enough]

    hit = [pred_idx.tolist(), truth_idx.tolist()]
    matched_truth = set(hit[1])
    matched_pred = set(hit[0])
    miss = [t for t in range(n_truth) if t not in matched_truth]
    fp = [p for p in range(n_pred) if p not in matched_pred]

    # Counts used for the leaderboard F-beta computation.
    metric = [n_pred, n_truth, len(hit[0]), len(miss), len(fp)]
    return hit, fp, miss, metric


def compute_lb(submit_df, overlay_dir):
    """Score a submission per experiment run with the weighted F-beta(4) metric.

    Args:
        submit_df: predictions with the columns experiment, particle_type, x, y, z.
        overlay_dir: folder passed to `read_one_truth` to load the ground truth.

    Returns:
        (gb_all, lb_score_all): for every run in `LIST_CV`, in that order, the
        per-particle statistics table and the weighted score. A run of
        `LIST_CV` without any row in `submit_df` is scored as having zero
        predictions instead of raising.

    Raises:
        ValueError: if a particle type has no entry in the weight table.
    """
    # Weight of each particle type in the final score, see
    # https://www.kaggle.com/competitions/czii-cryo-et-object-identification/discussion/544895
    weights = {
        'apo-ferritin': 1,
        'beta-amylase': 0,
        'beta-galactosidase': 2,
        'ribosome': 1,
        'thyroglobulin': 2,
        'virus-like-particle': 1,
    }
    count_columns = ['P', 'T', 'hit', 'miss', 'fp']

    valid_id = list(submit_df['experiment'].unique())
    print(valid_id)

    # Runs of LIST_CV that received no prediction still have to be scored;
    # without them the per-run table below is empty and the scoring fails.
    run_ids = valid_id + [exp for exp in LIST_CV if exp not in valid_id]

    records = []
    for run_id in run_ids:
        truth = read_one_truth(run_id, overlay_dir)
        run_df = submit_df[submit_df['experiment'] == run_id]
        for p in PARTICLE:
            p = dotdict(p)
            xyz_truth = truth[p.name]
            xyz_predict = run_df[run_df['particle_type'] == p.name][['x', 'y', 'z']].values
            # A prediction is a hit within half the particle radius.
            hit, fp, miss, metric = do_one_eval(xyz_truth, xyz_predict, p.radius * 0.5)
            records.append(dict(
                id=run_id, particle_type=p.name,
                P=metric[0], T=metric[1], hit=metric[2], miss=metric[3], fp=metric[4],
            ))
    print('')
    eval_df_all = pd.DataFrame(records)

    gb_all = []
    lb_score_all = []
    for exp in LIST_CV:
        run_eval = eval_df_all[eval_df_all['id'] == exp]
        # Sum only the count columns (the string `id` column is left out).
        gb = run_eval.groupby('particle_type')[count_columns].sum()
        # 0/0 yields NaN when there are no predictions or no truth points; count it as 0.
        gb['precision'] = (gb['hit'] / gb['P']).fillna(0)
        gb['recall'] = (gb['hit'] / gb['T']).fillna(0)
        gb['f-beta4'] = (
            17 * gb['precision'] * gb['recall'] / (16 * gb['precision'] + gb['recall'])
        ).fillna(0)

        gb = gb.sort_index().reset_index(drop=False)
        # Look the weight up by name rather than by row position.
        gb['weight'] = gb['particle_type'].map(weights)
        if gb['weight'].isna().any():
            unknown = gb.loc[gb['weight'].isna(), 'particle_type'].tolist()
            raise ValueError(f"no weight defined for particle types: {unknown}")

        lb_score = (gb['f-beta4'] * gb['weight']).sum() / gb['weight'].sum()
        gb_all.append(gb)
        lb_score_all.append(lb_score)
    return gb_all, lb_score_all


def score_submission(submission, overlay_dir='../../raw/train/overlay/ExperimentRuns'):
    """Score `submission`, display the per-run statistics and return the scores.

    Args:
        submission: prediction frame passed (as a copy) to `compute_lb`.
        overlay_dir: folder with the ground-truth overlays; defaults to the
            location used so far, so existing calls keep working.

    Returns:
        The list of per-run scores returned by `compute_lb`.
    """
    submit_df = submission.copy()
    gb_all, lb_score_all = compute_lb(submit_df, overlay_dir)
    for gb, lb_score in zip(gb_all, lb_score_all):
        display(gb)
        print(f'lb_score: {lb_score:.4f}')
        print('')
        print("--------------------------------")

    return lb_score_all


    #show one ----------------------------------
    # fig = plt.figure(figsize=(18, 8))

    # id = valid_id[0]
    # truth = read_one_truth(id,overlay_dir=f'{valid_dir}/overlay/ExperimentRuns')

    # submit_df = submit_df[submit_df['experiment']==id]
    # for p in PARTICLE:
    #     p = dotdict(p)
    #     xyz_truth = truth[p.name]
    #     xyz_predict = submit_df[submit_df['particle_type']==p.name][['x','y','z']].values
    #     hit, fp, miss, _ = do_one_eval(xyz_truth, xyz_predict, p.radius)
    #     print(id, p.name)
    #     print('\t num truth   :',len(xyz_truth) )
    #     print('\t num predict :',len(xyz_predict) )
    #     print('\t num hit  :',len(hit[0]) )
    #     print('\t num fp   :',len(fp) )
    #     print('\t num miss :',len(miss) )

    #     ax = fig.add_subplot(2, 3, p.label, projection='3d')
    #     if hit[0]:
    #         pt = xyz_predict[hit[0]]
    #         ax.scatter(pt[:, 0], pt[:, 1], pt[:, 2], alpha=0.5, color='r')
    #         pt = xyz_truth[hit[1]]
    #         ax.scatter(pt[:,0], pt[:,1], pt[:,2], s=80, facecolors='none', edgecolors='r')
    #     if fp:
    #         pt = xyz_predict[fp]
    #         ax.scatter(pt[:, 0], pt[:, 1], pt[:, 2], alpha=1, color='k')
    #     if miss:
    #         pt = xyz_truth[miss]
    #         ax.scatter(pt[:, 0], pt[:, 1], pt[:, 2], s=160, alpha=1, facecolors='none', edgecolors='k')

    #     ax.set_title(f'{p.name} ({p.difficulty})')

    # plt.tight_layout()
    # plt.show()
    
    # #--- 
    # zz=0

In [25]:
lb_score_all = score_submission(submission)

['TS_5_4', 'TS_69_2', 'TS_6_4', 'TS_6_6', 'TS_73_6', 'TS_86_3', 'TS_99_9']



Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,58,46,42,4,16,0.724138,0.913043,0.899244,1
1,beta-amylase,0,10,0,10,0,0.0,0.0,0.0,0
2,beta-galactosidase,28,12,6,6,22,0.214286,0.5,0.463636,2
3,ribosome,34,31,22,9,12,0.647059,0.709677,0.70566,1
4,thyroglobulin,112,30,26,4,86,0.232143,0.866667,0.746622,2
5,virus-like-particle,11,11,11,0,0,1.0,1.0,1.0,1


lb_score: 0.7179

--------------------------------


Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,69,35,35,0,34,0.507246,1.0,0.945946,1
1,beta-amylase,0,12,0,12,0,0.0,0.0,0.0,0
2,beta-galactosidase,45,16,12,4,33,0.266667,0.75,0.677741,2
3,ribosome,47,37,34,3,13,0.723404,0.918919,0.904538,1
4,thyroglobulin,120,34,28,6,92,0.233333,0.823529,0.716867,2
5,virus-like-particle,10,9,9,0,1,0.9,1.0,0.993506,1


lb_score: 0.8047

--------------------------------


Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,97,58,54,4,43,0.556701,0.931034,0.89561,1
1,beta-amylase,0,9,0,9,0,0.0,0.0,0.0,0
2,beta-galactosidase,38,12,10,2,28,0.263158,0.833333,0.73913,2
3,ribosome,113,74,67,7,46,0.59292,0.905405,0.87818,1
4,thyroglobulin,126,30,24,6,102,0.190476,0.8,0.673267,2
5,virus-like-particle,10,10,8,2,2,0.8,0.8,0.8,1


lb_score: 0.7712

--------------------------------


Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,80,41,40,1,40,0.5,0.97561,0.923913,1
1,beta-amylase,0,14,0,14,0,0.0,0.0,0.0,0
2,beta-galactosidase,78,11,10,1,68,0.128205,0.909091,0.669291,2
3,ribosome,28,23,21,2,7,0.75,0.913043,0.901515,1
4,thyroglobulin,187,35,28,7,159,0.149733,0.8,0.637216,2
5,virus-like-particle,23,19,17,2,6,0.73913,0.894737,0.883792,1


lb_score: 0.7603

--------------------------------


Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,198,95,89,6,109,0.449495,0.936842,0.880675,1
1,beta-amylase,0,12,0,12,0,0.0,0.0,0.0,0
2,beta-galactosidase,33,14,9,5,24,0.272727,0.642857,0.595331,2
3,ribosome,61,46,41,5,20,0.672131,0.891304,0.874529,1
4,thyroglobulin,111,28,22,6,89,0.198198,0.785714,0.669052,2
5,virus-like-particle,23,22,22,0,1,0.956522,1.0,0.997333,1


lb_score: 0.7545

--------------------------------


Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,107,64,64,0,43,0.598131,1.0,0.961981,1
1,beta-amylase,0,9,0,9,0,0.0,0.0,0.0,0
2,beta-galactosidase,53,23,19,4,34,0.358491,0.826087,0.767221,2
3,ribosome,68,55,46,9,22,0.676471,0.836364,0.824895,1
4,thyroglobulin,129,45,34,11,95,0.263566,0.755556,0.680801,2
5,virus-like-particle,31,29,29,0,2,0.935484,1.0,0.99596,1


lb_score: 0.8113

--------------------------------


Unnamed: 0,particle_type,P,T,hit,miss,fp,precision,recall,f-beta4,weight
0,apo-ferritin,71,36,36,0,35,0.507042,1.0,0.945904,1
1,beta-amylase,0,21,0,21,0,0.0,0.0,0.0,0
2,beta-galactosidase,70,24,13,11,57,0.185714,0.541667,0.486784,2
3,ribosome,78,65,53,12,25,0.679487,0.815385,0.805903,1
4,thyroglobulin,192,49,39,10,153,0.203125,0.795918,0.679303,2
5,virus-like-particle,15,13,13,0,2,0.866667,1.0,0.991031,1


lb_score: 0.7250

--------------------------------


In [26]:
# Initialise wandb (only when logging is enabled).
if WANDB:
    wandb_config = {
        # ... existing settings ...
        # "epochs": CONFIG['epochs'],
        # "learning_rate": CONFIG['learning_rate'],
        # "min_lr": CONFIG["min_lr"],
        # "weight_decay": CONFIG["weight_decay"],
        # "mixup_alpha": CONFIG["mixup_alpha"],
        # "mixup_epochs": CONFIG["mixup_epochs"],  # newly added
    }
    wandb.init(project="CZII", name=WANDB_EXP_NAME, config=wandb_config)

# Print the score of every run; lb_score_all is ordered like LIST_CV.
for exp, score in zip(LIST_CV, lb_score_all):
    print(f'lb_score: {score:.4f}')
    if WANDB:
        wandb.log({f"lb_score_{exp}": score})
# Unweighted mean over the runs.
print(f'mean: {np.mean(lb_score_all):.4f}')
if WANDB:
    wandb.log({"mean_lb_score": np.mean(lb_score_all)})
    wandb.finish()



lb_score: 0.7179
lb_score: 0.8047
lb_score: 0.7712
lb_score: 0.7603
lb_score: 0.7545
lb_score: 0.8113
lb_score: 0.7250
mean: 0.7636


In [27]:
# File name records the experiment, the checkpoint kind and the mean score.
submission_path = f"../../proc/sub/submission_kaggle_{EXP_NAME}_{BEST_OR_LAST}_{np.mean(lb_score_all):.4f}.csv"
# to_csv does not create missing folders, so make sure the target directory exists.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)


In [28]:
submission.head()

Unnamed: 0,experiment,particle_type,x,y,z,total_conf
0,TS_5_4,apo-ferritin,5873.855213,5130.346958,80.622236,4.601416
1,TS_5_4,apo-ferritin,5698.472007,5004.100515,54.761439,1.028822
2,TS_5_4,apo-ferritin,5745.529136,5106.552843,94.00384,4.899431
3,TS_5_4,apo-ferritin,5468.525229,1523.358845,84.735469,2.406607
4,TS_5_4,apo-ferritin,5711.706678,5000.332761,118.042708,4.933176
