# Setup

In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedGroupKFold, GroupKFold
from sklearn.metrics import f1_score
from sklearn.base import clone

from tqdm import tqdm
import os
import random
import ipywidgets as widgets
import warnings
import json
import gc

In [36]:
class CFG:
    train_path = "/kaggle/input/MABe-mouse-behavior-detection/train.csv"
    test_path = "/kaggle/input/MABe-mouse-behavior-detection/test.csv"
    sample_submission_path = "/kaggle/input/MABe-mouse-behavior-detection/sample_submission.csv"
    train_annotation_path = "/kaggle/input/MABe-mouse-behavior-detection/train_annotation"
    train_tracking_path = "/kaggle/input/MABe-mouse-behavior-detection/train_tracking"
    test_tracking_path = "/kaggle/input/MABe-mouse-behavior-detection/test_tracking"
    output_path = "/kaggle/working/"
    
    mode = "validate"
    # mode = "submit"
    
    n_splits = 3
    cv = StratifiedGroupKFold(n_splits)
    seed = 44
    
    activate_log = True  # for mlflow

In [37]:
# SEED 
os.environ["PYTHONHASHSEED"] = str(CFG.seed) 
rnd = np.random.RandomState(CFG.seed)
random.seed(CFG.seed)
np.random.seed(CFG.seed)

In [38]:
train_df = pd.read_csv(CFG.train_path)
test_df = pd.read_csv(CFG.test_path)
ss_df = pd.read_csv(CFG.sample_submission_path)

# Test data

In [39]:
# Meta data
test_df.iloc[0]

lab_id                                                   AdaptableSnail
video_id                                                      438887472
mouse1_strain                                                CD-1 (ICR)
mouse1_color                                                      white
mouse1_sex                                                         male
mouse1_id                                                          13.0
mouse1_age                                                   8-12 weeks
mouse1_condition                                        wireless device
mouse2_strain                                                CD-1 (ICR)
mouse2_color                                                      white
mouse2_sex                                                         male
mouse2_id                                                          27.0
mouse2_age                                                   8-12 weeks
mouse2_condition                                        wireless

In [40]:
# Tracking data
test_tracking_df = pd.read_parquet("/kaggle/input/MABe-mouse-behavior-detection/test_tracking/AdaptableSnail/438887472.parquet")

In [41]:
# Submission 
ss_df

Unnamed: 0,row_id,video_id,agent_id,target_id,action,start_frame,stop_frame
0,0,438887472,mouse1,mouse2,sniff,0,1


# Data Pre-Processing

In [42]:
"""
Remove MABe labs (missing)
Count mice in per frame
"""
print(f"Original: {train_df.shape}")

# Remove 'MABe22_keypoints', 'MABe22_movies' from train data
remove_list = ["MABe22_keypoints", "MABe22_movies"]
train_without_mabe_df = train_df[~train_df["lab_id"].isin(remove_list)].copy()

# count mice in one frame (4 is MAX)
train_without_mabe_df['n_mice'] = 4 - train_without_mabe_df[['mouse1_strain', 'mouse2_strain', 'mouse3_strain', 'mouse4_strain']].isna().sum(axis=1).copy()

print(f"After removing MABe labs: {train_without_mabe_df.shape}")

Original: (8789, 38)
After removing MABe labs: (863, 39)


## Integrate with Tracking data

1. Parquet の tracking データを読み込む（座標情報）
2. bodypart（体の部位）を整理して不要部分を削除
3. (mouse_id, bodypart) × frame の特徴行列に変換（ピボット）
4. cm にスケール変換
5. 行動アノテーション（JSON or parquet）を読み込む
6. 単独行動（single mouse）/ ペア行動（pair of mice）の 特徴行列 + メタ + ラベル を 1サンプルずつ yield する

In [43]:
drop_body_parts =  [
    'headpiece_bottombackleft', 'headpiece_bottombackright', 'headpiece_bottomfrontleft', 'headpiece_bottomfrontright', 
    'headpiece_topbackleft', 'headpiece_topbackright', 'headpiece_topfrontleft', 'headpiece_topfrontright', 
    'spine_1', 'spine_2', 'tail_middle_1', 'tail_middle_2', 'tail_midpoint'
]

# Only these body parts are used
# [
#  "body_center",
#  "ear_left",
#  "ear_right",
#  "lateral_left",
#  "lateral_right",
#  "neck",
#  "nose",
#  "tail_base",
#  "tail_tip"
# ]

In [64]:
def generate_mouse_data(df, traintest, traintest_directory=None, generate_single=True, generate_pair=True):
    
    if traintest_directory is None:
        traintest_directory = f"/kaggle/input/MABe-mouse-behavior-detection/{traintest}_tracking"
        
    for idx, row in df.iterrows():
        lab_id = row.lab_id
        if lab_id.startswith('MABe22') or type(row.behaviors_labeled) != str: 
            continue
        
        video_id = row.video_id
        path = f"{traintest_directory}/{lab_id}/{video_id}.parquet"
        vid = pd.read_parquet(path)
        if len(np.unique(vid.bodypart)) > 5:
            vid = vid[~vid["bodypart"].isin(drop_body_parts)]
        pvid = vid.pivot(columns=['mouse_id', 'bodypart'], index='video_frame', values=['x', 'y'])

        del vid
        gc.collect()
        
        pvid = pvid.reorder_levels([1, 2, 0], axis=1).T.sort_index().T
        # frame | mouse1_head_x | mouse1_head_y | mouse1_nose_x | ... | mouse2_head_x | ...
    
        pvid /= row.pix_per_cm_approx  # pixcel to cm
        # print("---------- pvid ----------")
        # print(pvid.columns)
        # print()
        vid_behaviors = json.loads(row.behaviors_labeled)
        vid_behaviors = sorted(list({b.replace("'", "") for b in vid_behaviors}))
        vid_behaviors = [b.split(',') for b in vid_behaviors]
        vid_behaviors = pd.DataFrame(vid_behaviors, columns=['agent', 'target', 'action'])
        # 'mouse1,self,walk', -> agent, target, action
        # print("---------- vid_behaviors ----------")
        # print(vid_behaviors.columns)
        # print()
              
        if traintest == 'train':
            try:
                annot = pd.read_parquet(path.replace('train_tracking', 'train_annotation'))
            except FileNotFoundError:
                continue

        if generate_single:
            vid_behaviors_subset = vid_behaviors.query("target == 'self'")
            for mouse_id_str in np.unique(vid_behaviors_subset.agent):
                try:
                    mouse_id = int(mouse_id_str[-1])
                    vid_agent_actions = np.unique(vid_behaviors_subset.query("agent == @mouse_id_str").action)
                    single_mouse = pvid.loc[:, mouse_id]
                    assert len(single_mouse) == len(pvid)
                    single_mouse_meta = pd.DataFrame({
                        'video_id': video_id,
                        'agent_id': mouse_id_str,
                        'target_id': 'self',
                        'video_frame': single_mouse.index
                    })
                    if traintest == 'train':
                        single_mouse_label = pd.DataFrame(0.0, columns=vid_agent_actions, index=single_mouse.index)
                        annot_subset = annot.query("(agent_id == @mouse_id) & (target_id == @mouse_id)")
                        for i in range(len(annot_subset)):
                            annot_row = annot_subset.iloc[i]
                            single_mouse_label.loc[annot_row['start_frame']:annot_row['stop_frame'], annot_row.action] = 1.0
                        yield 'single', single_mouse, single_mouse_meta, single_mouse_label
                    else:
                        yield 'single', single_mouse, single_mouse_meta, vid_agent_actions
                except KeyError:
                    pass

        if generate_pair:
            vid_behaviors_subset = vid_behaviors.query("target != 'self'")
            if len(vid_behaviors_subset) > 0:
                for agent, target in itertools.permutations(np.unique(pvid.columns.get_level_values('mouse_id')), 2): # int8
                    agent_str = f"mouse{agent}"
                    target_str = f"mouse{target}"
                    vid_agent_actions = np.unique(vid_behaviors_subset.query("(agent == @agent_str) & (target == @target_str)").action)
                    mouse_pair = pd.concat([pvid[agent], pvid[target]], axis=1, keys=['A', 'B'])
                    assert len(mouse_pair) == len(pvid)
                    mouse_pair_meta = pd.DataFrame({
                        'video_id': video_id,
                        'agent_id': agent_str,
                        'target_id': target_str,
                        'video_frame': mouse_pair.index
                    })
                    if traintest == 'train':
                        mouse_pair_label = pd.DataFrame(0.0, columns=vid_agent_actions, index=mouse_pair.index)
                        annot_subset = annot.query("(agent_id == @agent) & (target_id == @target)")
                        for i in range(len(annot_subset)):
                            annot_row = annot_subset.iloc[i]
                            mouse_pair_label.loc[annot_row['start_frame']:annot_row['stop_frame'], annot_row.action] = 1.0
                        yield 'pair', mouse_pair, mouse_pair_meta, mouse_pair_label
                    else:
                        yield 'pair', mouse_pair, mouse_pair_meta, vid_agent_actions

In [45]:
sample_df = train_without_mabe_df.iloc[[0]]

sample_df
gen = generate_mouse_data(sample_df, 'train')

# Get the 1st item that generate_mouse_data() yields
switch, data, meta, label = next(gen)  

print("switch:", switch)
print("data:", data.head())
print("meta:", meta.head())
print("label:", label.head())

switch: single
data: bodypart    body_center            ear_left      ear_right             \
                      x          y        x   y          x          y   
video_frame                                                             
0             72.596497  32.694561      NaN NaN  71.644066  36.726250   
1             72.667686  33.659126      NaN NaN  71.248749  37.480373   
2             72.168686  34.761688      NaN NaN  71.167435  38.346626   
3             72.593246  35.736687      NaN NaN  71.186996  39.507500   
4             72.657684  36.620377      NaN NaN  71.246559  40.469189   

bodypart    lateral_left            lateral_right            neck      \
                       x          y             x          y    x   y   
video_frame                                                             
0              73.903687  31.237312     70.724190  32.908939  NaN NaN   
1              74.042686  31.809875     70.599503  33.544876  NaN NaN   
2              73.838440  33.

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


In [47]:
body_parts_tracked_list = list(np.unique(train_without_mabe_df.body_parts_tracked))
body_parts_tracked_list

['["body_center", "ear_left", "ear_right", "headpiece_bottombackleft", "headpiece_bottombackright", "headpiece_bottomfrontleft", "headpiece_bottomfrontright", "headpiece_topbackleft", "headpiece_topbackright", "headpiece_topfrontleft", "headpiece_topfrontright", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_midpoint", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "hip_left", "hip_right", "lateral_left", "lateral_right", "nose", "spine_1", "spine_2", "tail_base", "tail_middle_1", "tail_middle_2", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_midpoint", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "nose", "tail_base", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "nose", "tail_base"]',
 '["body_center", "ear_left", "ear_right", "nose", "tail_base"]',
 '["ear_left", "ear_right", "head", "tai

## Feature Engineering

In [48]:
# None

# Training

In [49]:
from sklearn.metrics import f1_score
import xgboost as xgb
import itertools


#  threshold optimization (binary F1)
def optimize_threshold(pred, true):
    best_f1 = 0
    best_th = 0.5
    for th in np.linspace(0.05, 0.95, 19):
        f1 = f1_score(true, (pred >= th).astype(int), zero_division=0)
        if f1 > best_f1:
            best_f1 = f1
            best_th = th
    return best_th


#  Cross validation
def cross_validate_xgb(X, y_df, meta_df, tracking_key):

    meta_df = meta_df.set_index("video_frame")
    X.index = meta_df.index
    y_df.index = meta_df.index

    oof = pd.DataFrame(index=meta_df.index)
    thresholds = {}
    f1_results = []

    for action in y_df.columns:

        valid_mask = ~y_df[action].isna()
        y = y_df[action][valid_mask].astype(float).values
        X_action = X.loc[valid_mask]
        groups = meta_df.video_id[valid_mask]

        if y.sum() == 0:
            print(f"Action {action}: all negatives -> skipped")
            oof[action] = np.zeros(len(meta_df))
            thresholds[action] = 1.0
            continue

        gkf = GroupKFold(CFG.n_splits)
        oof_pred = np.zeros(len(y))

        for fold, (tr_idx, va_idx) in enumerate(gkf.split(X_action, y, groups)):

            train_data = xgb.DMatrix(X_action.iloc[tr_idx], label=y[tr_idx])
            valid_data = xgb.DMatrix(X_action.iloc[va_idx], label=y[va_idx])

            model = xgb.train(
                params=dict(
                    objective='binary:logistic',
                    eval_metric='logloss',
                    max_depth=6,
                    eta=0.05,
                    subsample=0.8,
                    colsample_bytree=0.8,
                ),
                dtrain=train_data,
                num_boost_round=500,
                early_stopping_rounds=50,
                evals=[(valid_data, 'valid')],
                verbose_eval=False
            )

            oof_pred[va_idx] = model.predict(valid_data)

            model.save_model(
                f"xgb_model_{tracking_key}_{action}_fold{fold}.json"
            )

        best_th = optimize_threshold(oof_pred, y)
        thresholds[action] = best_th

        f1 = f1_score(y, (oof_pred >= best_th).astype(int), zero_division=0)
        f1_results.append((action, f1))

        full_pred = np.zeros(len(meta_df))
        full_pred[valid_mask] = oof_pred
        oof[action] = full_pred
        
        print(f"Action {action}: F1={f1:.4f}, threshold={best_th:.2f}")

    return oof, f1_results, thresholds


#  Prediction -> convert to submission
def predict_multiclass(pred, meta_df, thresholds):

    ama = pred.values.argmax(axis=1)
    max_proba = pred.values.max(axis=1)

    th_array = np.array([thresholds.get(action, 0.5) for action in pred.columns])
    pass_mask = max_proba >= th_array[ama]

    ama = np.where(pass_mask, ama, -1)
    ama = pd.Series(ama, index=meta_df.index)

    changes = ama != ama.shift(1)
    ama_changes = ama[changes]
    meta_changes = meta_df[changes]  

    mask = ama_changes >= 0
    if len(mask) > 0:
        mask.iloc[-1] = False

    submission = pd.DataFrame({
        "video_id": meta_changes.video_id[mask].values,
        "agent_id": meta_changes.agent_id[mask].values,
        "target_id": meta_changes.target_id[mask].values,
        "action": pred.columns[ama_changes[mask].values],
        "start_frame": ama_changes.index[mask],
        "stop_frame": ama_changes.index[1:][mask[:-1]],
    })

    return submission

In [69]:
def normalize_tracking_str(raw_str):
    parts = json.loads(raw_str)
    parts = [p for p in parts if p not in drop_body_parts]  # drop unwanted
    return json.dumps(sorted(parts))

train_without_mabe_df["body_parts_tracked"] = (
    train_without_mabe_df["body_parts_tracked"].apply(normalize_tracking_str)
)

test_df["body_parts_tracked"] = (
    test_df["body_parts_tracked"].apply(normalize_tracking_str)
)

In [70]:
"""
Train
"""
all_submissions = []
all_f1 = []
all_thresholds = {}

body_parts_tracked_list = sorted(train_without_mabe_df.body_parts_tracked.unique())

# Different videos have different sets of tracked body parts.
# Therefore, need to train separate models for each tracking set.
for section in range(1, len(body_parts_tracked_list)):
    
    raw_str = body_parts_tracked_list[section]
    body_parts = json.loads(raw_str)
    body_parts_sorted_str = json.dumps(sorted(body_parts))  # to unify the order with test data
    
    print("="*30)
    print(f"{section}/{len(body_parts_tracked_list)-1} Processing videos with: {body_parts_sorted_str}\n")

    train_subset = train_without_mabe_df[
        train_without_mabe_df.body_parts_tracked == raw_str
    ]

    single_tracking_data = []
    single_label = []
    single_meta = []

    pair_tracking_data = []
    pair_label = []
    pair_meta = []

    # Pre-processing for train data
    for switch, tracking_data, meta, label in generate_mouse_data(train_subset, "train"):
        tracking_data = tracking_data.astype(np.float32)

        if switch == "single":
            single_tracking_data.append(tracking_data)
            single_label.append(label)
            single_meta.append(meta)
        else:
            pair_tracking_data.append(tracking_data)
            pair_label.append(label)
            pair_meta.append(meta)

        del tracking_data, meta, label
    gc.collect()

    # =========================
    # Single training
    # =========================
    if len(single_tracking_data) > 0:
        X = pd.concat(single_tracking_data, ignore_index=True)
        y_df = pd.concat(single_label, ignore_index=True)
        meta_df = pd.concat(single_meta, ignore_index=True)

        if "video_frame" not in meta_df.columns:
            print("[ERROR] video_frame column missing")
            print(meta_df.head())
            continue

        oof, f1_result, th = cross_validate_xgb(
            X, y_df, meta_df, tracking_key=body_parts_sorted_str
        )
        all_f1.extend(f1_result)
        all_thresholds[(body_parts_sorted_str, "single")] = th
        sub = predict_multiclass(oof, meta_df.set_index("video_frame"), th)
        all_submissions.append(sub)

    # =========================
    # Pair training (optional)
    # =========================
    if len(pair_tracking_data) > 0:
        X = pd.concat(pair_tracking_data, ignore_index=True)
        y_df = pd.concat(pair_label, ignore_index=True)
        meta_df = pd.concat(pair_meta, ignore_index=True)

        if "video_frame" not in meta_df.columns:
            print("[ERROR] video_frame column missing")
            print(meta_df.head())
            continue

        oof, f1_result, th = cross_validate_xgb(
            X, y_df, meta_df, tracking_key=body_parts_sorted_str
        )
        all_f1.extend(f1_result)
        all_thresholds[(body_parts_sorted_str, "pair")] = th
        sub = predict_multiclass(oof, meta_df.set_index("video_frame"), th)
        all_submissions.append(sub)
    print()

1/7 Processing videos with: ["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_tip"]

Action rear: F1=0.3027, threshold=0.15

2/7 Processing videos with: ["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "nose", "tail_base", "tail_tip"]


3/7 Processing videos with: ["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "nose", "tail_base"]


4/7 Processing videos with: ["body_center", "ear_left", "ear_right", "nose", "tail_base"]

Action biteobject: F1=0.0012, threshold=0.10
Action climb: F1=0.4412, threshold=0.15
Action dig: F1=0.3749, threshold=0.20
Action exploreobject: F1=0.1434, threshold=0.05
Action rear: F1=0.1763, threshold=0.10
Action selfgroom: F1=0.2214, threshold=0.10

5/7 Processing videos with: ["ear_left", "ear_right", "head", "tail_base"]

Action rear: F1=0.6347, threshold=0.30
Action rest: F1=0.4068, threshold=0.10
Action selfgroom: F1=0.1228, threshold=0.05
Action cli

# Inference & Submission

In [72]:
def submit_XGB(test_df, all_thresholds, n_folds=CFG.n_splits):

    submission_list = []

    body_parts_tracked_list = sorted(test_df.body_parts_tracked.unique())
    
    print(body_parts_tracked_list)
    
    for section, body_parts_tracked_str in enumerate(body_parts_tracked_list):

        print(f"[Test] Processing tracking set {section}: {body_parts_tracked_str}")

        test_subset = test_df[test_df.body_parts_tracked == body_parts_tracked_str]

        generator = generate_mouse_data(
            test_subset,
            'test',
            generate_single=True,
            generate_pair=True
        )
            
        for switch, data_te, meta_te, actions_te in generator:

            video_id = meta_te.video_id.iloc[0]
            print(f"video_id={video_id}, switch={switch}")

            # ---- sorted key ----
            body_parts = json.loads(body_parts_tracked_str)
            body_parts_sorted_str = json.dumps(sorted(body_parts))
            
            # ---- sorted keyで検索 ----
            th = all_thresholds.get((body_parts_sorted_str, switch), None)
            print(th)
            if th is None:
                print("No thresholds, skipping...")
                continue

            X_te = data_te.astype(np.float32)
            dtest = xgb.DMatrix(X_te)

            pred = pd.DataFrame(index=meta_te.video_frame)

            for action in actions_te:

                fold_preds = []
                for fold in range(n_folds):

                    model_path = f"xgb_model_{body_parts_sorted_str}_{action}_fold{fold}.json"
                    
                    if not os.path.exists(model_path):
                        continue

                    model = xgb.Booster()
                    model.load_model(model_path)

                    fold_pred = model.predict(dtest)
                    fold_preds.append(fold_pred)

                if len(fold_preds) == 0:
                    print("Model is not found")
                    continue

                avg_pred = np.mean(fold_preds, axis=0)
                pred[action] = avg_pred

            if pred.shape[1] == 0:
                continue

            submission_part = predict_multiclass(pred, meta_te.set_index("video_frame"), th)
            submission_list.append(submission_part)

    if len(submission_list) == 0:
        print("No submission data produced!")
        return None

    submission = pd.concat(submission_list, ignore_index=True)
    submission.to_csv("submission.csv", index=False)
    print("submission.csv saved!")

    return submission


In [73]:
submit_XGB(test_df, all_thresholds)

['["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_tip"]']
[Test] Processing tracking set 0: ["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_tip"]
video_id=438887472, switch=single
{'rear': 0.15}
Model is not found
video_id=438887472, switch=single
{'rear': 0.15}
Model is not found
video_id=438887472, switch=single
{'rear': 0.15}
Model is not found
video_id=438887472, switch=single
{'rear': 0.15}
Model is not found
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438887472, switch=pair
None
No thresholds, skipping...
video_id=438