In [None]:
import sys
import pandas as pd
import polars as pl
import torch
import matplotlib.pyplot as plt
import scipy as sp
import numpy as np
import sys
import os
import gc
import random
import time
import pickle
from typing import Optional, Tuple
from contextlib import contextmanager
import importlib.util
import copy
import math
from sklearn.model_selection import StratifiedGroupKFold
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.amp import GradScaler, autocast
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [None]:
# Cap the number of threads PyTorch uses for intra-op parallelism on CPU at 4.
torch.set_num_threads(4)

In [None]:
# Compatibility patch for unpickling artifacts created in the local (training)
# environment: those pickles reference modules under `numpy._core`, which older
# NumPy releases expose only as `numpy.core`.
import numpy

try:
    # When the running NumPy already provides `numpy._core.numeric`, leave
    # sys.modules alone so the genuine package is not replaced by the legacy
    # `numpy.core` alias.
    import numpy._core.numeric  # noqa: F401
except ImportError:
    # Older NumPy: register the legacy modules under the new dotted names so
    # pickle's module lookup can resolve them.
    sys.modules['numpy._core'] = numpy.core
    sys.modules['numpy._core.numeric'] = numpy.core.numeric

In [None]:
# Gesture name -> integer class id. Ids are assigned by position in the tuple
# below: the first eight entries are the "target" gestures (ids 0-7), the rest
# are "non-target" gestures (ids 8-17).
CLASS_CODE = {
    gesture: code
    for code, gesture in enumerate((
        ### target
        'Cheek - pinch skin',
        'Forehead - pull hairline',
        'Neck - scratch',
        'Neck - pinch skin',
        'Eyelash - pull hair',
        'Eyebrow - pull hair',
        'Forehead - scratch',
        'Above ear - pull hair',
        ### non-target
        'Write name on leg',
        'Feel around in tray and pull out an object',
        'Wave hello',
        'Write name in air',
        'Text on phone',
        'Pull air toward your face',
        'Pinch knee/leg skin',
        'Scratch knee/leg skin',
        'Drink from bottle/cup',
        'Glasses on/off',
    ))
}
# Reverse lookup: integer class id -> gesture name.
LABEL_MAP = dict(zip(CLASS_CODE.values(), CLASS_CODE.keys()))

In [None]:
def comp_metric(pred,target):
    """Average of a binary F1 and a macro F1 over integer class ids.

    Args:
        pred: predicted class ids (anything ``np.array`` accepts).
        target: ground-truth class ids, same length as ``pred``.

    Returns:
        ``0.5 * f1_binary + 0.5 * f1_macro`` where
        * ``f1_binary`` scores "id < 8" vs "id >= 8", and
        * ``f1_macro`` is computed after every id above 8 is clipped to 8,
          so all ids >= 8 count as a single class.
    """
    collapse_at = 8  # ids below this are kept distinct; the rest are merged
    y_pred = np.array(pred)
    y_true = np.array(target)

    binary_f1 = f1_score(
        y_true < collapse_at,
        y_pred < collapse_at,
        pos_label=True,
        zero_division=0,
        average='binary',
    )

    macro_f1 = f1_score(
        np.clip(y_true, a_min=None, a_max=collapse_at),
        np.clip(y_pred, a_min=None, a_max=collapse_at),
        average='macro',
        zero_division=0,
    )
    return 0.5 * binary_f1 + 0.5 * macro_f1


def value_to_rank(X):
    """Replace each value by its 0-based ascending rank along the last dim.

    Applying argsort twice turns values into rank positions; the result is
    returned as a float32 tensor with the same shape as ``X``.
    """
    sort_order = torch.argsort(X, dim=-1)
    ranks = torch.argsort(sort_order, dim=-1)
    return ranks.type(torch.float32)

def soft_cross_entropy(pred, soft_targets,label_smoothing=0.0,weight=None,reduce=True):
    """Cross-entropy between logits and soft (probability-like) targets.

    Args:
        pred: logits; softmax is taken over dim 1.
        soft_targets: target distribution per row, same shape as ``pred``.
        label_smoothing: ``label_smoothing / num_classes`` is added to every
            target entry, after which each row is renormalised to sum to 1.
        weight: optional per-row weights; reshaped to a column and multiplied
            into the log-probabilities.
        reduce: if True return the mean over rows, otherwise the per-row loss.

    Returns:
        A scalar tensor (``reduce=True``) or a 1-D tensor of per-row losses.
    """
    n_classes = soft_targets.shape[1]
    smoothed = soft_targets + label_smoothing / n_classes
    smoothed = smoothed / smoothed.sum(axis=1, keepdims=True)

    log_p = F.log_softmax(pred, dim=1)
    if weight is not None:
        log_p = log_p * weight.reshape((-1, 1))

    per_row = -(smoothed * log_p).sum(dim=1)
    return per_row.mean() if reduce else per_row

In [None]:
class InferenceWrapper:
    def __init__(self,path='',device='cuda'):
        self.device=device
        modules={}
        for k in ['CFG','model','data_pipline']:
            spec = importlib.util.spec_from_file_location(f'_{k}', f'{path}/_{k}.py')
            module = importlib.util.module_from_spec(spec)
            modules[k] = module
            spec.loader.exec_module(module)
        self.modules=modules
        self.prep_data=modules['data_pipline'].extract_data_dict
        self.dataset=modules['data_pipline'].CMI3Dataset([],index=[],max_len=modules['CFG'].CFG.max_len)
        self.models={}
        val_res=pd.read_pickle(f'{path}/res.pkl')
        for i in range(len(val_res)):
            model=modules['model'].CMI3Net(
                    **modules['CFG'].CFG.model_cfg,
            )
            model.load_state_dict(val_res[i]['best_weight'])
            model.to(self.device)
            model.eval()
            self.models[i]=model
        return

    def predict(self,df,meta,model_idx=None):
        df=copy.deepcopy(df)
        meta=copy.deepcopy(meta)
        data_dict=self.prep_data(df,meta,test_mode=True)
        batch_data=self.dataset.proc_data_dict(data_dict)
        for k,v in batch_data.items():
            batch_data[k]=torch.as_tensor(v).unsqueeze(0).to(self.device)
            
        with torch.inference_mode():
            with autocast(device_type=self.device, dtype=torch.float32):
                if model_idx is not None:
                    pred=value_to_rank(self.models[model_idx](batch_data))
                else:
                    pred=0.0
                    for v in self.models.values():
                        pred+=value_to_rank(v(batch_data))/len(self.models)
        return pred[0].cpu()

In [None]:
class TestCFG:
    """Inference-time settings; the column lists are attached right below."""
    device='cuda'
    extra_cols=[]

# Column names: `thm_1` .. `thm_5`, followed by `tof_{1..5}_v{0..63}`
# (first index varies slowest).
thm_cols=[f'thm_{n}' for n in range(1,6)]
tof_cols=[f'tof_{a}_v{b}' for a in range(1,6) for b in range(64)]
# extra_cols is a fresh list (thm + tof); tof_cols is shared by reference.
TestCFG.extra_cols=thm_cols+tof_cols
TestCFG.tof_cols=tof_cols

In [None]:
# Training sequences, and the per-subject demographics table indexed by `subject`.
train_df = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv')
train_meta = pd.read_csv(
    '/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv'
).set_index('subject')

In [None]:
# One wrapper per trained experiment directory, used by the validation cell
# below. These are the same two directories that `predict` later loads under
# its 'IMU' and 'All' keys respectively.
model1=InferenceWrapper(path='/kaggle/input/m21-cnn-gru-train')
model2=InferenceWrapper(path='/kaggle/input/all-m14-cnn-gru-train')

In [None]:
# Validation check: for every wrapper, rebuild the per-sequence inputs with that
# wrapper's own data pipeline, recreate a 5-fold subject-grouped stratified
# split, score fold i's validation sequences with model i, and print the
# competition metric over all folds.
check_list=[
    ('model1',model1),
    ('model2',model2),
]
# Raw model outputs per wrapper (one tensor each), and the matching targets.
oof_preds=[]
oof_target=None
for model_name,model in check_list:
    meta_df=train_meta
    seq_df=train_df
    data_dicts=[]
    info={'sequence_id':[],'subject':[],'target':[]}
    # One data dict (and one `info` row) per sequence, in groupby order.
    for seq_id,sub_df in tqdm(seq_df.groupby('sequence_id')):
        subject=sub_df['subject'].iloc[0]
        sub_meta=meta_df.loc[subject].copy()
        data_dict=model.prep_data(sub_df,sub_meta,test_mode=False)
        # Placeholder value; presumably the dataset class expects this key to
        # exist -- TODO confirm against CMI3Dataset.
        data_dict['teacher_logits']=np.nan
        data_dicts.append(data_dict)
        info['sequence_id'].append(seq_id)
        info['subject'].append(subject)
        info['target'].append(sub_df['gesture'].iloc[0] if 'gesture' in sub_df.columns else np.nan)
    info=pd.DataFrame(info)
    # Attach demographics and orientation/gesture columns; only `target` and
    # `subject` are actually consumed by the split below.
    info=info.merge(train_meta,on='subject',how='left')
    info=info.merge(
        train_df[['orientation','gesture','sequence_id']].drop_duplicates('sequence_id'),
        on='sequence_id',how='left'
    )
    # Recreate the folds: stratified by gesture, grouped by subject, fixed seed.
    # NOTE(review): evaluating models[i] on fold i is only a true held-out score
    # if training used this exact split -- confirm against the training code.
    sgkf = StratifiedGroupKFold(n_splits=5,random_state=777, shuffle=True)
    splits=sgkf.split(info, info['target'].values, info['subject'].values)
    Y_val=[]
    Y_pred_val=[]
    for i,(train_index,val_index) in enumerate(splits):
        val_dataset = model.modules['data_pipline'].CMI3Dataset(data_dicts,index=val_index,max_len=model.modules['CFG'].CFG.max_len)
        val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, pin_memory=True, drop_last=False, num_workers=4, persistent_workers=True)
        for batch_data in val_loader:
            # Move every tensor of the batch to the inference device.
            for k,v in batch_data.items():
                batch_data[k]=v.to(TestCFG.device)
            with torch.inference_mode():
                with autocast(device_type=TestCFG.device, dtype=torch.float32):
                    Y_pred = model.models[i](batch_data)
            # NOTE(review): 'Y_class' is assumed to hold integer class ids
            # comparable with the argmax taken below -- confirm.
            Y_val.append(batch_data['Y_class'])
            Y_pred_val.append(Y_pred)
    
            del batch_data
            del Y_pred
    
    # Concatenate all folds (still on the device until here) and move to CPU.
    Y_val=torch.cat(Y_val).cpu()
    Y_pred_val=torch.cat(Y_pred_val).cpu()
    # Score hard predictions (argmax over the last dim) against the targets.
    score=comp_metric(torch.argmax(Y_pred_val,dim=-1),Y_val)
    print(f'{model_name} val score:{score:.4f}')
    oof_preds.append(Y_pred_val)
    # Overwritten on every iteration; after the loop it holds the targets
    # collected for the last wrapper in check_list.
    oof_target=Y_val

In [None]:
# Model registry used by `predict`; starts empty and is filled in on the first
# `predict` call.
inference_comb=None

In [None]:
def predict(sequence, demographics):
    """Predict the gesture name for one sequence.

    Args:
        sequence: object exposing ``.to_pandas()`` that yields the sequence's
            rows (presumably a polars DataFrame -- confirm against the
            evaluation API).
        demographics: object exposing ``.to_pandas()``; only its first row is
            used.

    Returns:
        The gesture name (a value of ``LABEL_MAP``) chosen by majority vote of
        the selected model group.
    """
    global inference_comb
    if inference_comb is None:
        # Lazy one-time load of both model groups on the first call.
        inference_comb={
            'IMU':[
                (1.0,InferenceWrapper(path='/kaggle/input/m21-cnn-gru-train')),
            ],
            'All':[
                (1.0,InferenceWrapper(path='/kaggle/input/all-m14-cnn-gru-train')),
            ],
        }

    df = sequence.to_pandas()
    meta = demographics.to_pandas()
    meta = meta.loc[meta.index[0]]

    # Use the 'IMU' group only when every extra-column value is missing; an
    # empty selection (no rows or no columns) falls through to 'All'.
    extra_missing = df[TestCFG.extra_cols].isna().to_numpy()
    imu_only = extra_missing.size > 0 and bool(extra_missing.all())
    pred_comb = inference_comb['IMU' if imu_only else 'All']

    # Majority vote over each model's argmax class.
    # NOTE(review): the first tuple element (1.0) looks like a weight but is
    # not applied to the vote -- confirm whether weighting is intended.
    votes = [int(torch.argmax(model.predict(df, meta))) for _, model in pred_comb]
    top_count = max(votes.count(v) for v in votes)
    # Ties go to the smallest class id.
    pred_idx = min(v for v in set(votes) if votes.count(v) == top_count)
    return LABEL_MAP[pred_idx]

In [None]:
# Kaggle competition interface
import kaggle_evaluation.cmi_inference_server
# Hand `predict` to the competition's inference server.
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Environment variable unset or empty: run against the bundled test files
    # through the local gateway.
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )
else:
    # Environment variable set: start serving requests.
    inference_server.serve()