In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("shadenalsuhayan/karsl-medical-signbart-545-01")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/karsl-medical-signbart-545-01


In [2]:
# From the Dataset
import kagglehub

# Download latest version
path = kagglehub.dataset_download("yousefdotpy/karsl-502")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/karsl-502


In [5]:
%pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.3-py3-none-any.whl.metadata (1.6 kB)
INFO: pip is looking at multiple versions of opencv-contrib-python to determine which version is compatible with other requirements. This could take a while.
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m00:01[0m:00:01[0m
[?25hDownloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━

In [7]:
import os, pickle, random, time
import numpy as np
import cv2
import mediapipe as mp
from tqdm import tqdm

KARSL_ROOT = "/kaggle/input/karsl-502"  #original dataset root
PREVIOUS_DATASET_ROOT = "/kaggle/input/karsl-medical-signbart-545-01"

OUT_ROOT = "/kaggle/working/karsl_medical_emergencies_signbart"
os.makedirs(OUT_ROOT, exist_ok = True)

In [10]:
!cp -r /kaggle/input/karsl-medical-signbart-545-01/karsl_medical_emergencies_signbart /kaggle/working/

In [11]:
!ls /kaggle/working/karsl_medical_emergencies_signbart

id2label.json  label2id.json  train


In [20]:
import json

with open('/kaggle/working/karsl_medical_emergencies_signbart/label2id.json', 'r') as f:
    label2id = json.load(f)

folder_to_class = set(label2id.keys())

print('Number of selected signs:', len(folder_to_class))

Number of selected signs: 131


In [21]:
def split_target(original_split, signer):
    if original_split == 'train':
        if signer in ['01', '02']:
            return 'train'
        elif signer == '03':
            return 'val'
        else:
            return None #safety check
    elif original_split == 'test':
        return 'test'
    else:
        return None #safety check
    '''
    The KArSL original split is: for each signer a train and test folders
    As we will be training the model on train folders of signers 01 and 02 and validation will be conducted on train folder of signer 03
    Testing will be on test folders of all 3 signers (details of this process will be explained later on :)
    
    MAP KArSL split TO our target split
    train of signers 01/02 > train
    train of signer 03 > val
    test of all signers > test
    '''

In [22]:
class_class_splitting = ['train', 'test']
signers   = ['03', '01', '02']   #signer 03 before for early val

In [24]:
#Holistic is a module in mediapipe that detects: body pose (33 joints) - left hand (21 joints) - right hand (21 joints) and face (but we do not need this)
#mp_holistic = mp.solutions.holistic

def extract_keypoints(image, pose, hands):
    '''
    This function takes RGB frame and extract 75 landmarks/joints 
    it returns shape as (75, 2) array ,, 33 pose + 21 left hand + 21 right hand = 75 ,, 2 as each has (x,y) coordinates
    coordinates are normalized to [0,1] as Mediapipe style
    '''

    #image shape: height, width, and color (default color for OpenCV is BGR) ,,  so ignore it > (_)
    #Mediapipe requires RGB
    h, w, _ = image.shape
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #covert to RGB

    #MediaPipe 2 models // HOLISTIC took too long
    pose_results = pose.process(img_rgb)
    hands_results = hands.process(img_rgb)
    
    #body pose landmarks
    pose_xy = np.zeros((33, 2), dtype = np.float32) #create empty array
    if pose_results.pose_landmarks: #if mediapipe detects the pose
        for i, lm in enumerate(pose_results.pose_landmarks.landmark): #loop for every joint
                pose_xy[i, 0] = lm.x #filling
                pose_xy[i, 1] = lm.y #filling

    left_hand_xy  = np.zeros((21, 2), dtype = np.float32)
    right_hand_xy = np.zeros((21, 2), dtype = np.float32)

    if hands_results.multi_hand_landmarks and hands_results.multi_handedness:
        for hand_landmarks, handedness in zip(hands_results.multi_hand_landmarks, hands_results.multi_handedness):
            hand_label = handedness.classification[0].label  #left or right
            
            hand_xy = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark], dtype = np.float32)
            
            if hand_label == 'Left':
                left_hand_xy = hand_xy
            else:
                right_hand_xy = hand_xy

    keypoints = np.concatenate([pose_xy, left_hand_xy, right_hand_xy], axis = 0)  #total of 75 > SignBart expect (75, 2)
    return keypoints

In [25]:
def repetition(rep_path, pose, hands):
    '''
    This function reads all frames in one repetition (each signer has made 50 repetition) , a repetition contains many frames
    rep_path is the path to one repetition folder containing the (.jpg) frames
    returns np.array of shape (T, 75, 2) as T is number of frames ,, or returns None if no valid frames (something could of went wrong)
    holistic is a pre-created model 
    '''
    frame_files = sorted([f for f in os.listdir(rep_path) if f.lower().endswith('.jpg')]) #all frames are .jpg so this is just for ensuring

    if len(frame_files) == 0: #no valid frames in the repetition
        return None
    
    seq = [] #sequence that holds all frames' extractd 75 keypoints

    for frame_name in frame_files:
        frame_path = os.path.join(rep_path, frame_name)
        
        img = cv2.imread(frame_path)
        if img is None: #if no image to read > skip
            continue
            
        keypoints = extract_keypoints(img, pose, hands) #pass the image and the model to extract_keypoints function
        seq.append(keypoints) #stack it to the sequence
            
    if len(seq) == 0: #if all frames fails to be detected by mediapipe
        return None

    return np.stack(seq, axis = 0)  #this conver the sequence holding all frames individually to (T, 75, 2) as T is number of frames - This represent one sample in the dataset :)

In [29]:
mp_pose  = mp.solutions.pose
mp_hands = mp.solutions.hands

with (
    mp_pose.Pose(static_image_mode=True) as pose,
    mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands
):
    test_rep = repetition(
        '/kaggle/input/karsl-502/01/01/train/0071/03_01_0071_(01_12_16_15_52_41)_c',
        pose,
        hands
    )
    print(test_rep.shape if test_rep is not None else "FAILED")

W0000 00:00:1765516641.349401     172 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516641.362144     172 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516641.421176     166 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516641.485157     166 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516641.512082     169 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


(29, 75, 2)


In [30]:
MAX_TRAIN_REPETITION = 10

total_new = 0
skip_empty = 0
skip_not_in_subset = 0

mp_pose  = mp.solutions.pose
mp_hands = mp.solutions.hands

with (
    mp_pose.Pose(static_image_mode=True) as pose,
    mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands
):
    for original_split in splitting:
        for signer in signers:

            target_split = split_target(original_split, signer)
            if target_split is None:
                continue

            base_dir = os.path.join(KARSL_ROOT, signer, signer, original_split)
            if not os.path.isdir(base_dir):
                continue

            print(f'\nProcessing signer {signer}, {original_split} > target {target_split}')

            for sign_folder in tqdm(os.listdir(base_dir)):
                if sign_folder not in folder_to_class:
                    skip_not_in_subset += 1
                    continue

                in_sign_dir = os.path.join(base_dir, sign_folder)
                if not os.path.isdir(in_sign_dir):
                    continue

                out_sign_dir = os.path.join(OUT_ROOT, target_split, sign_folder)
                os.makedirs(out_sign_dir, exist_ok=True)

                rep_folders = sorted([
                    rf for rf in os.listdir(in_sign_dir)
                    if os.path.isdir(os.path.join(in_sign_dir, rf))
                ])

                if original_split == 'train':
                    rep_folders = rep_folders[:MAX_TRAIN_REPETITION]

                for rep_folder in rep_folders:
                    rep_path = os.path.join(in_sign_dir, rep_folder)

                    out_name = f'{signer}_{rep_folder}.pkl'
                    out_path = os.path.join(out_sign_dir, out_name)

                    if os.path.exists(out_path):
                        continue   # <-- THIS is why resume works

                    skeleton_seq = repetition(rep_path, pose, hands)
                    if skeleton_seq is None:
                        skip_empty += 1
                        continue

                    sample = {
                        'keypoints': skeleton_seq,
                        'class': sign_folder
                    }

                    with open(out_path, 'wb') as f:
                        pickle.dump(sample, f)

                    total_new += 1
                    if total_new % 100 == 0:
                        print('Saved new samples:', total_new)



Processing signer 03, train > target val


  0%|          | 0/502 [00:00<?, ?it/s]W0000 00:00:1765516878.770973     179 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516878.786726     178 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516878.803099     174 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765516878.862243     177 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
  6%|▌         | 30/502 [08:31<2:02:55, 15.63s/it]

Saved new samples: 100


 14%|█▎        | 69/502 [17:22<1:37:33, 13.52s/it]

Saved new samples: 200


 21%|██        | 106/502 [26:52<2:14:02, 20.31s/it]

Saved new samples: 300


 21%|██▏       | 107/502 [27:18<1:40:49, 15.32s/it]


KeyboardInterrupt: 

In [31]:
!cd /kaggle/working && zip -r karsl_backup_resume01.zip karsl_medical_emergencies_signbart

  adding: karsl_medical_emergencies_signbart/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/label2id.json (deflated 69%)
  adding: karsl_medical_emergencies_signbart/train/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/train/0152/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_55_01)_c.pkl (deflated 16%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_55_10)_c.pkl (deflated 16%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_34)_c.pkl (deflated 17%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_22)_c.pkl (deflated 17%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_47)_c.pkl (deflated 16%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_51)_c.pkl (deflated 17%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152

In [34]:
ROOT = '/kaggle/input/karsl-medical-signbart-545-01/karsl_backup_resume01/karsl_medical_emergencies_signbart'

for split in ["train", "val", "test"]:
    path = os.path.join(ROOT, split)
    count = 0
    if os.path.isdir(path):
        for s in os.listdir(path):
            d = os.path.join(path, s)
            if os.path.isdir(d):
                count += len([f for f in os.listdir(d) if f.endswith(".pkl")])
    print(split, count)

train 545
val 307
test 0


In [35]:
original_split = 'test'
signers = ['01', '02', '03'] # all signers in test
target_split = 'test'

total_new = 0
skip_empty = 0
skip_not_in_subset = 0

mp_pose  = mp.solutions.pose
mp_hands = mp.solutions.hands

with (
    mp_pose.Pose(static_image_mode=True) as pose,
    mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands
):
    for signer in signers:
        base_dir = os.path.join(KARSL_ROOT, signer, signer, original_split)
        if not os.path.isdir(base_dir):
            continue

        print(f"\nNow processing TEST: signer {signer} ...")

        for sign_folder in tqdm(os.listdir(base_dir)):
            #keep only our 131 signs
            if sign_folder not in folder_to_class:
                skip_not_in_subset += 1
                continue

            in_sign_dir = os.path.join(base_dir, sign_folder)
            if not os.path.isdir(in_sign_dir):
                continue

            out_sign_dir = os.path.join(OUT_ROOT, target_split, sign_folder)
            os.makedirs(out_sign_dir, exist_ok=True)

            rep_folders = sorted([
                rf for rf in os.listdir(in_sign_dir)
                if os.path.isdir(os.path.join(in_sign_dir, rf))
            ])

            #in test keeping all repetitions (8)
            for rep_folder in rep_folders:
                rep_path = os.path.join(in_sign_dir, rep_folder)

                out_name = f'{signer}_{rep_folder}.pkl'
                out_path = os.path.join(out_sign_dir, out_name)

                if os.path.exists(out_path):
                    continue

                skeleton_seq = repetition(rep_path, pose, hands)
                if skeleton_seq is None:
                    skip_empty += 1
                    continue

                with open(out_path, 'wb') as f:
                    pickle.dump({'keypoints': skeleton_seq, 'class': sign_folder}, f)

                total_new += 1
                if total_new % 100 == 0:
                    print('Saved TEST samples so far:', total_new)

print("\nDone extracting TEST only!")
print("Total NEW test samples:", total_new)
print("Skipped empty:", skip_empty)
print("Skipped not in subset:", skip_not_in_subset)


W0000 00:00:1765521889.174242     196 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.



Now processing TEST: signer 01 ...


  0%|          | 0/502 [00:00<?, ?it/s]W0000 00:00:1765521889.204263     196 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765521889.227910     190 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765521889.295823     191 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
  7%|▋         | 35/502 [07:26<1:57:35, 15.11s/it]

Saved TEST samples so far: 100


 19%|█▊        | 93/502 [14:49<42:14,  6.20s/it]  

Saved TEST samples so far: 200


 27%|██▋       | 135/502 [22:01<59:06,  9.66s/it]  

Saved TEST samples so far: 300


 37%|███▋      | 188/502 [30:12<50:39,  9.68s/it]  

Saved TEST samples so far: 400


 46%|████▌     | 229/502 [37:19<58:00, 12.75s/it]  

Saved TEST samples so far: 500


 56%|█████▌    | 281/502 [44:35<33:06,  8.99s/it]

Saved TEST samples so far: 600


 66%|██████▌   | 332/502 [50:38<20:03,  7.08s/it]

Saved TEST samples so far: 700


 74%|███████▍  | 371/502 [58:22<17:44,  8.13s/it]

Saved TEST samples so far: 800


 83%|████████▎ | 417/502 [1:05:11<15:51, 11.19s/it]

Saved TEST samples so far: 900


 93%|█████████▎| 466/502 [1:12:46<08:05, 13.47s/it]

Saved TEST samples so far: 1000


100%|██████████| 502/502 [1:16:49<00:00,  9.18s/it]



Now processing TEST: signer 02 ...


  4%|▍         | 22/502 [02:57<45:23,  5.67s/it] 

Saved TEST samples so far: 1100


 13%|█▎        | 64/502 [09:30<59:01,  8.09s/it]  

Saved TEST samples so far: 1200


 22%|██▏       | 108/502 [15:39<1:12:35, 11.06s/it]

Saved TEST samples so far: 1300


 33%|███▎      | 164/502 [23:10<43:23,  7.70s/it]  

Saved TEST samples so far: 1400


 43%|████▎     | 214/502 [29:25<39:18,  8.19s/it]

Saved TEST samples so far: 1500


 53%|█████▎    | 266/502 [36:11<29:36,  7.53s/it]

Saved TEST samples so far: 1600


 62%|██████▏   | 309/502 [41:57<19:03,  5.93s/it]

Saved TEST samples so far: 1700


 70%|██████▉   | 350/502 [48:34<27:17, 10.77s/it]

Saved TEST samples so far: 1800


 80%|████████  | 402/502 [54:47<10:59,  6.60s/it]

Saved TEST samples so far: 1900


 89%|████████▉ | 446/502 [1:01:06<08:30,  9.11s/it]

Saved TEST samples so far: 2000


100%|██████████| 502/502 [1:08:23<00:00,  8.18s/it]



Now processing TEST: signer 03 ...


  0%|          | 0/502 [00:00<?, ?it/s]

Saved TEST samples so far: 2100


  8%|▊         | 41/502 [08:19<1:15:58,  9.89s/it]

Saved TEST samples so far: 2200


 19%|█▊        | 93/502 [16:07<48:28,  7.11s/it]  

Saved TEST samples so far: 2300


 27%|██▋       | 137/502 [25:55<1:17:03, 12.67s/it]

Saved TEST samples so far: 2400


 37%|███▋      | 188/502 [34:08<49:52,  9.53s/it]  

Saved TEST samples so far: 2500


 48%|████▊     | 239/502 [42:23<34:12,  7.81s/it]  

Saved TEST samples so far: 2600


 56%|█████▌    | 281/502 [50:06<39:38, 10.76s/it]  

Saved TEST samples so far: 2700


 67%|██████▋   | 334/502 [57:41<26:51,  9.59s/it]

Saved TEST samples so far: 2800


 74%|███████▍  | 371/502 [1:05:40<21:21,  9.78s/it]

Saved TEST samples so far: 2900


 81%|████████  | 406/502 [1:10:25<16:39, 10.41s/it]


KeyboardInterrupt: 

In [36]:
!cd /kaggle/working && zip -r karsl_backup_train545_val307_test.zip karsl_medical_emergencies_signbart

  adding: karsl_medical_emergencies_signbart/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/test/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/test/0194/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/test/0194/01_03_01_0194_(21_10_17_16_18_15)_c.pkl (deflated 40%)
  adding: karsl_medical_emergencies_signbart/test/0194/03_03_03_0194_(18_12_17_15_48_25)_c.pkl (deflated 21%)
  adding: karsl_medical_emergencies_signbart/test/0194/01_03_01_0194_(13_09_17_14_52_19)_c.pkl (deflated 36%)
  adding: karsl_medical_emergencies_signbart/test/0194/01_03_01_0194_(13_09_17_14_51_41)_c.pkl (deflated 37%)
  adding: karsl_medical_emergencies_signbart/test/0194/02_03_02_0194_(25_05_17_18_14_10)_c.pkl (deflated 36%)
  adding: karsl_medical_emergencies_signbart/test/0194/03_03_03_0194_(01_05_17_17_11_23)_c.pkl (deflated 12%)
  adding: karsl_medical_emergencies_signbart/test/0194/02_03_02_0194_(16_11_17_01_44_41)_c.pkl (deflated 37%)
  adding: karsl_medical_emergencies_signb

In [37]:
# The following code will only execute
# successfully when compression is complete

import kagglehub

# Download latest version
path = kagglehub.dataset_download("shadenalsuhayan/train-val-test-incomplete")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/train-val-test-incomplete


In [40]:
TEST_PATH = "/kaggle/input/train-val-test-incomplete/karsl_medical_emergencies_signbart/test"
import glob

def count_pkls(root):
    return len(glob.glob(os.path.join(root, "**", "*.pkl"), recursive=True))

for signer in sorted(os.listdir(TEST_PATH)):
    signer_path = os.path.join(TEST_PATH, signer)
    if not os.path.isdir(signer_path): 
        continue
    n_top = len([x for x in os.listdir(signer_path) if os.path.isdir(os.path.join(signer_path, x))])
    n_pkl = count_pkls(signer_path)
    print(f"{signer}: top-level-folders={n_top:3d} | pkl_files={n_pkl:4d}")


0071: top-level-folders=  0 | pkl_files=  24
0072: top-level-folders=  0 | pkl_files=  24
0073: top-level-folders=  0 | pkl_files=  24
0074: top-level-folders=  0 | pkl_files=  24
0075: top-level-folders=  0 | pkl_files=  24
0076: top-level-folders=  0 | pkl_files=  24
0077: top-level-folders=  0 | pkl_files=  24
0078: top-level-folders=  0 | pkl_files=  24
0079: top-level-folders=  0 | pkl_files=  24
0080: top-level-folders=  0 | pkl_files=  24
0081: top-level-folders=  0 | pkl_files=  24
0082: top-level-folders=  0 | pkl_files=  24
0083: top-level-folders=  0 | pkl_files=  24
0084: top-level-folders=  0 | pkl_files=  24
0085: top-level-folders=  0 | pkl_files=  24
0086: top-level-folders=  0 | pkl_files=  24
0087: top-level-folders=  0 | pkl_files=  24
0088: top-level-folders=  0 | pkl_files=  24
0089: top-level-folders=  0 | pkl_files=  24
0090: top-level-folders=  0 | pkl_files=  24
0091: top-level-folders=  0 | pkl_files=  24
0092: top-level-folders=  0 | pkl_files=  16
0093: top-

In [45]:
TEST_PATH = "/kaggle/working/karsl_medical_emergencies_signbart/test"
print("TOTAL test pkls (recursive) =",
      len(glob.glob(os.path.join(TEST_PATH, "**", "*.pkl"), recursive=True)))

TOTAL test pkls (recursive) = 2957


In [47]:
count = 0
for signer in sorted(os.listdir(TEST_PATH)):
    sp = os.path.join(TEST_PATH, signer)
    if os.path.isdir(sp):
        n = len(glob.glob(os.path.join(sp, "*.pkl")))
        count += 1
        print(signer, n)
print(count) #should be 131

0071 24
0072 24
0073 24
0074 24
0075 24
0076 24
0077 24
0078 24
0079 24
0080 24
0081 24
0082 24
0083 24
0084 24
0085 24
0086 24
0087 24
0088 24
0089 24
0090 24
0091 24
0092 16
0093 24
0094 24
0095 24
0096 24
0097 24
0098 16
0099 24
0100 16
0101 16
0102 24
0103 24
0104 27
0105 21
0106 24
0107 24
0108 24
0109 24
0110 24
0111 24
0112 24
0113 24
0114 16
0115 16
0116 16
0117 24
0118 24
0119 16
0120 16
0121 24
0122 16
0123 24
0124 24
0125 24
0126 24
0127 16
0128 24
0129 24
0130 16
0131 16
0132 24
0133 24
0134 24
0135 24
0136 24
0137 24
0138 16
0139 24
0140 16
0141 24
0142 24
0143 24
0144 24
0145 16
0146 16
0147 24
0148 24
0149 24
0150 24
0151 24
0152 24
0153 24
0154 24
0155 24
0156 16
0157 16
0158 24
0159 24
0160 24
0161 21
0162 24
0163 24
0164 16
0166 24
0173 24
0182 24
0183 24
0186 16
0193 24
0194 24
0195 24
0196 24
0197 24
0198 24
0199 24
0200 24
0201 24
0202 24
0203 24
0204 24
0205 24
0206 24
0207 16
0208 24
0216 24
0217 24
0222 24
0223 24
0254 24
0255 24
0256 24
0261 24
0316 24
0349 24


In [48]:
!ls -lah /kaggle/input
!ls -lah /kaggle/input/train-val-test-incomplete

total 8.0K
drwxr-xr-x 5 root   root    4.0K Dec 12 10:28 .
drwxr-xr-x 5 root   root    4.0K Dec 12 04:47 ..
drwxr-xr-x 5 nobody nogroup    0 Dec  6 13:14 karsl-502
drwxr-xr-x 4 nobody nogroup    0 Dec 12 06:10 karsl-medical-signbart-545-01
drwxr-xr-x 3 nobody nogroup    0 Dec 12 10:28 train-val-test-incomplete
total 4.0K
drwxr-xr-x 3 nobody nogroup    0 Dec 12 10:28 .
drwxr-xr-x 5 root   root    4.0K Dec 12 10:28 ..
drwxr-xr-x 3 nobody nogroup    0 Dec 12 10:28 karsl_medical_emergencies_signbart


In [49]:
!ls -lah /kaggle/input/train-val-test-incomplete
!ls -lah /kaggle/input/train-val-test-incomplete/karsl_medical_emergencies_signbart

total 4.0K
drwxr-xr-x 3 nobody nogroup    0 Dec 12 10:28 .
drwxr-xr-x 5 root   root    4.0K Dec 12 10:28 ..
drwxr-xr-x 3 nobody nogroup    0 Dec 12 10:28 karsl_medical_emergencies_signbart
total 0
drwxr-xr-x   3 nobody nogroup 0 Dec 12 10:28 .
drwxr-xr-x   3 nobody nogroup 0 Dec 12 10:28 ..
drwxr-xr-x 133 nobody nogroup 0 Dec 12 10:28 test


In [50]:
!cp -r /kaggle/input/karsl-medical-signbart-545-01/karsl_backup_resume01/karsl_medical_emergencies_signbart /kaggle/working/

In [51]:
!ls -lah /kaggle/working/karsl_medical_emergencies_signbart

total 28K
drwxr-xr-x   5 root root 4.0K Dec 12 10:50 .
drwxr-xr-x   4 root root 4.0K Dec 12 10:20 ..
-rw-r--r--   1 root root 1.9K Dec 12 10:50 id2label.json
-rw-r--r--   1 root root 1.6K Dec 12 10:50 label2id.json
drwxr-xr-x 133 root root 4.0K Dec 12 08:01 test
drwxr-xr-x  57 root root 4.0K Dec 12 10:50 train
drwxr-xr-x  33 root root 4.0K Dec 12 10:50 val


In [53]:
ROOT = '/kaggle/working/karsl_medical_emergencies_signbart'

def count_pkls(p):
    return len(glob.glob(os.path.join(p, '**', '*.pkl'), recursive=True))

print('train: ', count_pkls(os.path.join(ROOT, 'train')))
print('val: ', count_pkls(os.path.join(ROOT, 'val')))
print('test: ', count_pkls(os.path.join(ROOT, 'test')))


train:  545
val:  307
test:  2957


In [56]:
!cd /kaggle/working && zip -r karsl_medical_emergencies_signbart_FULL_01.zip \
    karsl_medical_emergencies_signbart

  adding: karsl_medical_emergencies_signbart/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/label2id.json (deflated 69%)
  adding: karsl_medical_emergencies_signbart/train/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/train/0152/ (stored 0%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_55_01)_c.pkl (deflated 16%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_55_10)_c.pkl (deflated 16%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_34)_c.pkl (deflated 17%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_22)_c.pkl (deflated 17%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_47)_c.pkl (deflated 16%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_51)_c.pkl (deflated 17%)
  adding: karsl_medical_emergencies_signbart/train/0152/01_03_01_0152

In [57]:
!ls -lah /kaggle/working | grep FULL_01

-rw-r--r-- 1 root root  76M Dec 12 10:59 karsl_medical_emergencies_signbart_FULL_01.zip


In [58]:
!unzip -l /kaggle/working/karsl_medical_emergencies_signbart_FULL_01.zip | head -n 20

Archive:  /kaggle/working/karsl_medical_emergencies_signbart_FULL_01.zip
  Length      Date    Time    Name
---------  ---------- -----   ----
        0  2025-12-12 10:50   karsl_medical_emergencies_signbart/
     1593  2025-12-12 10:50   karsl_medical_emergencies_signbart/label2id.json
        0  2025-12-12 10:50   karsl_medical_emergencies_signbart/train/
        0  2025-12-12 10:49   karsl_medical_emergencies_signbart/train/0152/
    39185  2025-12-12 10:49   karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_55_01)_c.pkl
    43385  2025-12-12 10:49   karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_55_10)_c.pkl
    40385  2025-12-12 10:49   karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_34)_c.pkl
    36785  2025-12-12 10:49   karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17_16_54_22)_c.pkl
    37985  2025-12-12 10:49   karsl_medical_emergencies_signbart/train/0152/01_03_01_0152_(06_04_17