In [None]:
# Use the %pip magic so the packages are installed into the running kernel's environment.
%pip install torch lerobot

In [None]:
# Use the %pip magic so the package is installed into the running kernel's environment.
%pip install lerobot.data

In [None]:
# Use the %pip magic so the package is installed into the running kernel's environment.
%pip install decord

In [None]:
# Use the %pip magic so the packages are installed into the running kernel's environment.
%pip install transformers num2words

## Training

In [None]:
# Put the parent of the current working directory on sys.path so that the
# project-local `train_smolvla` module can be imported from this notebook.
import sys
from pathlib import Path

sys.path.append(
    str(Path().resolve().parent)
)
from train_smolvla import train

print("\nStarting training process...")
print("Note: We'll use a public dataset for training as our sample is too small")

# Relative location handed to train() as its output directory.
# Nothing is created here; this cell only builds the path and reports it.
train_output_dir = Path('model_output')
print(f"Training output will be saved to: {train_output_dir}")

# Start training against the dataset identified by `dataset_id`.
train(
    output_dir=str(train_output_dir),
    dataset_id="ISdept/piper_arm",
)

## Prepare dataset

In [None]:
# Import necessary modules
import sys
from pathlib import Path
import json
import re
import traceback

# Import the dataset-preparation helpers. NOTE: this cell does not modify sys.path, so the `data_processing` package must already be importable.
from data_processing.prepare_dataset import process_session, create_tasks_parquet, create_episodes_parquet_index, update_total_frames_from_episodes, compute_and_save_dataset_stats
from data_processing.episode_data import EpisodeData, CameraData

# --- CONFIGURATION ---
# Directory scanned by find_episode_folders() for `episode<N>` subfolders.
ROOT_FOLDER = Path("data/piper_training_data/")  # Root folder containing episode subfolders
# Destination directory passed to every dataset-writing helper called from this cell.
OUTPUT_FOLDER = Path("output/")  # Output folder for processed dataset
# NOTE(review): REPO_ID is not referenced anywhere in the visible part of this notebook,
# and its capitalisation differs from the dataset_id "ISdept/piper_arm" used in the
# Training cell -- confirm which spelling is intended.
REPO_ID = "ISDept/piper_arm"  # Your desired Hugging Face repo ID
# ---------------------

def find_episode_folders(root_folder):
    """Find episode subfolders named ``episode<N>`` directly under ``root_folder``.

    The name match is case-insensitive and must cover the whole folder name
    (``episode12`` matches, ``episode_12`` and ``my_episode12`` do not). Only
    directories are considered; regular files with a matching name are ignored.

    Args:
        root_folder: Directory to scan, given as a ``str`` or ``pathlib.Path``.

    Returns:
        A list of ``(folder_path, episode_number)`` tuples sorted by episode
        number. Folders that share a number (e.g. ``episode1`` and
        ``Episode01``) are additionally ordered by name so that the result
        does not depend on filesystem listing order.

    Raises:
        OSError: Propagated from ``Path.iterdir`` when ``root_folder`` cannot
            be listed (e.g. ``FileNotFoundError`` if it does not exist).
    """
    # Accept plain strings as well as Path objects.
    root_folder = Path(root_folder)
    pattern = re.compile(r'^episode(\d+)$', re.IGNORECASE)

    episode_folders = []
    for item in root_folder.iterdir():
        if not item.is_dir():
            continue
        match = pattern.match(item.name)
        if match:
            episode_folders.append((item, int(match.group(1))))

    # Numeric sort (so episode10 follows episode9); the name is a deterministic tie-break.
    episode_folders.sort(key=lambda pair: (pair[1], pair[0].name))
    return episode_folders

def find_json_and_videos(episode_folder):
    """Locate the JSON file and the video files stored directly in an episode folder.

    Only regular files are considered, extensions are matched
    case-insensitively, and candidates are ordered by file name so the result
    is the same on every run and every filesystem.

    Args:
        episode_folder: Episode directory, given as a ``str`` or ``pathlib.Path``.

    Returns:
        A ``(json_path, video_files)`` tuple. ``json_path`` is the first
        ``.json`` file by name; ``video_files`` is a name-sorted list of paths
        ending in ``.mp4``, ``.avi``, ``.mov`` or ``.mkv`` (possibly empty).

    Raises:
        FileNotFoundError: If the folder contains no JSON file.
    """
    episode_folder = Path(episode_folder)
    files = sorted(
        (entry for entry in episode_folder.iterdir() if entry.is_file()),
        key=lambda entry: entry.name,
    )

    json_files = [entry for entry in files if entry.suffix.lower() == '.json']
    if not json_files:
        raise FileNotFoundError(f"No JSON file found in {episode_folder}")
    if len(json_files) > 1:
        print(f"Warning: Multiple JSON files found in {episode_folder}, using {json_files[0]}")

    json_path = json_files[0]

    # Find video files (assuming common video extensions)
    video_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    video_files = [entry for entry in files if entry.suffix.lower() in video_extensions]

    return json_path, video_files

def get_camera_name_from_video_path(video_path):
    """Derive a camera label from the file name of a video.

    The file stem is searched case-insensitively for the keywords ``front``,
    ``right`` and ``gripper``, in that order; the first keyword found is
    returned. If none is present, the text after the last underscore of the
    stem is returned with its original casing (the whole stem when it
    contains no underscore).
    """
    stem = video_path.stem
    lowered = stem.lower()

    for keyword in ('front', 'right', 'gripper'):
        if keyword in lowered:
            return keyword

    # No known keyword: fall back to the trailing underscore-separated part.
    return stem.rsplit('_', 1)[-1]
      
def process_episode_folder(
    episode_folder,
    episode_idx,
    global_index_offset,
    last_frames_to_chop,
    fps=10,
    task_description="Pick up the cube and place it into the container.",
):
    """Build the EpisodeData for one episode folder and run ``process_session`` on it.

    Args:
        episode_folder: Path of the episode directory holding the JSON and video files.
        episode_idx: Episode number; stored as ``episode_index`` on the EpisodeData.
        global_index_offset: Passed through to EpisodeData unchanged.
        last_frames_to_chop: Passed through to ``process_session`` unchanged.
        fps: Frame rate recorded on the EpisodeData (defaults to the previously
            hard-coded value of 10).
        task_description: Task text recorded on the EpisodeData (defaults to the
            previously hard-coded description).

    Returns:
        The EpisodeData instance, with ``num_of_frames`` set to the value
        returned by ``process_session``.

    Raises:
        FileNotFoundError: If the folder contains no JSON file
            (raised by ``find_json_and_videos``).
    """
    json_path, video_files = find_json_and_videos(episode_folder)

    # One CameraData per video; the camera name is derived from the file name.
    cameras_list = [
        CameraData(
            video_path=str(video_path),
            camera=get_camera_name_from_video_path(video_path),
        )
        for video_path in video_files
    ]

    episode_data = EpisodeData(
        joint_data_json_path=str(json_path),
        episode_index=episode_idx,
        fps=fps,
        global_index_offset=global_index_offset,
        cameras=cameras_list,
        folder=episode_folder,
        task_description=task_description,
    )

    # NOTE(review): "first episode" is tied to the folder literally numbered 1.
    # If numbering starts elsewhere, or episode 1 is missing or fails, no episode
    # is flagged as first -- confirm what process_session needs in that case.
    is_first_episode = (episode_idx == 1)
    num_of_frames = process_session(episode_data, OUTPUT_FOLDER, is_first_episode, last_frames_to_chop)
    episode_data.num_of_frames = num_of_frames
    return episode_data

# Per-episode number of trailing frames to drop, keyed by episode number.
# The values are carried over unchanged from the original hand-tuned if/elif chain.
_FRAMES_TO_CHOP_BY_EPISODE = {
    **dict.fromkeys((3,), 42),
    **dict.fromkeys(
        (7, 32, 46, 76, 87, 88, 89, 102, 103, 108, 110, 118,
         119, 120, 121, 122, 126, 152),
        38,
    ),
    **dict.fromkeys((11, 14, 17, 25, 37, 132), 32),
    **dict.fromkeys(
        (15, 30, 38, 40, 41, 42, 49, 51, 52, 56, 57, 65,
         68, 70, 74, 78, 79, 81, 82, 83, 84, 91, 104, 105,
         106, 127, 144, 146, 147, 148, 149),
        30,
    ),
    **dict.fromkeys((137,), 24),
    **dict.fromkeys((44, 162, 164), 25),
    **dict.fromkeys((129,), 18),
}


def _frames_to_chop(episode_idx):
    """Return how many trailing frames to drop for the given episode number."""
    override = _FRAMES_TO_CHOP_BY_EPISODE.get(episode_idx)
    if override is not None:
        return override
    # Episodes without an explicit entry: 28 above episode 129, 35 below it.
    return 28 if episode_idx > 129 else 35


def main():
    """Process every episode folder under ROOT_FOLDER and write the dataset files.

    Each episode is processed in episode-number order. A failure in one
    episode is reported (message plus traceback) and that episode is skipped.
    After the loop, the tasks parquet, the per-episode parquet index, the
    total-frame count and the dataset statistics are written to OUTPUT_FOLDER.
    The index is built only for episodes that were processed successfully.
    """
    episode_folders = find_episode_folders(ROOT_FOLDER)

    if not episode_folders:
        print(f"No episode folders found in {ROOT_FOLDER}")
        return

    print(f"Found {len(episode_folders)} episode folders")

    global_index_offset = 0
    processed_episode_indices = []

    for episode_folder, episode_idx in episode_folders:
        print(f"Processing episode {episode_idx} in folder {episode_folder}")

        last_frames_to_chop = _frames_to_chop(episode_idx)

        try:
            episode_data = process_episode_folder(
                episode_folder, episode_idx, global_index_offset, last_frames_to_chop
            )
            # Advance the running frame offset first, so an episode counts as
            # processed only once its frame count is known to be usable.
            global_index_offset += episode_data.num_of_frames
            processed_episode_indices.append(episode_idx)
        except Exception as e:
            print(f"Error processing episode {episode_idx}: {e}")
            traceback.print_exc()
            continue

    if not processed_episode_indices:
        print("No episodes were successfully processed")
        return

    # Single task entry for the whole dataset.
    create_tasks_parquet(OUTPUT_FOLDER, 'pick_and_place')

    # Index only the episodes that actually produced data; failed ones are skipped.
    for episode_idx in processed_episode_indices:
        create_episodes_parquet_index(OUTPUT_FOLDER, episode_idx)

    update_total_frames_from_episodes(OUTPUT_FOLDER)

    compute_and_save_dataset_stats(OUTPUT_FOLDER)

# Entry-point guard. The captured output below this cell ("Found 164 episode
# folders", ...) shows that the guard is true when the cell is executed in the
# notebook, so running this cell starts the full processing pipeline.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


Found 164 episode folders
Processing episode 1 in folder data/piper_training_data/episode1


Casting the dataset: 100%|██████████| 150/150 [00:00<00:00, 11203.93 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1177.84ba/s]


Processing episode 2 in folder data/piper_training_data/episode2


Casting the dataset: 100%|██████████| 101/101 [00:00<00:00, 35598.71 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1300.16ba/s]


Processing episode 3 in folder data/piper_training_data/episode3


Casting the dataset: 100%|██████████| 137/137 [00:00<00:00, 43867.44 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1232.89ba/s]


Processing episode 4 in folder data/piper_training_data/episode4


Casting the dataset: 100%|██████████| 108/108 [00:00<00:00, 35281.94 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1254.65ba/s]


Processing episode 5 in folder data/piper_training_data/episode5


Casting the dataset: 100%|██████████| 112/112 [00:00<00:00, 37835.22 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1623.81ba/s]


Processing episode 6 in folder data/piper_training_data/episode6


Casting the dataset: 100%|██████████| 112/112 [00:00<00:00, 39830.60 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1264.11ba/s]


Processing episode 7 in folder data/piper_training_data/episode7


Casting the dataset: 100%|██████████| 129/129 [00:00<00:00, 40912.30 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1234.71ba/s]


Processing episode 8 in folder data/piper_training_data/episode8


Casting the dataset: 100%|██████████| 124/124 [00:00<00:00, 41501.25 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1020.51ba/s]


Processing episode 9 in folder data/piper_training_data/episode9


Casting the dataset: 100%|██████████| 208/208 [00:00<00:00, 87794.63 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1294.94ba/s]


Processing episode 10 in folder data/piper_training_data/episode10


Casting the dataset: 100%|██████████| 106/106 [00:00<00:00, 35593.33 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1276.03ba/s]


Processing episode 11 in folder data/piper_training_data/episode11


Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 26588.30 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1636.48ba/s]


Processing episode 12 in folder data/piper_training_data/episode12


Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 29027.38 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1490.51ba/s]


Processing episode 13 in folder data/piper_training_data/episode13


Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 32136.29 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1228.56ba/s]


Processing episode 14 in folder data/piper_training_data/episode14


Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 30294.14 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1153.23ba/s]


Processing episode 15 in folder data/piper_training_data/episode15


Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 29129.49 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 962.44ba/s]


Processing episode 16 in folder data/piper_training_data/episode16


Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 32478.44 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1201.81ba/s]


Processing episode 17 in folder data/piper_training_data/episode17


Casting the dataset: 100%|██████████| 99/99 [00:00<00:00, 39273.25 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1483.13ba/s]


Processing episode 18 in folder data/piper_training_data/episode18


Casting the dataset: 100%|██████████| 108/108 [00:00<00:00, 39819.34 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 871.82ba/s]


Processing episode 19 in folder data/piper_training_data/episode19


Casting the dataset: 100%|██████████| 103/103 [00:00<00:00, 30318.85 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1506.03ba/s]


Processing episode 20 in folder data/piper_training_data/episode20


Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 31831.93 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1381.07ba/s]


Processing episode 21 in folder data/piper_training_data/episode21


Casting the dataset: 100%|██████████| 106/106 [00:00<00:00, 31075.43 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1129.93ba/s]


Processing episode 22 in folder data/piper_training_data/episode22


Casting the dataset: 100%|██████████| 105/105 [00:00<00:00, 32487.60 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1140.38ba/s]


Processing episode 23 in folder data/piper_training_data/episode23


Casting the dataset: 100%|██████████| 117/117 [00:00<00:00, 39224.17 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1128.41ba/s]


Processing episode 24 in folder data/piper_training_data/episode24


Casting the dataset: 100%|██████████| 105/105 [00:00<00:00, 32052.54 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1430.53ba/s]


Processing episode 25 in folder data/piper_training_data/episode25


Casting the dataset: 100%|██████████| 135/135 [00:00<00:00, 41668.34 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1008.00ba/s]


Processing episode 26 in folder data/piper_training_data/episode26


Casting the dataset: 100%|██████████| 101/101 [00:00<00:00, 37462.39 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1111.07ba/s]


Processing episode 27 in folder data/piper_training_data/episode27


Casting the dataset: 100%|██████████| 121/121 [00:00<00:00, 40701.80 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1268.69ba/s]


Processing episode 28 in folder data/piper_training_data/episode28


Casting the dataset: 100%|██████████| 110/110 [00:00<00:00, 34002.02 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1161.21ba/s]


Processing episode 29 in folder data/piper_training_data/episode29


Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 34456.84 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1206.30ba/s]


Processing episode 30 in folder data/piper_training_data/episode30


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 29047.71 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1406.07ba/s]


Processing episode 31 in folder data/piper_training_data/episode31


Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 38692.84 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1366.22ba/s]


Processing episode 32 in folder data/piper_training_data/episode32


Casting the dataset: 100%|██████████| 122/122 [00:00<00:00, 38610.51 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1044.92ba/s]


Processing episode 33 in folder data/piper_training_data/episode33


Casting the dataset: 100%|██████████| 107/107 [00:00<00:00, 40135.09 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1035.37ba/s]


Processing episode 34 in folder data/piper_training_data/episode34


Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 30302.81 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1494.76ba/s]


Processing episode 35 in folder data/piper_training_data/episode35


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 28415.46 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1257.66ba/s]


Processing episode 36 in folder data/piper_training_data/episode36


Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 31573.21 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1524.09ba/s]


Processing episode 37 in folder data/piper_training_data/episode37


Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 28208.45 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1546.00ba/s]


Processing episode 38 in folder data/piper_training_data/episode38


Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 28460.08 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1644.83ba/s]


Processing episode 39 in folder data/piper_training_data/episode39


Casting the dataset: 100%|██████████| 113/113 [00:00<00:00, 41167.06 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1533.57ba/s]


Processing episode 40 in folder data/piper_training_data/episode40


Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 29737.03 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1607.63ba/s]


Processing episode 41 in folder data/piper_training_data/episode41


Casting the dataset: 100%|██████████| 98/98 [00:00<00:00, 37445.73 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1628.22ba/s]


Processing episode 42 in folder data/piper_training_data/episode42


Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 26294.35 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1274.09ba/s]


Processing episode 43 in folder data/piper_training_data/episode43


Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 32581.68 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1107.26ba/s]


Processing episode 44 in folder data/piper_training_data/episode44


Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 29187.44 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1531.33ba/s]


Processing episode 45 in folder data/piper_training_data/episode45


Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 34106.19 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1534.13ba/s]


Processing episode 46 in folder data/piper_training_data/episode46


Casting the dataset: 100%|██████████| 116/116 [00:00<00:00, 36744.90 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1356.94ba/s]


Processing episode 47 in folder data/piper_training_data/episode47


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 38939.71 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1544.86ba/s]


Processing episode 48 in folder data/piper_training_data/episode48


Casting the dataset: 100%|██████████| 108/108 [00:00<00:00, 34682.25 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1218.21ba/s]


Processing episode 49 in folder data/piper_training_data/episode49


Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 27779.04 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1266.01ba/s]


Processing episode 50 in folder data/piper_training_data/episode50


Casting the dataset: 100%|██████████| 107/107 [00:00<00:00, 32162.14 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1683.78ba/s]


Processing episode 51 in folder data/piper_training_data/episode51


Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 28668.05 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1416.52ba/s]


Processing episode 52 in folder data/piper_training_data/episode52


Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 27445.95 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1113.73ba/s]


Processing episode 53 in folder data/piper_training_data/episode53


Casting the dataset: 100%|██████████| 121/121 [00:00<00:00, 46398.86 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1312.77ba/s]


Processing episode 54 in folder data/piper_training_data/episode54


Casting the dataset: 100%|██████████| 132/132 [00:00<00:00, 40904.92 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1723.92ba/s]


Processing episode 55 in folder data/piper_training_data/episode55


Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 41438.94 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1567.96ba/s]


Processing episode 56 in folder data/piper_training_data/episode56


Casting the dataset: 100%|██████████| 102/102 [00:00<00:00, 39289.10 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1510.37ba/s]


Processing episode 57 in folder data/piper_training_data/episode57


Casting the dataset: 100%|██████████| 101/101 [00:00<00:00, 33519.92 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 875.64ba/s]


Processing episode 58 in folder data/piper_training_data/episode58


Casting the dataset: 100%|██████████| 125/125 [00:00<00:00, 41033.73 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1495.30ba/s]


Processing episode 59 in folder data/piper_training_data/episode59


Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 24676.71 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1576.81ba/s]


Processing episode 60 in folder data/piper_training_data/episode60


Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 28617.12 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1468.59ba/s]


Processing episode 61 in folder data/piper_training_data/episode61


Casting the dataset: 100%|██████████| 132/132 [00:00<00:00, 52112.96 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1548.28ba/s]


Processing episode 62 in folder data/piper_training_data/episode62


Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 36317.81 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1515.28ba/s]


Processing episode 63 in folder data/piper_training_data/episode63


Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 35453.51 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1329.41ba/s]


Processing episode 64 in folder data/piper_training_data/episode64


Casting the dataset: 100%|██████████| 153/153 [00:00<00:00, 53705.62 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1545.43ba/s]


Processing episode 65 in folder data/piper_training_data/episode65


Casting the dataset: 100%|██████████| 114/114 [00:00<00:00, 44724.60 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1683.78ba/s]


Processing episode 66 in folder data/piper_training_data/episode66


Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 31714.24 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1447.31ba/s]


Processing episode 67 in folder data/piper_training_data/episode67


Casting the dataset: 100%|██████████| 110/110 [00:00<00:00, 42161.51 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1514.74ba/s]


Processing episode 68 in folder data/piper_training_data/episode68


Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 30435.52 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1672.37ba/s]


Processing episode 69 in folder data/piper_training_data/episode69


Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 35886.70 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1521.88ba/s]


Processing episode 70 in folder data/piper_training_data/episode70


Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 35330.16 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1477.91ba/s]


Processing episode 71 in folder data/piper_training_data/episode71


Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 32935.55 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1570.31ba/s]


Processing episode 72 in folder data/piper_training_data/episode72


Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 30791.95 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1342.18ba/s]


Processing episode 73 in folder data/piper_training_data/episode73


Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 29093.44 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1555.17ba/s]


Processing episode 74 in folder data/piper_training_data/episode74


Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 33149.19 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1548.28ba/s]


Processing episode 75 in folder data/piper_training_data/episode75


Casting the dataset: 100%|██████████| 98/98 [00:00<00:00, 33423.47 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1486.29ba/s]


Processing episode 76 in folder data/piper_training_data/episode76


Casting the dataset: 100%|██████████| 111/111 [00:00<00:00, 44184.09 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1789.38ba/s]


Processing episode 77 in folder data/piper_training_data/episode77


Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 27055.77 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1392.53ba/s]


Processing episode 78 in folder data/piper_training_data/episode78


Casting the dataset: 100%|██████████| 101/101 [00:00<00:00, 32239.32 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1739.65ba/s]


Processing episode 79 in folder data/piper_training_data/episode79


Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 35110.60 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1757.14ba/s]


Processing episode 80 in folder data/piper_training_data/episode80


Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 34192.70 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1300.16ba/s]


Processing episode 81 in folder data/piper_training_data/episode81


Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 33480.85 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1538.63ba/s]


Processing episode 82 in folder data/piper_training_data/episode82


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 32672.96 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1678.39ba/s]


Processing episode 83 in folder data/piper_training_data/episode83


Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 34139.22 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1548.28ba/s]


Processing episode 84 in folder data/piper_training_data/episode84


Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 30623.64 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1631.39ba/s]


Processing episode 85 in folder data/piper_training_data/episode85


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 34548.25 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1592.37ba/s]


Processing episode 86 in folder data/piper_training_data/episode86


Casting the dataset: 100%|██████████| 124/124 [00:00<00:00, 48839.67 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1468.59ba/s]


Processing episode 87 in folder data/piper_training_data/episode87


Casting the dataset: 100%|██████████| 120/120 [00:00<00:00, 50382.03 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1446.81ba/s]


Processing episode 88 in folder data/piper_training_data/episode88


Casting the dataset: 100%|██████████| 126/126 [00:00<00:00, 49184.02 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1615.06ba/s]


Processing episode 89 in folder data/piper_training_data/episode89


Casting the dataset: 100%|██████████| 103/103 [00:00<00:00, 40049.44 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1707.08ba/s]


Processing episode 90 in folder data/piper_training_data/episode90


Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 33067.92 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1691.25ba/s]


Processing episode 91 in folder data/piper_training_data/episode91


Casting the dataset: 100%|██████████| 114/114 [00:00<00:00, 47896.49 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1508.20ba/s]


Processing episode 92 in folder data/piper_training_data/episode92


Casting the dataset: 100%|██████████| 113/113 [00:00<00:00, 43763.28 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1557.48ba/s]


Processing episode 93 in folder data/piper_training_data/episode93


Casting the dataset: 100%|██████████| 133/133 [00:00<00:00, 58486.31 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1381.52ba/s]


Processing episode 94 in folder data/piper_training_data/episode94


Casting the dataset: 100%|██████████| 121/121 [00:00<00:00, 39961.48 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1663.75ba/s]


Processing episode 95 in folder data/piper_training_data/episode95


Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 26696.21 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1096.55ba/s]


Processing episode 96 in folder data/piper_training_data/episode96


Casting the dataset: 100%|██████████| 125/125 [00:00<00:00, 48861.88 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1785.57ba/s]


Processing episode 97 in folder data/piper_training_data/episode97


Casting the dataset: 100%|██████████| 126/126 [00:00<00:00, 52021.09 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1572.08ba/s]


Processing episode 98 in folder data/piper_training_data/episode98


Casting the dataset: 100%|██████████| 108/108 [00:00<00:00, 33551.95 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1312.77ba/s]


Processing episode 99 in folder data/piper_training_data/episode99


Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 26252.12 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1398.57ba/s]


Processing episode 100 in folder data/piper_training_data/episode100


Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 24998.80 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1353.00ba/s]


Processing episode 101 in folder data/piper_training_data/episode101


Casting the dataset: 100%|██████████| 102/102 [00:00<00:00, 34179.04 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1529.09ba/s]


Processing episode 102 in folder data/piper_training_data/episode102


Casting the dataset: 100%|██████████| 110/110 [00:00<00:00, 35575.10 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1550.00ba/s]


Processing episode 103 in folder data/piper_training_data/episode103


Casting the dataset: 100%|██████████| 98/98 [00:00<00:00, 30368.81 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1487.87ba/s]


Processing episode 104 in folder data/piper_training_data/episode104


Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 28495.65 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1197.69ba/s]


Processing episode 105 in folder data/piper_training_data/episode105


Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 26552.78 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1780.26ba/s]


Processing episode 106 in folder data/piper_training_data/episode106


Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 29266.17 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 951.31ba/s]


Processing episode 107 in folder data/piper_training_data/episode107


Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 27312.59 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1133.60ba/s]


Processing episode 108 in folder data/piper_training_data/episode108


Casting the dataset: 100%|██████████| 98/98 [00:00<00:00, 32045.04 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1418.43ba/s]


Processing episode 109 in folder data/piper_training_data/episode109


Casting the dataset: 100%|██████████| 116/116 [00:00<00:00, 35539.76 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1463.98ba/s]


Processing episode 110 in folder data/piper_training_data/episode110


Casting the dataset: 100%|██████████| 119/119 [00:00<00:00, 36102.87 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1533.57ba/s]


Processing episode 111 in folder data/piper_training_data/episode111


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 40292.75 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1644.83ba/s]


Processing episode 112 in folder data/piper_training_data/episode112


Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 34707.80 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1412.22ba/s]


Processing episode 113 in folder data/piper_training_data/episode113


Casting the dataset: 100%|██████████| 104/104 [00:00<00:00, 35121.39 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1111.37ba/s]


Processing episode 114 in folder data/piper_training_data/episode114


Casting the dataset: 100%|██████████| 113/113 [00:00<00:00, 41898.55 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1531.33ba/s]


Processing episode 115 in folder data/piper_training_data/episode115


Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 34482.32 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1730.32ba/s]


Processing episode 116 in folder data/piper_training_data/episode116


Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 33632.65 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1667.05ba/s]


Processing episode 117 in folder data/piper_training_data/episode117


Casting the dataset: 100%|██████████| 105/105 [00:00<00:00, 41614.09 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1510.37ba/s]


Processing episode 118 in folder data/piper_training_data/episode118


Casting the dataset: 100%|██████████| 128/128 [00:00<00:00, 49922.90 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1439.36ba/s]


Processing episode 119 in folder data/piper_training_data/episode119


Casting the dataset: 100%|██████████| 115/115 [00:00<00:00, 34145.90 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1013.36ba/s]


Processing episode 120 in folder data/piper_training_data/episode120


Casting the dataset: 100%|██████████| 120/120 [00:00<00:00, 39444.87 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1373.38ba/s]


Processing episode 121 in folder data/piper_training_data/episode121


Casting the dataset: 100%|██████████| 120/120 [00:00<00:00, 42751.76 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1432.97ba/s]


Processing episode 122 in folder data/piper_training_data/episode122


Casting the dataset: 100%|██████████| 100/100 [00:00<00:00, 31326.49 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1565.04ba/s]


Processing episode 123 in folder data/piper_training_data/episode123


Casting the dataset: 100%|██████████| 112/112 [00:00<00:00, 41461.79 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1591.77ba/s]


Processing episode 124 in folder data/piper_training_data/episode124


Casting the dataset: 100%|██████████| 100/100 [00:00<00:00, 35968.65 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1219.27ba/s]


Processing episode 125 in folder data/piper_training_data/episode125


Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 24388.86 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1089.43ba/s]


Processing episode 126 in folder data/piper_training_data/episode126


Casting the dataset: 100%|██████████| 105/105 [00:00<00:00, 31075.50 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1370.24ba/s]


Processing episode 127 in folder data/piper_training_data/episode127


Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 20298.29 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1334.92ba/s]


Processing episode 128 in folder data/piper_training_data/episode128


Casting the dataset: 100%|██████████| 112/112 [00:00<00:00, 36554.51 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1360.46ba/s]


Processing episode 129 in folder data/piper_training_data/episode129


Casting the dataset: 100%|██████████| 100/100 [00:00<00:00, 29706.81 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1499.57ba/s]


Processing episode 130 in folder data/piper_training_data/episode130


Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 29581.07 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1546.00ba/s]


Processing episode 131 in folder data/piper_training_data/episode131


Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 24374.30 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1575.03ba/s]


Processing episode 132 in folder data/piper_training_data/episode132


Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 24662.70 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1200.43ba/s]


Processing episode 133 in folder data/piper_training_data/episode133


Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 27317.61 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1227.12ba/s]


Processing episode 134 in folder data/piper_training_data/episode134


Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 25282.92 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1499.04ba/s]


Processing episode 135 in folder data/piper_training_data/episode135


Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 28648.07 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1249.79ba/s]


Processing episode 136 in folder data/piper_training_data/episode136


Casting the dataset: 100%|██████████| 100/100 [00:00<00:00, 32415.98 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1470.65ba/s]


Processing episode 137 in folder data/piper_training_data/episode137


Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 24610.07 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1333.22ba/s]


Processing episode 138 in folder data/piper_training_data/episode138


Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 32442.51 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1219.27ba/s]


Processing episode 139 in folder data/piper_training_data/episode139


Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 28562.19 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1449.31ba/s]


Processing episode 140 in folder data/piper_training_data/episode140


Casting the dataset: 100%|██████████| 111/111 [00:00<00:00, 40277.51 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1721.80ba/s]


Processing episode 141 in folder data/piper_training_data/episode141


Casting the dataset: 100%|██████████| 101/101 [00:00<00:00, 38857.52 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1533.57ba/s]


Processing episode 142 in folder data/piper_training_data/episode142


Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 33718.91 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1625.70ba/s]


Processing episode 143 in folder data/piper_training_data/episode143


Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 29265.31 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1639.68ba/s]


Processing episode 144 in folder data/piper_training_data/episode144


Casting the dataset: 100%|██████████| 107/107 [00:00<00:00, 40636.59 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1669.71ba/s]


Processing episode 145 in folder data/piper_training_data/episode145


Casting the dataset: 100%|██████████| 126/126 [00:00<00:00, 45799.66 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1404.66ba/s]


Processing episode 146 in folder data/piper_training_data/episode146


Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 33928.04 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1550.57ba/s]


Processing episode 147 in folder data/piper_training_data/episode147


Casting the dataset: 100%|██████████| 109/109 [00:00<00:00, 41824.09 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1642.25ba/s]


Processing episode 148 in folder data/piper_training_data/episode148


Casting the dataset: 100%|██████████| 120/120 [00:00<00:00, 46620.64 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1163.79ba/s]


Processing episode 149 in folder data/piper_training_data/episode149


Casting the dataset: 100%|██████████| 125/125 [00:00<00:00, 48770.98 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1404.66ba/s]


Processing episode 150 in folder data/piper_training_data/episode150


Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 25008.63 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1757.14ba/s]


Processing episode 151 in folder data/piper_training_data/episode151


Casting the dataset: 100%|██████████| 103/103 [00:00<00:00, 40232.20 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1394.38ba/s]


Processing episode 152 in folder data/piper_training_data/episode152


Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 31669.70 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1381.07ba/s]


Processing episode 153 in folder data/piper_training_data/episode153


Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 28071.40 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1384.72ba/s]


Processing episode 154 in folder data/piper_training_data/episode154


Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 17555.58 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 927.74ba/s]


Processing episode 155 in folder data/piper_training_data/episode155


Casting the dataset: 100%|██████████| 106/106 [00:00<00:00, 44006.36 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1396.70ba/s]


Processing episode 156 in folder data/piper_training_data/episode156


Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 32040.34 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1032.06ba/s]


Processing episode 157 in folder data/piper_training_data/episode157


Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 33004.10 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1903.91ba/s]


Processing episode 158 in folder data/piper_training_data/episode158


Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 38161.22 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1545.43ba/s]


Processing episode 159 in folder data/piper_training_data/episode159


Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 33423.85 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1818.87ba/s]


Processing episode 160 in folder data/piper_training_data/episode160


Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 37452.70 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1620.67ba/s]


Processing episode 161 in folder data/piper_training_data/episode161


Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 30453.15 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1572.08ba/s]


Processing episode 162 in folder data/piper_training_data/episode162


Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 20853.04 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1227.48ba/s]


Processing episode 163 in folder data/piper_training_data/episode163


Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 35311.47 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1513.10ba/s]


Processing episode 164 in folder data/piper_training_data/episode164


Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 30094.77 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1608.25ba/s]
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.histogram(batch[:, i], bins=self._bin_edges[i])
  hist, _ = np.his

In [None]:
!huggingface-cli login

In [None]:
from huggingface_hub import HfApi
import os


!hf upload \
  'ISDept/piper_arm' \
  /Users/eddyma/DEV/Github/lerobot-piper/src/output \
  --repo-type dataset

## Inference

### Webcam inference

In [None]:
import sys
from pathlib import Path
# Add the parent of the current working directory to this kernel's import path.
# NOTE(review): sys.path changes apply to this kernel process only; the separate
# Python process started by `!python` below does not see them.
sys.path.append(str(Path().resolve().parent))

# Run webcam_inference.py (resolved relative to the working directory) in a separate process.
!python webcam_inference.py

### Video inference - Diffusion

In [None]:
# Run video_inference.py (resolved relative to the working directory) in a separate Python process.
!python video_inference.py

In [None]:
# Run video_inference_close_loop.py (resolved relative to the working directory) in a separate Python process.
!python video_inference_close_loop.py

### Video inference - SmolVLA

In [None]:
# Run video_inference_smolvla.py (resolved relative to the working directory) in a separate Python process.
!python video_inference_smolvla.py

In [None]:
# Run video_inference_smolvla_close_loop.py (resolved relative to the working directory) in a separate Python process.
!python video_inference_smolvla_close_loop.py

## Visualize

In [None]:
# Run extract_joint_positions.py in a separate Python process before plotting.
# NOTE(review): presumably this produces the ground-truth JSON read further down in this cell — confirm.
!python extract_joint_positions.py

import json
import matplotlib.pyplot as plt
import numpy as np

def plot_joint_positions(json_file_path, title, start_frame_index=0):
    """
    Plot per-joint position traces stored in a JSON file.

    The file must contain a JSON list with one entry per frame, where each entry
    is an indexable sequence whose first seven values are the six joint positions
    followed by the gripper position. Only this list-of-arrays layout is handled;
    any other top-level JSON value (or an empty list) is reported and skipped
    instead of failing later with an unbound-variable error.

    Parameters:
    json_file_path (str): Path to the JSON file.
    title (str): Title for the plot.
    start_frame_index (int): The frame index from which to start plotting. Defaults to 0 (the beginning).

    Returns:
    None. A matplotlib figure is shown when there is data to plot; otherwise a
    message is printed and nothing is drawn.
    """
    # One label per plotted series; its length also fixes how many values are read per frame.
    joint_names = ['Joint 1', 'Joint 2', 'Joint 3', 'Joint 4', 'Joint 5', 'Joint 6', 'Gripper']

    # Read the JSON file
    with open(json_file_path, 'r') as f:
        data = json.load(f)

    # Nothing plottable: bail out before any plotting code touches undefined series.
    if not isinstance(data, list) or len(data) == 0:
        print(f"No joint position data found in {json_file_path}.")
        return

    if start_frame_index >= len(data):
        print(f"No data found starting from frame index {start_frame_index}.")
        return

    # Keep frames from start_frame_index onward and label them with their original indices
    filtered_data = data[start_frame_index:]
    frame_indices = list(range(start_frame_index, start_frame_index + len(filtered_data)))
    print(f"Frames plotted: {len(frame_indices)} (from index {min(frame_indices)} to {max(frame_indices)})")

    # Transpose frame-major data into one series per joint (6 joints + 1 gripper)
    joints = [[action[i] for action in filtered_data] for i in range(len(joint_names))]

    # Create the plot
    plt.figure(figsize=(12, 4)) # Slightly larger figure for clarity

    # Plot each joint with a different color
    for i, joint_name in enumerate(joint_names):
        plt.plot(frame_indices, joints[i], label=joint_name, marker='o', markersize=3, linewidth=1.5)

    # Add labels and title
    plt.xlabel('Frame Index')
    plt.ylabel('Joint Position')
    plt.title(f"{title} (Starting from frame {start_frame_index})")
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left') # Legend outside plot
    plt.grid(True, alpha=0.3)

    # Show the plot
    plt.tight_layout()
    plt.show()

# Draw one figure per recorded action file: open-loop predictions, ground truth,
# then closed-loop predictions, in that order.
for json_path, plot_title in (
    ('temp/inference_actions.json', 'Predicted Joint Positions Training'),
    ('temp/data_20251128_095915_gt.json', 'Ground Truth Joint Positions'),
    ('temp/inference_actions_close_loop.json', 'Predicted Joint Positions Closed Loop'),
):
    plot_joint_positions(json_path, plot_title)

In [None]:
from pathlib import Path
from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy
# NOTE(review): output_directory is assigned but not used anywhere in this cell.
output_directory = Path("outputs/eval/example_pusht_diffusion")
# Comment out the old pretrained model path
 # pretrained_policy_path = "lerobot/diffusion_pusht"
# Use your newly trained model path instead
# NOTE(review): pretrained_policy_path is not passed to from_pretrained below; the policy is
# loaded from the "ISdept/piper_arm" identifier instead. Confirm which source is intended.
pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")
policy = DiffusionPolicy.from_pretrained("ISdept/piper_arm")

# Print the configuration object attached to the loaded policy.
print(policy.config)

In [None]:
# Updated plotting functionality using the new plotting utility
import sys
from pathlib import Path
# Add the current working directory to this kernel's import path
# (presumably where plotting_utils.py lives — confirm).
sys.path.append(str(Path().resolve()))

# NOTE(review): this import rebinds the name plot_joint_positions, replacing the function
# of the same name defined inline in an earlier cell for the rest of the session.
from plotting_utils import plot_joint_positions

# Plot the data using the improved function that handles both file formats
plot_joint_positions('temp/inference_actions.json', 'Predicted Joint Positions - Episode 1')
plot_joint_positions('temp/metadata_20251113_080958_gt.json', 'Ground Truth Joint Positions')

In [None]:
# Run inspect_local_parquet.py (resolved relative to the working directory) in a separate Python process.
!python inspect_local_parquet.py

In [None]:
!lerobot-train \
  --dataset.repo_id=ISdept/piper_arm \
  --policy.type=diffusion \
  --output_dir=outputs/train/output_diff3 \
  --job_name=pick_and_place \
  --policy.device=cuda \
  --policy.repo_id=ISdept/piper_arm \
  --wandb.enable=false \
  --dataset.revision="main" \
  --dataset.image_transforms.enable=True \
  --policy.use_separate_rgb_encoder_per_camera=True \
  --policy.crop_shape=[400,400] \
  --save_checkpoint=True \
  --save_freq=2000 \
  \
  --steps=25000 \
  --policy.n_obs_steps=10 \
  --policy.horizon=24 \
  --batch_size=3 \
  --num_workers=4

In [None]:
# Evaluate the ISdept/piper_arm diffusion policy for 10 episodes with the lerobot-eval CLI,
# writing results under outputs/inference/piper_arm_eval.
# NOTE(review): --env.type is "aloha" while the policy repo is piper_arm — confirm this is the
# intended evaluation environment. The device here is "mps", whereas the training cell uses "cuda".
!lerobot-eval \
  --policy.repo_id="ISdept/piper_arm" \
  --policy.type="diffusion" \
  --policy.device="mps" \
  --env.type="aloha" \
  --eval.n_episodes=10 \
  --output_dir="outputs/inference/piper_arm_eval" \
  --job_name="piper_arm_diffusion_eval"

In [None]:
# Open the lerobot dataset visualizer for one episode of ISdept/piper_arm.
# NOTE(review): the episode index is written as 001 — presumably parsed as the integer 1; confirm.
!lerobot-dataset-viz \
    --repo-id ISdept/piper_arm \
    --episode-index 001

In [37]:
import torch
# Uniform sample from [0, 1) shifted and scaled into [-0.005, 0.005).
# The value is neither stored nor displayed: only the cell's last expression is shown.
(torch.rand(1).item() - 0.5) * 0.01

base = torch.tensor([1.0, 2.0, 3.0])

# Standard-normal noise with the same shape and dtype as `base`; this is the cell's displayed result.
torch.randn_like(base)


tensor([ 0.0199, -0.3549, -2.1588])

In [None]:
# Spacing between consecutive frames — presumably in seconds; TODO confirm the unit.
frame_time = 0.1
# Offsets of 24 consecutive frames: 0, frame_time, 2 * frame_time, ...
# NOTE(review): 24 matches --policy.horizon in the training cell — confirm whether that is intentional.
[i * frame_time for i in range(24)]