In [None]:
import os
import json
import zipfile
import pickle
import numpy as np
from tempfile import TemporaryDirectory

def process_json_from_zip(zip_path, output_path, nested_folder=None):
    """
    Process JSON files within a ZIP archive to restructure the data by parts.

    Each JSON file is treated as one frame. For every frame the flat keypoint
    lists under ``data['people']`` are reshaped to ``(-1, 3)`` and grouped by
    body part; all frames of a part are then stacked into a single array of
    shape ``(frames, keypoints, 3)``. The resulting dict (keys ``'pose'``,
    ``'face'``, ``'hand_left'``, ``'hand_right'``) is pickled to
    ``output_path``.

    The archive members are read directly from the ZIP; nothing is extracted
    to disk, so only the files of the selected folder are decompressed.

    Parameters:
        zip_path (str): Path to the ZIP file containing JSON files.
        output_path (str): Path to save the restructured dataset (pickle file).
        nested_folder (str): Optional. Path inside the ZIP to focus on (e.g., '01_crowd_keypoint/01/NIA_SL_FS0001_CROWD01_F/').
            When omitted, the JSON files at the root of the archive are used.
            Only files directly inside the folder are processed (no recursion).

    Raises:
        FileNotFoundError: If ``nested_folder`` does not exist in the archive.
        ValueError: If the selected folder contains no ``.json`` files, if a
            keypoint list's length is not a multiple of 3, or if frames of the
            same part have different keypoint counts (raised by ``np.stack``).
        KeyError: If a JSON file lacks ``'people'`` or one of the keypoint keys.
    """
    import posixpath

    # Output part name -> key holding the flat keypoint list in the JSON.
    part_keys = {
        'pose': 'pose_keypoints_2d',
        'face': 'face_keypoints_2d',
        'hand_left': 'hand_left_keypoints_2d',
        'hand_right': 'hand_right_keypoints_2d',
    }
    all_frames = {part: [] for part in part_keys}

    # ZIP member names always use '/' as separator, so normalise the requested
    # folder to that form ('a/b/', './a/b' and 'a/b' all select the same folder).
    folder = ''
    if nested_folder:
        folder = posixpath.normpath(nested_folder.replace(os.sep, '/')).strip('/')
        if folder == '.':
            folder = ''
    prefix = folder + '/' if folder else ''

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        members = zip_ref.namelist()

        if prefix and not any(name.startswith(prefix) for name in members):
            raise FileNotFoundError(
                f"Folder '{nested_folder}' not found in archive: {zip_path}"
            )

        # Direct children of the folder only; sorted to maintain frame order.
        json_members = sorted(
            name for name in members
            if name.startswith(prefix)
            and '/' not in name[len(prefix):]
            and name.endswith('.json')
        )
        if not json_members:
            raise ValueError(
                f"No JSON files found in '{prefix or '/'}' of archive: {zip_path}"
            )

        for member in json_members:
            # json.load on a binary stream detects UTF-8/16/32 by itself, so
            # the result does not depend on the platform's default encoding.
            with zip_ref.open(member) as f:
                data = json.load(f)

            people = data['people']
            for part, key in part_keys.items():
                # Flat list [v0, v1, v2, v0, v1, v2, ...] -> (keypoints, 3)
                all_frames[part].append(np.array(people[key]).reshape(-1, 3))

    # Convert lists to numpy arrays
    for part in all_frames:
        all_frames[part] = np.stack(all_frames[part], axis=0)  # (frames, keypoints, 3)

    # Save as pickle
    with open(output_path, 'wb') as f:
        pickle.dump(all_frames, f)

    print(f"Restructured dataset saved to: {output_path}")


# Example usage: restructure the keypoint JSON files of one clip folder
# inside the label archive and save the result as a pickle.
output_pickle = "/nas/Chingiz/sing_language/newDATA_points/restructured_by_parts_NIA_SL_FS0001_CROWD01_F.pkl"
nested_folder = "01_crowd_keypoint/01/NIA_SL_FS0001_CROWD01_F"  # folder inside the archive to process
zip_file_path = "/nas/Dataset/수어 영상/1.Training/[라벨]01_crowd_keypoint.zip"

process_json_from_zip(
    zip_path=zip_file_path,
    output_path=output_pickle,
    nested_folder=nested_folder,
)


Restructured dataset saved to: /nas/Chingiz/sing_language/newDATA_points/restructured_by_parts.pkl
