# Make image cuts file

In [None]:
from lhotse.cut import MonoCut, CutSet
from pathlib import Path
from pprint import pprint
from typing import Union
import numpy as np
from lhotse import fastcopy


def attach_image(
    cut, key: str, path_or_object: Union[str, np.ndarray, bytes]
):
    from lhotse.image.image import Image
    from lhotse.image.io import PillowInMemoryWriter

    # Handle different types of input
    if isinstance(path_or_object, (str, Path)):
        # It's a path, directly reference the file without writing anything
        # Get the dimensions by opening the image
        import PIL.Image as PILImage

        with PILImage.open(path_or_object) as img:
            width, height = img.size

        # Create an Image manifest pointing to the original file
        # We'll use the original file extension to determine the file name in storage_key
        path = Path(path_or_object)
        storage_key = str(path.name)
        # Use the parent directory as storage_path
        storage_path = str(path.parent)

        image_manifest = Image(
            storage_type="pillow_files",
            storage_path=storage_path,
            storage_key=storage_key,
            width=width,
            height=height,
        )
    else:
        # For numpy arrays or bytes, use in-memory writer
        writer = PillowInMemoryWriter()
        with writer:
            image_manifest = writer.store_image(key, path_or_object)

    cut.custom[key] = image_manifest
    return cut



def make_image_cuts_file(image_dir):
    """Create a cuts file with image cuts."""

    cuts = []
    for image_file in sorted(Path(image_dir).glob("*/*.jpg")):
        cut = MonoCut(
            id=image_file.stem,
            start=0.0,
            duration=1.0,  # Assuming each image represents a 1-second cut
            channel=0,
            supervisions=[],
            custom={"image_text": "text prompt for image"},
        )
        # Attach the image using its path. Lhotse will handle creating the Image manifest internally.

        try:
            cut = cut.attach_image(key="image", path_or_object=image_file)
        except Exception as e:
            cut = attach_image(
                cut=cut,
                key="image",
                path_or_object=image_file,
            )


        cuts.append(cut)

        if len(cuts) % 100 == 0:
            print(f"\nProcessed {len(cuts)} images...")
            pprint(cut.to_dict())
        

    cutset = CutSet.from_cuts(cuts)
    cutset.to_jsonl("image_cuts.jsonl.gz")
    print(f"Created cuts file with {len(cuts)} image cuts at 'image_cuts.jsonl.gz'.")


image_dir = "/Users/feiteng/Downloads/testdata/images"
make_image_cuts_file(image_dir)

# Make Video cuts_file

In [6]:
# conda install ffmpeg
from torchcodec.decoders import VideoDecoder
from lhotse import CutSet, MonoCut, Recording, SupervisionSegment
from lhotse.audio.source import VideoInfo
from lhotse.audio import AudioSource, AudioLoadingError
from pathlib import Path
from pprint import pprint

def make_video_cuts_file(video_dir):
    """Create a cuts file with video cuts."""

    cuts = []
    for video_file in sorted(Path(video_dir).glob("*/*.mp4")):
        try:
            recording = Recording.from_file(video_file)
        except (AudioLoadingError, RuntimeError, TypeError) as e:  # no audio in video
            # print(f"Video file {video_file} has no audio track, creating recording manually...")
            device = "cpu"  # or e.g. "cuda" !
            decoder = VideoDecoder(str(video_file), device=device)

            video_info = VideoInfo(
                fps=decoder.metadata.average_fps,
                num_frames=decoder.metadata.num_frames,
                width=decoder.metadata.width,
                height=decoder.metadata.height,
            )
                    
            recording = Recording(
                id=video_file.stem,
                sampling_rate=None,
                num_samples=None,
                duration=decoder.metadata.duration_seconds,
                channel_ids=[],  # set not None to FIX Recording.__post_init__ error
                sources=[
                    AudioSource(
                        type="file",
                        channels=None,
                        source=str(video_file),
                        video=video_info,
                    )
                ],
            )

        cut = recording.to_cut()
        supervision = SupervisionSegment(
            id=recording.id,
            recording_id=recording.id,
            start=0,
            duration=recording.duration,
            channel=0,
            text="Video Text Prompt",  # Replace with actual text prompt if available
        )
        cut.supervisions.append(supervision)
        cuts.append(cut)
        
        if len(cuts) % 10 == 0:
            print(f"\nProcessed {len(cuts)} videos...")
            pprint(cut.to_dict())
        

    cutset = CutSet.from_cuts(cuts)
    cutset.to_jsonl("video_cuts.jsonl.gz")
    print(f"Created cuts file with {len(cuts)} video cuts at 'video_cuts.jsonl.gz'.")


video_dir = "/Users/feiteng/Downloads/testdata/videos"
make_video_cuts_file(video_dir)


Processed 10 videos...
{'channel': [],
 'duration': 8.5085,
 'id': 'video',
 'recording': {'channel_ids': [],
               'duration': 8.5085,
               'id': 'video',
               'num_samples': 0,
               'sampling_rate': 0,
               'sources': [{'channels': [],
                            'source': '/Users/feiteng/Downloads/testdata/videos/010/video.mp4',
                            'type': 'file',
                            'video': {'fps': 23.976023976023978,
                                      'height': 800,
                                      'num_frames': 204,
                                      'width': 640}}]},
 'start': 0.0,
 'supervisions': [{'channel': 0,
                   'duration': 8.5085,
                   'id': 'video',
                   'recording_id': 'video',
                   'start': 0,
                   'text': 'Video Text Prompt'}],
 'type': 'MultiCut'}

Processed 20 videos...
{'channel': [],
 'duration': 8.2,
 'id': 'video',
