# Tutorial

This notebook shows code snippets for various tasks using the audio and video functionality provided by the `pipepal` package.

Please note that all file paths and configurations used here are placeholders and should be replaced with actual user-specific paths and settings.

In [None]:
# Importing necessary classes from the pipepal package
from pipepal.audio.tasks import ExampleTask, IOTask as AudioIOTask
from pipepal.video.tasks import IOTask as VideoIOTask

## Example Audio Task

In [None]:
# Example Task
# ------------
# Minimal walkthrough of ExampleTask, a basic template task.

# Request payload: a toy "data" section plus the "service" settings
# (service name, model checkpoint and model version).
example_request = {
    "data": {"hello": "world"},
    "service": {
        "service_name": "ExampleService",
        "model_checkpoint": "model.ckpt",
        "model_version": "1.0",
    },
}

# Build the task and run it on the payload above.
example_task = ExampleTask()
example_response = example_task.run(example_request)

# Show whatever the task returned.
print("Example Task Response:", example_response)

## Video IO Task

In [None]:
# Video IO Task
# -------------
# Demonstrates pulling the audio tracks out of video files with VideoIOTask.

# Request payload: the videos to process, the audio format and codec to
# produce, the folder to write into, and the service to use.
video_io_request = {
    "data": {
        "files": ["/path/to/your/video_file.mp4"],
        "audio_format": "wav",
        "audio_codec": "pcm_s16le",
        "output_folder": "/path/to/output/audios",
    },
    "service": {"service_name": "ffmpeg"},
}

# Build the task and run the extraction.
video_io_task = VideoIOTask()
video_io_response = video_io_task.extract_audios_from_videos(video_io_request)

# Show whatever the task returned.
print("Video IO Task Response:", video_io_response)

## Audio IO Task

In [None]:
# Audio IO Task
# -------------
# Demonstrates loading audio files from disk with AudioIOTask.

# Placeholder paths of the audio files to load.
audio_file_paths = [
    "/path/to/audio/file1.wav",
    "/path/to/audio/file2.wav",
    "/path/to/audio/file3.wav",
]

# Request payload: the files to read and the service to use.
audio_io_request = {
    "data": {"files": audio_file_paths},
    "service": {"service_name": "Datasets"},
}

# Build the task and read the files.
audio_io_task = AudioIOTask()
audio_io_response = audio_io_task.read_audios_from_disk(audio_io_request)

# The loaded audio data sits under the response's 'output' key.
audio_dataset = audio_io_response['output']

# Report the shape and samples of the first entry ...
first_audio_array = audio_dataset[0]['audio']['array']
print("First audio shape:", first_audio_array.shape)
print("First audio array:", first_audio_array)

# ... and of the last entry.
last_audio_array = audio_dataset[-1]['audio']['array']
print("Last audio shape:", last_audio_array.shape)
print("Last audio array:", last_audio_array)

## Raw Signal Processing Task

In [None]:
# NOTE: this whole cell is disabled. The example below is wrapped in a single
# triple-quoted string literal, so none of it executes; the author flagged it
# as "Not working". Once re-enabled it relies on `audio_dataset`, which is
# created in the Audio IO Task cell.
""" # Not working #
# Raw Signal Processing Task
# --------------------------
# This section demonstrates the use of RawSignalProcessingTask to perform operations like channel selection and resampling.

from pipepal.audio.tasks import RawSignalProcessingTask

# Instantiate your class
raw_signal_processing_task = RawSignalProcessingTask()

# Running the raw signal processing task with the previously created audio dataset
asp_response = raw_signal_processing_task.run({
    "data": {
        "dataset": audio_dataset,
        "channeling": {
            "method": "selection",  # alternative is "average"
            "channels_to_keep": [0]
        },
        "resampling": {
            "rate": 16000,
        }
    },
    "service": {
        "service_name": "torchaudio"
    }
})

# Accessing the new audio dataset from the response
new_audio_dataset = asp_response['output']

# Printing details about the processed audio dataset
print("Processed audio dataset:", new_audio_dataset)
print("Shape of last audio array in processed dataset:", new_audio_dataset[-1]['audio']['array'].shape)
print("Last audio array in processed dataset:", new_audio_dataset[-1]['audio']['array'])

# Optionally, pushing the new audio dataset to a hub
# Uncomment the following line to perform this action
# new_audio_dataset.push_to_hub("your_hub_repository_name")
"""