## [Github Repo](https://github.com/ml-explore/mlx-examples)

## [HF Community](https://huggingface.co/mlx-community)

#### Loading some data from a german ASR dataset

In [4]:
!pip install datasets -q
!pip install soundfile -q

In [5]:
from datasets import load_dataset

# Load the dataset in streaming mode
dataset = load_dataset('flozi00/german-canary-asr-0324', split='train', streaming=True)

# Initialize an iterator
iterator = iter(dataset)

# Fetch the first 10 rows and store them in a list of dictionaries
first_10_rows = []
for _ in range(10):
    row = next(iterator)
    first_10_rows.append({
        'audio': row['audio'],  # Assuming the audio column is named 'audio'
        'text1': row['transkription'],  # Assuming the first text column is named 'text1'
        'text2': row['source']   # Assuming the second text column is named 'text2'
    })

# Print the list of dictionaries
for entry in first_10_rows:
    print(entry)


{'audio': {'bytes': b'ID3\x04\x00\x00\x00\x00\x00#TSSE\x00\x00\x00\x0f\x00\x00\x03Lavf56.40.101\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xfbT\xc4\x00\x03\xc0\x00\x01\xa4\x00\x00\x00 \x00\x004\x82\x80\x00\x04\x0c\x00\x03\x01\x00\x00\xc0\x00\x00\x10\x00\x01\x00\x0e\xa1Z\x85\xc1\xec\xc3\xbcZ7\x86]\x00\xf7\xd6\x02\xab\x0b\x84\xee\xf3P:\xcc\x0es\xf6\x80\xa6\xc0kah\x1f\xf8\xf2DGp\xa5\x17\xff\xe4\xa0\xe0\x14A\xcfQ\x10\xfff\xb3GX\xb8\xc4\xfe\x17@=\xc1\x984\xff\xff\x92g\x8b\xe4\x99\x00*\x0e\xcf\xff\x7f\xe9\x91@7\x9c\x1b\x9e)r\x08N&\x00@\x0f\xa7\xff\xef\xff\xca\xe0u8o\xe9\x1f&\t\xc0\xcbbS\x1ck\x7f\xff\xff\xff\xff\xff\xff\xff\xfeO\xac\xdfk\xb6\xdbv\xf6\xbe\xdf&\x92\xc9$\x11\x80\x0b.\xb3\xc7\x89\x10\x94Jb\x94\xc9\x89\x17eI\xf7(b\x11\xe4%\xa5\xff\xfbT\xc4U\x80\x10\xfa\x04\xed\xb9\x19\x80\x02(\x9e)w1\x80\x02\xfc\xd9\x99\x89r\xd8d\xb3\xb7\xe0y\xba\xc4\xc7^P\xcbW\xa1\xa3]\x8d)\xd9\x92D$\r\xe6R\xe7\x1d;@\xcb\x85\xc3q\x9f\x80,_\x8b\xb3\xa8\xdd\xeeF\xe5\xf6nX\x88e\xfc\xbbVr\xed\xdf\xc6\xa6\xdf\xc9

In [11]:
!pip install pydub -q

In [1]:
!pip install mlx-whisper -q
!pip install huggingface_hub -q

In [25]:
from pydub import AudioSegment
import tempfile
from IPython.display import Audio
import mlx_whisper
import os

# Extract the audio byte string
audio_bytes = first_10_rows[1]['audio']['bytes']

# Create a temporary file to save the audio
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
    temp_audio_filename = temp_audio_file.name
    temp_audio_file.write(audio_bytes)

# Optional Play the audio in the Jupyter notebook
Audio(temp_audio_filename)


In [28]:
import mlx_whisper

# Transcribe the audio
result = mlx_whisper.transcribe(
    temp_audio_filename,
    path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
    #word_timestamps=True,
)

#/Users/tillmann/.cache/huggingface/hub/models--mlx-community--whisper-large-v3-turbo

# Print transcription result
print(result)


{'text': ' Das Gericht ist an den in der Anklage wiedergegebenen Sachverhalt gebunden.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 3.66, 'text': ' Das Gericht ist an den in der Anklage wiedergegebenen Sachverhalt gebunden.', 'tokens': [50365, 2846, 9409, 1405, 1418, 364, 1441, 294, 1163, 1107, 7837, 609, 6216, 432, 16702, 268, 25626, 331, 20731, 21125, 10028, 13, 50548], 'temperature': 0.0, 'avg_logprob': -0.03928583860397339, 'compression_ratio': 0.9868421052631579, 'no_speech_prob': 4.017580539084076e-12}], 'language': 'de'}
