In [1]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', message='IProgress not found')

import librosa
import numpy as np
import torch
import torchaudio.transforms as T
import IPython.display as ipd
from tqdm import tqdm

## 1. Load Mimi model

In [2]:
from transformers import MimiModel
model_id = "kyutai/mimi"
mimi = MimiModel.from_pretrained(model_id)
mimi = mimi.to("cuda")

import logging
from typing import List, Optional, Union, Tuple
logger = logging.getLogger(__name__)
UNICODE_OFFSET: int = 0xE000
"""For very large codebook size (e.g. > 32768), use this higher unicode offset to avoid running into surrogates
which are not printable and won't work with BPE tokenization."""

"For very large codebook size (e.g. > 32768), use this higher unicode offset to avoid running into surrogates\nwhich are not printable and won't work with BPE tokenization."

## 2. Helper functions
- unicode characters (i.e., string) to Mimi codes
- Mimi codes to unicode characters
- audio (e.g., file) to unicode string

In [3]:
def chars_to_codes(
    chars: str, 
    num_codebooks: int,
    codebook_size: int,
    return_tensors: Optional[str] = None, 
    unicode_offset: int = UNICODE_OFFSET,
) -> Union[List[List[int]], np.ndarray, torch.Tensor]:
    codes = np.array([ord(c) for c in chars])
    codes = codes.reshape(-1, num_codebooks).T
    for i in range(codes.shape[0]):
        codes[i] -= unicode_offset + i*codebook_size
    if return_tensors is None:
        codes = codes.tolist()
    elif return_tensors == "pt":
        codes = torch.tensor(codes)
    return codes

In [4]:
def codes_to_chars(
    codes: Union[List[List[int]], np.ndarray, torch.Tensor], 
    codebook_size: int,
    copy_before_conversion: bool = True,
    unicode_offset: int = UNICODE_OFFSET,
) -> str:
    if isinstance(codes, list):
        codes = np.array(codes)
        copy_before_conversion = False
    elif isinstance(codes, torch.Tensor):
        codes = codes.cpu().numpy()
    if len(codes.shape) != 2:
        raise ValueError("codes must be a 2D array of shape (num_codebooks, seq_length).")
    if copy_before_conversion:
        codes = codes.copy()
    for i in range(codes.shape[0]):
        codes[i] += unicode_offset + i*codebook_size
    codes = codes.T.reshape(-1)
    chars = "".join([chr(c) for c in codes])
    return chars

In [5]:
def audio_to_str(audio_numpy: np.ndarray, mimi_model: MimiModel, device: str) -> str:
    audio_tensor = torch.tensor(audio_numpy).to(device).unsqueeze(0)
    if len(audio_tensor.shape) == 2:
        audio_tensor = audio_tensor.unsqueeze(1)
    
    with torch.no_grad():
        audio_codes = mimi_model.encode(audio_tensor)
    
    codes = audio_codes[0][0].cpu()
    codes = codes[:8, :]
    audio_str = codes_to_chars(codes, codebook_size=2048)
    return audio_str

In [6]:
def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    if orig_sr == target_sr:
        return audio
    return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)

In [7]:
audio, sr = librosa.load("../data/2830-3979-0010.flac", sr=None)
audio_resampled = resample_audio(audio, orig_sr=sr, target_sr=24000)
audio_str = audio_to_str(audio_resampled, mimi, "cuda")
print(audio_str)

碌𐈙𐹴𑘵𑿘碌𐉍𐨫𑏐𑼼碌𐛈𐨻𑌳𑴩碌𐏻𐸤𑌳𑷅碌𐑼𐯄𑗨𑯧﹔𐄲𐼨𑜧𑡥碌𐏻𐸤𑓘𑷅ﭿ𐍼𐾝𑄫𑵻𐕄𐼨𑐺𑩽ﹼ𐃶𐨡𑁧𑷯︫𐄯𐼨𑔴𑮌ａ𐈮𐽖𑈼𑿀屢𐕜𐱇𑍺𑲐猪𐈵𐼕𑘇𑻰ﱛ𐐱𐢹𑄦𑫧ﰱ𐌰𐴯𑀎𑠵祖𐃠𐮂𑇛𑶉ﺤ𐍈𐤕𑏑𑿖磻𐘞𐷰𑘵𑿖שּׁ𐋡𐹁𑀿𑺎﹉𐂎𐣉𑙡𑾐Ｚ𐘵𐿊𑊼𑡦祿𐃓𐥰𑙡𑿐ﰦ𐑗𐪱𑞅𑮦ﳫ𐃓𐯓𑕣𑭎隷𐊀𐪢𑗤𑿖鍊𐊌𐼪𑊙𑰜淋𐌻𐼨𑚖𑢽識𐉜𐢧𑛮𑧰者𐖻𐵧𑝗𑢲＿𐆅𐸵𑁉𑰚ﳈ𐎾𐤡𑗪𑲢屢𐔳𐣚𑉮𑲮ﾰ𐕌𐲚𑉭𑾹陵𐊼𐼊𑊺𑯲六𐀄𐵆𑉋𑧋碌𐁄𐹢𑓡𑿕碌𐄲𐧞𑏐𑷅碌𐁄𐸤𑞺𑳹碌𐁄𐸤𑜧𑿘碌𐁄𐸤𑓖𑼼碌𐁄𐶣𑞺𑿘


In [None]:
# -------------------------------------- #
# To play audio directly in the notebook #
# -------------------------------------- #
def str2audio(audio_str: str):
    codes = chars_to_codes(audio_str, num_codebooks=8, codebook_size=2048, return_tensors="pt")
    codes = codes.to("cuda").unsqueeze(0)
    print(codes.shape)

    with torch.no_grad():
        audio_decoded = mimi.decode(codes).audio_values[0]
    print("shape of audio_decoded", audio_decoded.shape)
    return ipd.Audio(audio_decoded.cpu().numpy(), rate=24000)

In [None]:
# play audio here in the notebook
str2audio(audio_str)

torch.Size([1, 8, 42])
shape of audio_decoded torch.Size([1, 80640])


## 3. Inference Blueberry model

In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_NAME = "WillHeld/blueberry" # marin-audio (exp1699-checkpoint50000)
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="auto"
)
model.eval()
print(f"Number of parameters: {sum(p.numel() for p in model.parameters())}")

Loading model: WillHeld/blueberry


`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got 8192 and max_position_embeddings=4096


Number of parameters: 648614400


In [None]:
def extract_text_and_audio_segments(generated_text: str) -> Tuple[List[str], List[str]]:
    text_segments = []
    audio_segments = []
    
    # Split by text markers
    text_parts = generated_text.split("<|text_start|>")
    
    for part in text_parts[1:]:  # Skip the first empty part
        if "<|text_end|>" in part:
            text_content = part.split("<|text_end|>")[0]
            text_segments.append(text_content)
    
    # Split by audio markers
    audio_parts = generated_text.split("<|audio_start|>")
    
    for part in audio_parts[1:]:  # Skip the first part
        if "<|audio_end|>" in part:
            audio_content = part.split("<|audio_end|>")[0]
            audio_segments.append(audio_content)
        else:
            # Handle incomplete audio segment (generation stopped before <|audio_end|>)
            # Extract everything after <|audio_start|> until end of string
            audio_content = part.strip()
            if audio_content:
                audio_segments.append(audio_content)
    
    return text_segments, audio_segments


def run_inference(prompt: str, max_new_tokens: int = 1000):
    print(f"Prompt: '{prompt}'")
    # by default, tokenizer prepends the <|begin_of_text|> token
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            # note that: temperature=0.00001, # this will fail
            temperature=1.0,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    print(f"Generated: '{generated_text}'")
    print("-" * 80 + "\n")
    return generated_text

### 3.1 Testing Generation

In [12]:
generated_text = run_inference("<|text_start|>Paris is the capital of", 500)

Prompt: '<|text_start|>Paris is the capital of'


Generated: '<|begin_of_text|><|text_start|>Paris is the capital of the EU with the<|text_end|><|audio_start|>諸𐄲𐶣𑝏𑿘諸𐇡𐨫𑊚𑹰諸𐇡𐸤𑌹𑿘諸𐛈𐸤𑞺𑿘諸𐛈𐸤𑞺𑹰𐄋𐸤𑌹𑹰諸𐛈𐰆𑞺𑹰︚𐄲𐨫𑆬𑽌﵃𐞺𐠰𑉞𑶷濾𐈫𐳄𑔿𑼢𐐂𐹜𑟂𑸜來𐊾𐮵𑉍𑴐画𐇖𐦿𑕵𑹸ﲋ𐊞𐢞𑊵𑤋女𐚡𐴬𑂮𑯿狼𐍓𐪌𑚒𑯀𐎽𐳝𑟘𑯖ｅ𐌩𐦓𑏦𑵝ﲥ𐂀𐤮𑘿𑫦ﯺ𐗗𐰅𑞼𑤧ﷶ𐌩𐵻𑏚𑲛﻿𐅪𐽣𑒅𑷂ﬃ𐐗𐷊𑑪𑯞藍𐁑𐥮𑙈𑹶ﳺ𐀰𐦃𑑼𑼶祝𐞮𐭞𑍉𑪋ﲏ𐏣𐻓𑘭𑴗𐟜𐭌𑘗𑠣ﲴ𐔊𐩝𑔞𑠯陋𐑋𐬐𑊅𑿼ﾮ𐇫𐮥𑔬𑹶寧𐒼𐥛𑆻𑡥︚𐆋𐩟𑐕𑲦ﯘ𐟡𐰞𑐁𑰴ﶥ𐎝𐦉𑚆𑾁ﰅ𐔊𐵡𑕐𑥟<|audio_end|><|text_start|>capital being the principal economic<|text_end|><|audio_start|>𐟹𐶈𑐺𑱐濾𐌩𐠎𑟽𑵥ﬖ𐐪𐮨𑉧𑮌￙𐈿𐵨𑔪𑾘卑𐌩𐭰𑄣𑦚悔𐝡𐸑𑒅𑨄ﮨ𐓽𐭘𑒩𑿑ﬣ𐂺𐩲𑃒𑠑聾𐍙𐭰𑀽𑬉𐍜𐵗𑄌𑥂﹁𐉟𐦱𑒅𑪞杻𐕻𐳝𑏈𑯑ﾕ𐊊𐠌𑟻𑫯ﰢ𐜆𐠗𑓺𑹸Ｕ𐁒𐩱𑂗𑵴רּ𐒔𐵌𑟨𑩒ﯺ𐌩𐳧𑂚𑽘︎𐌾𐪘𑘿𑪆ﻗ𐇍𐯹𑅴𑱙寧𐏻𐡎𑉲𑪶<|audio_end|><|text_start|>prospects<|text_end|><|audio_start|>￯𐔊𐻿𑕌𑷟濾𐒢𐥗𑎲𑵓﹍𐅧𐱎𑂧𑨡ﳺ𐔊𐽣'
--------------------------------------------------------------------------------



In [13]:
texts, audios = extract_text_and_audio_segments(generated_text)
print("len(texts)", len(texts))
print("len(audios)", len(audios))
print("text: " + "\n".join(texts))
audio_str = "".join(audios)
N = len(audio_str)
audio_str = audio_str[:(N//8)*8]

len(texts) 3
len(audios) 3
text: Paris is the capital of the EU with the
capital being the principal economic
prospects


In [14]:
str2audio(audio_str)

torch.Size([1, 8, 59])
shape of audio_decoded torch.Size([1, 113280])


### 3.2 Test ASR / Speech Continuation

In [15]:
audio, sr = librosa.load("../data/2830-3979-0010.flac", sr=None)
audio_resampled = resample_audio(audio, orig_sr=sr, target_sr=24000)
audio_str = audio_to_str(audio_resampled, mimi, "cuda")
print(audio_str)
# BUT THE ESSENCE OF LUTHER'S LECTURES IS THERE

碌𐈙𐹴𑘵𑿘碌𐉍𐨫𑏐𑼼碌𐛈𐨻𑌳𑴩碌𐏻𐸤𑌳𑷅碌𐑼𐯄𑗨𑯧﹔𐄲𐼨𑜧𑡥碌𐏻𐸤𑓘𑷅ﭿ𐍼𐾝𑄫𑵻𐕄𐼨𑐺𑩽ﹼ𐃶𐨡𑁧𑷯︫𐄯𐼨𑔴𑮌ａ𐈮𐽖𑈼𑿀屢𐕜𐱇𑍺𑲐猪𐈵𐼕𑘇𑻰ﱛ𐐱𐢹𑄦𑫧ﰱ𐌰𐴯𑀎𑠵祖𐃠𐮂𑇛𑶉ﺤ𐍈𐤕𑏑𑿖磻𐘞𐷰𑘵𑿖שּׁ𐋡𐹁𑀿𑺎﹉𐂎𐣉𑙡𑾐Ｚ𐘵𐿊𑊼𑡦祿𐃓𐥰𑙡𑿐ﰦ𐑗𐪱𑞅𑮦ﳫ𐃓𐯓𑕣𑭎隷𐊀𐪢𑗤𑿖鍊𐊌𐼪𑊙𑰜淋𐌻𐼨𑚖𑢽識𐉜𐢧𑛮𑧰者𐖻𐵧𑝗𑢲＿𐆅𐸵𑁉𑰚ﳈ𐎾𐤡𑗪𑲢屢𐔳𐣚𑉮𑲮ﾰ𐕌𐲚𑉭𑾹陵𐊼𐼊𑊺𑯲六𐀄𐵆𑉋𑧋碌𐁄𐹢𑓡𑿕碌𐄲𐧞𑏐𑷅碌𐁄𐸤𑞺𑳹碌𐁄𐸤𑜧𑿘碌𐁄𐸤𑓖𑼼碌𐁄𐶣𑞺𑿘


In [16]:
str2audio(audio_str)

torch.Size([1, 8, 42])
shape of audio_decoded torch.Size([1, 80640])


In [17]:
# Continuation
generated_text = run_inference(f"<|audio_start|>{audio_str}", 500)
texts, audios = extract_text_and_audio_segments(generated_text)
print("len(texts)", len(texts))
print("len(audios)", len(audios))
print("-----------------------------")
print("\n-------------------------\n".join(texts))
audio_str = "".join(audios)
N = len(audio_str)
audio_str = audio_str[:(N//8)*8]
str2audio(audio_str)

Prompt: '<|audio_start|>碌𐈙𐹴𑘵𑿘碌𐉍𐨫𑏐𑼼碌𐛈𐨻𑌳𑴩碌𐏻𐸤𑌳𑷅碌𐑼𐯄𑗨𑯧﹔𐄲𐼨𑜧𑡥碌𐏻𐸤𑓘𑷅ﭿ𐍼𐾝𑄫𑵻𐕄𐼨𑐺𑩽ﹼ𐃶𐨡𑁧𑷯︫𐄯𐼨𑔴𑮌ａ𐈮𐽖𑈼𑿀屢𐕜𐱇𑍺𑲐猪𐈵𐼕𑘇𑻰ﱛ𐐱𐢹𑄦𑫧ﰱ𐌰𐴯𑀎𑠵祖𐃠𐮂𑇛𑶉ﺤ𐍈𐤕𑏑𑿖磻𐘞𐷰𑘵𑿖שּׁ𐋡𐹁𑀿𑺎﹉𐂎𐣉𑙡𑾐Ｚ𐘵𐿊𑊼𑡦祿𐃓𐥰𑙡𑿐ﰦ𐑗𐪱𑞅𑮦ﳫ𐃓𐯓𑕣𑭎隷𐊀𐪢𑗤𑿖鍊𐊌𐼪𑊙𑰜淋𐌻𐼨𑚖𑢽識𐉜𐢧𑛮𑧰者𐖻𐵧𑝗𑢲＿𐆅𐸵𑁉𑰚ﳈ𐎾𐤡𑗪𑲢屢𐔳𐣚𑉮𑲮ﾰ𐕌𐲚𑉭𑾹陵𐊼𐼊𑊺𑯲六𐀄𐵆𑉋𑧋碌𐁄𐹢𑓡𑿕碌𐄲𐧞𑏐𑷅碌𐁄𐸤𑞺𑳹碌𐁄𐸤𑜧𑿘碌𐁄𐸤𑓖𑼼碌𐁄𐶣𑞺𑿘'


Generated: '<|begin_of_text|><|audio_start|>碌𐈙𐹴𑘵𑿘碌𐉍𐨫𑏐𑼼碌𐛈𐨻𑌳𑴩碌𐏻𐸤𑌳𑷅碌𐑼𐯄𑗨𑯧﹔𐄲𐼨𑜧𑡥碌𐏻𐸤𑓘𑷅ﭿ𐍼𐾝𑄫𑵻𐕄𐼨𑐺𑩽ﹼ𐃶𐨡𑁧𑷯︫𐄯𐼨𑔴𑮌ａ𐈮𐽖𑈼𑿀屢𐕜𐱇𑍺𑲐猪𐈵𐼕𑘇𑻰ﱛ𐐱𐢹𑄦𑫧ﰱ𐌰𐴯𑀎𑠵祖𐃠𐮂𑇛𑶉ﺤ𐍈𐤕𑏑𑿖磻𐘞𐷰𑘵𑿖שּׁ𐋡𐹁𑀿𑺎﹉𐂎𐣉𑙡𑾐Ｚ𐘵𐿊𑊼𑡦祿𐃓𐥰𑙡𑿐ﰦ𐑗𐪱𑞅𑮦ﳫ𐃓𐯓𑕣𑭎隷𐊀𐪢𑗤𑿖鍊𐊌𐼪𑊙𑰜淋𐌻𐼨𑚖𑢽識𐉜𐢧𑛮𑧰者𐖻𐵧𑝗𑢲＿𐆅𐸵𑁉𑰚ﳈ𐎾𐤡𑗪𑲢屢𐔳𐣚𑉮𑲮ﾰ𐕌𐲚𑉭𑾹陵𐊼𐼊𑊺𑯲六𐀄𐵆𑉋𑧋碌𐁄𐹢𑓡𑿕碌𐄲𐧞𑏐𑷅碌𐁄𐸤𑞺𑳹碌𐁄𐸤𑜧𑿘碌𐁄𐸤𑓖𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐸤𑔅𑹰𣏕𐍈𐼨𑐔𑴩碌𐁄𐱻𑜧𑲦碌𐁄𐨫𑗨𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑌹𑻐碌𐁄𐸤𑜧𑺁碌𐁄𐨫𑌹𑼼祝𐜇𐼨𑔼𑽛ﻀ𐞓𐳖𑛑𑡞ﮧ𐓗𐢍𑅽𑵪ﭷ𐏓𐩠𑚳𑰘﫾𐂌𐣽𑃞𑫹ﴎ𐇾𐠈𑂲𑾘𐝹𐵗𑌚𑧁稜𐂹𐡜𑙼𑭨遲𐇫𐼛𑜧𑯹ﴌ𐈅𐵯𑁒𑼵ﳋ𐍿𐶆𑃊𑭃﫴𐑩𐠪𑆔𑮹﷾𐁣𐨹𑄖𑭭﵅𐛲𐩞𑑦𑥏祝𐙾𐩯𑊪𑽵﻾𐟹𐼓𑔥𑫕ｃ𐑶𐣺𑐋𑩄碌𐇌𐡝𑔋𑷅𐎭𐩾𑓓𑥚ﾰ𐑐𐰘𑀤𑮱ﮕ𐒱𐣡𑔽𑫭︣𐚸𐺟𑗱𑯲ךּ𐅬𐩯𑏡𑿖既𐏱𐳠𑈜𑠯ﳛ𐉱𐿵𑔉𑪢頻𐆁𐺯𑕞𑩰ﵷ𐁽𐡸𑅮𑰢𥉉𐚒𐨓𑉈𑦏ﲔ𐀗𐳧𑜭𑢒ﻒ𐗀𐷲𑍑𑠍ｪ𐎏𐱫𑂜𑼥倫𐗕𐧒𑒡𑰾瑱𐊐𐪍𑙾𑩲稜𐎾𐡜𑐔𑳝鈴𐟄𐳆𑁔𑢅ﳄ𐏗𐬡𑌮𑽉海𐃴𐥦𑀎𑢤￳𐒁𐴈𑏴𑯰ﵨ𐞝𐶧𑟹𑮱ﭦ𐆒𐼨𑟹𑿨稜𐏦𐼨𑂠𑦇ﾬ𐅛𐥌𑆚𑨸𐇡𐵆𑞺𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑞺𑿘碌𐁄𐸤𑜧𑳹碌𐁄𐶣𑞺𑼼碌𐁄𐤽𑏐𑼼碌𐁄𐶣𑞺𑹰碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑜧𑿘碌𐁄𐸤𑜧𑼼碌𐁄𐶣𑞺𑼼碌'
--------------------------------------------------------------------------------

len(texts) 0
len(audios) 1
---------

In [18]:
# ASR
generated_text = run_inference(f"<|audio_start|>{audio_str}<|audio_end|><|text_start|>", 500)
texts, audios = extract_text_and_audio_segments(generated_text)
print("len(texts)", len(texts))
print("len(audios)", len(audios))
print("-----------------------------")
print("\n-------------------------\n".join(texts))
audio_str = "".join(audios)
N = len(audio_str)
audio_str = audio_str[:(N//8)*8]
str2audio(audio_str)

Prompt: '<|audio_start|>碌𐈙𐹴𑘵𑿘碌𐉍𐨫𑏐𑼼碌𐛈𐨻𑌳𑴩碌𐏻𐸤𑌳𑷅碌𐑼𐯄𑗨𑯧﹔𐄲𐼨𑜧𑡥碌𐏻𐸤𑓘𑷅ﭿ𐍼𐾝𑄫𑵻𐕄𐼨𑐺𑩽ﹼ𐃶𐨡𑁧𑷯︫𐄯𐼨𑔴𑮌ａ𐈮𐽖𑈼𑿀屢𐕜𐱇𑍺𑲐猪𐈵𐼕𑘇𑻰ﱛ𐐱𐢹𑄦𑫧ﰱ𐌰𐴯𑀎𑠵祖𐃠𐮂𑇛𑶉ﺤ𐍈𐤕𑏑𑿖磻𐘞𐷰𑘵𑿖שּׁ𐋡𐹁𑀿𑺎﹉𐂎𐣉𑙡𑾐Ｚ𐘵𐿊𑊼𑡦祿𐃓𐥰𑙡𑿐ﰦ𐑗𐪱𑞅𑮦ﳫ𐃓𐯓𑕣𑭎隷𐊀𐪢𑗤𑿖鍊𐊌𐼪𑊙𑰜淋𐌻𐼨𑚖𑢽識𐉜𐢧𑛮𑧰者𐖻𐵧𑝗𑢲＿𐆅𐸵𑁉𑰚ﳈ𐎾𐤡𑗪𑲢屢𐔳𐣚𑉮𑲮ﾰ𐕌𐲚𑉭𑾹陵𐊼𐼊𑊺𑯲六𐀄𐵆𑉋𑧋碌𐁄𐹢𑓡𑿕碌𐄲𐧞𑏐𑷅碌𐁄𐸤𑞺𑳹碌𐁄𐸤𑜧𑿘碌𐁄𐸤𑓖𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐸤𑔅𑹰𣏕𐍈𐼨𑐔𑴩碌𐁄𐱻𑜧𑲦碌𐁄𐨫𑗨𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑌹𑻐碌𐁄𐸤𑜧𑺁碌𐁄𐨫𑌹𑼼祝𐜇𐼨𑔼𑽛ﻀ𐞓𐳖𑛑𑡞ﮧ𐓗𐢍𑅽𑵪ﭷ𐏓𐩠𑚳𑰘﫾𐂌𐣽𑃞𑫹ﴎ𐇾𐠈𑂲𑾘𐝹𐵗𑌚𑧁稜𐂹𐡜𑙼𑭨遲𐇫𐼛𑜧𑯹ﴌ𐈅𐵯𑁒𑼵ﳋ𐍿𐶆𑃊𑭃﫴𐑩𐠪𑆔𑮹﷾𐁣𐨹𑄖𑭭﵅𐛲𐩞𑑦𑥏祝𐙾𐩯𑊪𑽵﻾𐟹𐼓𑔥𑫕ｃ𐑶𐣺𑐋𑩄碌𐇌𐡝𑔋𑷅𐎭𐩾𑓓𑥚ﾰ𐑐𐰘𑀤𑮱ﮕ𐒱𐣡𑔽𑫭︣𐚸𐺟𑗱𑯲ךּ𐅬𐩯𑏡𑿖既𐏱𐳠𑈜𑠯ﳛ𐉱𐿵𑔉𑪢頻𐆁𐺯𑕞𑩰ﵷ𐁽𐡸𑅮𑰢𥉉𐚒𐨓𑉈𑦏ﲔ𐀗𐳧𑜭𑢒ﻒ𐗀𐷲𑍑𑠍ｪ𐎏𐱫𑂜𑼥倫𐗕𐧒𑒡𑰾瑱𐊐𐪍𑙾𑩲稜𐎾𐡜𑐔𑳝鈴𐟄𐳆𑁔𑢅ﳄ𐏗𐬡𑌮𑽉海𐃴𐥦𑀎𑢤￳𐒁𐴈𑏴𑯰ﵨ𐞝𐶧𑟹𑮱ﭦ𐆒𐼨𑟹𑿨稜𐏦𐼨𑂠𑦇ﾬ𐅛𐥌𑆚𑨸𐇡𐵆𑞺𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑞺𑿘碌𐁄𐸤𑜧𑳹碌𐁄𐶣𑞺𑼼碌𐁄𐤽𑏐𑼼碌𐁄𐶣𑞺𑹰碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑜧𑿘碌𐁄𐸤𑜧𑼼碌𐁄𐶣𑞺𑼼<|audio_end|><|text_start|>'


Generated: '<|begin_of_text|><|audio_start|>碌𐈙𐹴𑘵𑿘碌𐉍𐨫𑏐𑼼碌𐛈𐨻𑌳𑴩碌𐏻𐸤𑌳𑷅碌𐑼𐯄𑗨𑯧﹔𐄲𐼨𑜧𑡥碌𐏻𐸤𑓘𑷅ﭿ𐍼𐾝𑄫𑵻𐕄𐼨𑐺𑩽ﹼ𐃶𐨡𑁧𑷯︫𐄯𐼨𑔴𑮌ａ𐈮𐽖𑈼𑿀屢𐕜𐱇𑍺𑲐猪𐈵𐼕𑘇𑻰ﱛ𐐱𐢹𑄦𑫧ﰱ𐌰𐴯𑀎𑠵祖𐃠𐮂𑇛𑶉ﺤ𐍈𐤕𑏑𑿖磻𐘞𐷰𑘵𑿖שּׁ𐋡𐹁𑀿𑺎﹉𐂎𐣉𑙡𑾐Ｚ𐘵𐿊𑊼𑡦祿𐃓𐥰𑙡𑿐ﰦ𐑗𐪱𑞅𑮦ﳫ𐃓𐯓𑕣𑭎隷𐊀𐪢𑗤𑿖鍊𐊌𐼪𑊙𑰜淋𐌻𐼨𑚖𑢽識𐉜𐢧𑛮𑧰者𐖻𐵧𑝗𑢲＿𐆅𐸵𑁉𑰚ﳈ𐎾𐤡𑗪𑲢屢𐔳𐣚𑉮𑲮ﾰ𐕌𐲚𑉭𑾹陵𐊼𐼊𑊺𑯲六𐀄𐵆𑉋𑧋碌𐁄𐹢𑓡𑿕碌𐄲𐧞𑏐𑷅碌𐁄𐸤𑞺𑳹碌𐁄𐸤𑜧𑿘碌𐁄𐸤𑓖𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐸤𑔅𑹰𣏕𐍈𐼨𑐔𑴩碌𐁄𐱻𑜧𑲦碌𐁄𐨫𑗨𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑌹𑻐碌𐁄𐸤𑜧𑺁碌𐁄𐨫𑌹𑼼祝𐜇𐼨𑔼𑽛ﻀ𐞓𐳖𑛑𑡞ﮧ𐓗𐢍𑅽𑵪ﭷ𐏓𐩠𑚳𑰘﫾𐂌𐣽𑃞𑫹ﴎ𐇾𐠈𑂲𑾘𐝹𐵗𑌚𑧁稜𐂹𐡜𑙼𑭨遲𐇫𐼛𑜧𑯹ﴌ𐈅𐵯𑁒𑼵ﳋ𐍿𐶆𑃊𑭃﫴𐑩𐠪𑆔𑮹﷾𐁣𐨹𑄖𑭭﵅𐛲𐩞𑑦𑥏祝𐙾𐩯𑊪𑽵﻾𐟹𐼓𑔥𑫕ｃ𐑶𐣺𑐋𑩄碌𐇌𐡝𑔋𑷅𐎭𐩾𑓓𑥚ﾰ𐑐𐰘𑀤𑮱ﮕ𐒱𐣡𑔽𑫭︣𐚸𐺟𑗱𑯲ךּ𐅬𐩯𑏡𑿖既𐏱𐳠𑈜𑠯ﳛ𐉱𐿵𑔉𑪢頻𐆁𐺯𑕞𑩰ﵷ𐁽𐡸𑅮𑰢𥉉𐚒𐨓𑉈𑦏ﲔ𐀗𐳧𑜭𑢒ﻒ𐗀𐷲𑍑𑠍ｪ𐎏𐱫𑂜𑼥倫𐗕𐧒𑒡𑰾瑱𐊐𐪍𑙾𑩲稜𐎾𐡜𑐔𑳝鈴𐟄𐳆𑁔𑢅ﳄ𐏗𐬡𑌮𑽉海𐃴𐥦𑀎𑢤￳𐒁𐴈𑏴𑯰ﵨ𐞝𐶧𑟹𑮱ﭦ𐆒𐼨𑟹𑿨稜𐏦𐼨𑂠𑦇ﾬ𐅛𐥌𑆚𑨸𐇡𐵆𑞺𑼼碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑞺𑿘碌𐁄𐸤𑜧𑳹碌𐁄𐶣𑞺𑼼碌𐁄𐤽𑏐𑼼碌𐁄𐶣𑞺𑹰碌𐁄𐶣𑞺𑿘碌𐁄𐶣𑜧𑿘碌𐁄𐸤𑜧𑼼碌𐁄𐶣𑞺𑼼<|audio_end|><|text_start|>But the essence of Luther's lectures is there. A lot of this is an example of mirroring, "Jenist 