In [None]:
!pip install python-telegram-bot==13.15

Collecting python-telegram-bot==13.15
  Downloading python_telegram_bot-13.15-py3-none-any.whl.metadata (11 kB)
Collecting tornado==6.1 (from python-telegram-bot==13.15)
  Downloading tornado-6.1.tar.gz (497 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting APScheduler==3.6.3 (from python-telegram-bot==13.15)
  Downloading APScheduler-3.6.3-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting cachetools==4.2.2 (from python-telegram-bot==13.15)
  Downloading cachetools-4.2.2-py3-none-any.whl.metadata (4.6 kB)
Collecting tzlocal>=1.2 (from APScheduler==3.6.3->python-telegram-bot==13.15)
  Downloading tzlocal-5.3.1-py3-none-any.whl.metadata (7.6 kB)
Downloading python_telegram_bot-13.15-py3-none-any.whl (519 kB)
Downloading APScheduler-3.6.3-py2.py3-none-any.whl (58 kB)
Downloading cachetools-4.2.2-py3-none-any.whl (11 kB)
Downloading tzlocal-5.3.1-py3-none-any.whl (18 kB)
Building wheels for collected packages: tornado
  Building wheel for tornado (setup.py) ... [?25ldone

In [1]:
import os
import logging
from telegram.ext import Updater, MessageHandler, Filters
import IPython.display as ipd

import librosa
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
import io


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the bot token from a local file so it is never hardcoded in the notebook.
with open("Telegram-bot-token.txt") as f:
    # Drop surrounding whitespace, e.g. a trailing newline left by an editor.
    TOKEN = f.read().strip()


In [5]:

# HF_MODEL = "cantillation/Teamim-AllNusah-whisper-medium_Warmup_steps-1000_LR-1e-05_Random-True"
HF_MODEL = "cantillation/Teamim-medium_Random_WeightDecay-0.005_Augmented_New-Data_date-11-03-2025"

# Prefer the GPU, but fall back to the CPU so loading the model does not crash
# on a machine without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = WhisperForConditionalGeneration.from_pretrained(HF_MODEL).to(DEVICE)
processor = WhisperProcessor.from_pretrained(HF_MODEL, language="hebrew", task="transcribe")

# Sampling rate the feature extractor is configured for; audio is loaded at this rate.
SR = processor.feature_extractor.sampling_rate

In [6]:
def extract_features(audio):
    """Turn a waveform into a batched input-feature tensor for the model.

    Args:
        audio: Waveform samples handed to the processor's feature extractor,
            declared as being sampled at ``SR``.

    Returns:
        torch.Tensor: The features of the first item, with a leading batch
        axis of size 1 added.
    """
    extracted = processor.feature_extractor(audio, sampling_rate=SR)
    first_item = extracted.input_features[0]
    # Indexing with None prepends a batch axis (same as unsqueeze(0)).
    return torch.tensor(first_item)[None]


In [7]:
def transcribe(audio):
    """Transcribe a waveform to text with the loaded Whisper model.

    Args:
        audio: Waveform samples accepted by ``extract_features``
            (presumably a mono array sampled at ``SR`` -- confirm against callers).

    Returns:
        str: The decoded transcription with special tokens removed.
    """
    # Move the features to whatever device the model lives on instead of
    # assuming CUDA, so inputs and weights can never end up on different devices.
    inputs = extract_features(audio).to(model.device)

    # Beam search with 4 beams; max_length=225 caps the length of the output sequence.
    generated_ids = model.generate(inputs, max_length=225, num_beams=4, early_stopping=True)

    # A single clip was submitted, so decode the first (only) sequence of the batch.
    transcription = processor.decode(generated_ids[0], skip_special_tokens=True)

    return transcription


In [8]:
# # fast transcribe:
# from time import time
# import os
# os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:32,garbage_collection_threshold:0.8'


# import torch
# from faster_whisper import WhisperModel
# import gradio as gr


# device = "cuda" if torch.cuda.is_available() else "cpu"
# compute_type = "float16" if torch.cuda.is_available() else "int8"

# model_name = "cantillation/Teamim-AllNusah-whisper-medium_Warmup_steps-1000_LR-1e-05_Random-True"

# model = WhisperModel(model_name, device=device, compute_type=compute_type,)


# def transcribe(audio):
#     segments, _ = model.transcribe(audio, language='he', max_new_tokens=220)
#     return '\n'.join([segment.text for segment in segments])

# SR = 16000


In [None]:
# Configure logging at INFO verbosity; each record shows the timestamp,
# logger name, level and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Define a function to handle audio messages
def handle_audio(update, context):
    """Transcribe a received voice/audio message and reply with the text.

    Args:
        update: Incoming Telegram update.
        context: Callback context; ``context.bot`` is used to fetch the file
            and to send the replies.

    Returns:
        None. Updates that carry no message, or no voice/audio attachment,
        are ignored. Any failure while downloading, decoding or transcribing
        is logged and reported to the user instead of being left unanswered.
    """
    # effective_message also covers edited messages and channel posts,
    # for which update.message is None.
    message = update.effective_message
    if message is None:
        return
    audio_message = message.voice or message.audio
    if audio_message is None:
        return
    chat_id = message.chat_id

    try:
        # Get the audio file and decode it to a mono waveform at the model's sampling rate
        tg_file = context.bot.get_file(audio_message.file_id)
        raw_bytes = tg_file.download_as_bytearray()
        waveform = librosa.load(io.BytesIO(raw_bytes), sr=SR, mono=True)[0]

        # Send a message to the user
        context.bot.send_message(chat_id=chat_id, text="קיבלתי את הקובץ, אני מתחיל להמיר אותו לטקסט עם טעמים...")

        # Audio to text with cantillations
        transcription = str(transcribe(waveform))

        # Send the transcription to the user
        context.bot.send_message(chat_id=chat_id, text=f"זה מוכן!: \n {transcription}")
    except Exception:
        # Keep the traceback in the log and tell the user something went wrong,
        # so they are not left waiting for a reply that never comes.
        logging.exception("Failed to process audio message")
        context.bot.send_message(chat_id=chat_id, text="מצטער, לא הצלחתי לעבד את הקובץ. אנא נסה שוב.")

def main():
    """Build the bot, route audio and voice messages to ``handle_audio``, and run it.

    Polling is started and the call then stays in ``updater.idle()`` until the
    updater is stopped.
    """
    updater = Updater(TOKEN, use_context=True)
    dp = updater.dispatcher

    # Audio files and voice notes share the same callback; register one
    # handler per filter, audio first.
    for media_filter in (Filters.audio, Filters.voice):
        dp.add_handler(MessageHandler(media_filter, handle_audio))

    # Start fetching updates, then wait here until the bot is stopped.
    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()

2025-03-11 18:18:45,608 - apscheduler.scheduler - INFO - Scheduler started
Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
2025-03-11 19:20:45,004 - telegram.ext.updater - INFO - Received signal 2 (SIGINT), stopping...
2025-03-11 19:20:45,005 - apscheduler.scheduler - INFO - Scheduler has been shut down


In [None]:
# Example of how to implement a bot with text, command, conversation and audio handlers
from telegram import ReplyKeyboardMarkup
from telegram.ext import CommandHandler, MessageHandler, Filters, ConversationHandler

# Define a function to handle text messages
def handle_text(update, context):
    """Echo the received text back to its chat, prefixed with "You said: "."""
    incoming = update.message
    echo = "You said: " + incoming.text
    context.bot.send_message(chat_id=incoming.chat_id, text=echo)

# Define a function to handle /start command
def start(update, context):
    """Send the greeting message to the chat the update came from."""
    target_chat = update.message.chat_id
    greeting = "Hello, I'm your bot!"
    context.bot.send_message(chat_id=target_chat, text=greeting)

# Define a function to handle /help command
def help(update, context):
    """Send the usage hint to the chat the update came from.

    Note: this name shadows the ``help`` builtin within the notebook.
    """
    target_chat = update.message.chat_id
    usage_hint = "You can send me an audio message and I will transcribe it for you."
    context.bot.send_message(chat_id=target_chat, text=usage_hint)

# Define a function to handle a conversation
def start_conversation(update, context):
    """Reply with a one-time keyboard of four options and return state ``1``.

    Returns:
        int: ``1``, the key under which the follow-up handler is registered
        in the conversation's ``states`` mapping.
    """
    option_rows = [['Option 1', 'Option 2'], ['Option 3', 'Option 4']]
    keyboard = ReplyKeyboardMarkup(option_rows, one_time_keyboard=True)
    update.message.reply_text('Please choose:', reply_markup=keyboard)
    return 1

def continue_conversation(update, context):
    """Reply with the text the user sent and end the conversation.

    Returns:
        ``ConversationHandler.END``.
    """
    message = update.message
    message.reply_text('You chose: ' + message.text)
    return ConversationHandler.END


# Define a function to handle audio messages
def handle_audio(update, context):
    """Transcribe a received voice/audio message and reply with the text.

    Args:
        update: Incoming Telegram update.
        context: Callback context; ``context.bot`` is used to fetch the file
            and to send the replies.

    Returns:
        None. Updates that carry no message, or no voice/audio attachment,
        are ignored. Any failure while downloading, decoding or transcribing
        is logged and reported to the user instead of being left unanswered.
    """
    # effective_message also covers edited messages and channel posts,
    # for which update.message is None.
    message = update.effective_message
    if message is None:
        return
    audio_message = message.voice or message.audio
    if audio_message is None:
        return
    chat_id = message.chat_id

    try:
        # Get the audio file and decode it to a mono waveform at the model's sampling rate
        tg_file = context.bot.get_file(audio_message.file_id)
        raw_bytes = tg_file.download_as_bytearray()
        waveform = librosa.load(io.BytesIO(raw_bytes), sr=SR, mono=True)[0]

        # Send a message to the user
        context.bot.send_message(chat_id=chat_id, text="קיבלתי את הקובץ, אני מתחיל להמיר אותו לטקסט עם טעמים...")

        # Audio to text with cantillations
        transcription = str(transcribe(waveform))

        # Send the transcription to the user
        context.bot.send_message(chat_id=chat_id, text=f"זה מוכן!: \n {transcription}")
    except Exception:
        # Keep the traceback in the log and tell the user something went wrong,
        # so they are not left waiting for a reply that never comes.
        logging.exception("Failed to process audio message")
        context.bot.send_message(chat_id=chat_id, text="מצטער, לא הצלחתי לעבד את הקובץ. אנא נסה שוב.")
    
    
def main():
    """Build the example bot, register all handlers, and run it.

    Handler order matters: all handlers below are added to the same group, and
    within a group only the first handler that matches an update processes it.
    The conversation handler is therefore registered before the catch-all text
    handler; otherwise the user's reply to the option keyboard would be echoed
    by ``handle_text`` and the conversation would never leave its first state.
    """
    # Create an instance of the Updater class
    updater = Updater(TOKEN, use_context=True)

    # Get the dispatcher to register handlers
    dispatcher = updater.dispatcher

    # Register a handler for /start command
    start_handler = CommandHandler('start', start)
    dispatcher.add_handler(start_handler)

    # Register a handler for /help command
    help_handler = CommandHandler('help', help)
    dispatcher.add_handler(help_handler)

    # Register the conversation handler before the generic text handler so
    # that in-conversation replies reach it first.
    conv_handler = ConversationHandler(
        entry_points=[CommandHandler('conversation', start_conversation)],
        states={
            # Exclude commands so that /cancel falls through to the fallback
            # instead of being treated as the user's choice.
            1: [MessageHandler(Filters.text & (~Filters.command), continue_conversation)],
        },
        fallbacks=[CommandHandler('cancel', lambda update, context: ConversationHandler.END)]
    )
    dispatcher.add_handler(conv_handler)

    # Register a handler for audio messages
    audio_handler = MessageHandler(Filters.audio, handle_audio)
    dispatcher.add_handler(audio_handler)

    # Register a handler for voice messages
    voice_handler = MessageHandler(Filters.voice, handle_audio)
    dispatcher.add_handler(voice_handler)

    # Register the catch-all handler for text messages last
    text_handler = MessageHandler(Filters.text & (~Filters.command), handle_text)
    dispatcher.add_handler(text_handler)

    # Start the bot
    updater.start_polling()
    updater.idle()

if __name__ == '__main__':
    main()