In [None]:
# Notebook-wide configuration: model identifiers and the server they are served from.
model = "qwen/qwen3-30b-a3b"
tailscale_server = "https://desktop-3oeimac.tail3b962f.ts.net"
# Chat-completions endpoint, derived from the server base URL above.
chat_completion_api = f"{tailscale_server}/api/v0/chat/completions"
embedding_model = "text-embedding-qwen3-embedding-8b@q5_0"

In [None]:
# Step 1: look for an existing transcript for each of the selected videos.
import glob 
from langchain_community.document_loaders import YoutubeLoader
from pytube import YouTube


# Directory holding the *.txt files that list the videos, one YouTube URL per line.
media_dir = "./media/it"
# One dict per URL: {"url", "title", "transcript"}; title/transcript are None on failure.
loaded_videos = []

# `recursive=True` was dropped: it only has an effect when the pattern contains `**`,
# so the set of matched files is unchanged.
for doc in glob.glob(f"{media_dir}/*.txt"):
    # Read the URLs first so the file is closed before any network call starts.
    with open(doc, "r") as file:
        urls = [line.strip() for line in file]

    for url in urls:
        if not url:
            # Blank lines (e.g. a trailing newline) are not videos; skip them.
            continue

        # Building the YouTube object is inside the try as well: a malformed URL
        # must be reported for that line only, not abort the whole cell.
        try:
            title = YouTube(url).title
        except Exception:
            title = None
            print(f'title not found for {url}')

        try:
            loader = YoutubeLoader.from_youtube_url(
                url, language=['en', 'it'], continue_on_failure=True
            )
            transcript = loader.load()
        except Exception:
            print(f'Error loading the transcript for {url}')
            transcript = None

        loaded_videos.append({"url": url, "title": title, "transcript": transcript})

print(loaded_videos)

title not found for https://www.youtube.com/watch?v=NyjXMMBPvSA
Error loading the transcript for https://www.youtube.com/watch?v=NyjXMMBPvSA
title not found for https://www.youtube.com/watch?v=mQENVePdT5A&t=6s
Error loading the transcript for https://www.youtube.com/watch?v=mQENVePdT5A&t=6s
[{'url': 'https://www.youtube.com/watch?v=NyjXMMBPvSA', 'title': None, 'transcript': None}, {'url': 'https://www.youtube.com/watch?v=mQENVePdT5A&t=6s', 'title': None, 'transcript': None}]


In [4]:
import io
import logging
import os
import time
from typing import Any, Callable, Dict, Iterator, Literal, Optional, Tuple, Union
from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParserLocal

from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.parsers.audio import _get_audio_from_blob

from langchain_community.document_loaders.blob_loaders import Blob

from langchain_community.utils.openai import is_openai_v1

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Module-scoped logger; records still propagate to the root logger's handlers.
logger = logging.getLogger(__name__)


class OpenAIWhisperParserLocalCustom(BaseBlobParser):
    """Transcribe audio blobs locally with a Whisper model from `transformers`.

    The device is chosen automatically: ``cuda:0`` with float16 weights when
    CUDA is available, otherwise ``cpu`` with float32 weights.

    Parameters:
    lang_model - whisper model to use, for example "openai/whisper-medium";
        when omitted, "openai/whisper-large-v3" is used
    batch_size - batch size passed to the pipeline on every transcription
    chunk_length - chunk length in seconds handed to the pipeline
    forced_decoder_ids - id states for decoder in multilanguage model,
        usage example:
        from transformers import WhisperProcessor
        processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
        forced_decoder_ids = processor.get_decoder_prompt_ids(language="french",
          task="transcribe")
        forced_decoder_ids = processor.get_decoder_prompt_ids(language="french",
          task="translate")
    """

    def __init__(
        self,
        lang_model: Optional[str] = None,
        batch_size: int = 8,
        chunk_length: int = 30,
        forced_decoder_ids: Optional[Tuple[Dict]] = None,
    ):
        """Load the Whisper model and build the speech-recognition pipeline.

        Args:
            lang_model: whisper model to use, for example "openai/whisper-medium".
              Defaults to None, which selects "openai/whisper-large-v3".
            batch_size: batch size used for decoding.
              Defaults to 8.
            chunk_length: chunk length (seconds) used during inference.
              Defaults to 30.
            forced_decoder_ids: id states for decoder in a multilanguage model.
              Defaults to None.

        Raises:
            ImportError: if `transformers` or `torch` is not installed.
        """
        # Import everything the constructor needs here, so the guard below really
        # covers all transformers names used by this class.
        try:
            from transformers import (
                AutoModelForSpeechSeq2Seq,
                AutoProcessor,
                pipeline,
            )
        except ImportError:
            raise ImportError(
                "transformers package not found, please install it with "
                "`pip install transformers`"
            )
        try:
            import torch
        except ImportError:
            raise ImportError(
                "torch package not found, please install it with `pip install torch`"
            )

        cuda_available = torch.cuda.is_available()
        self.device = "cuda:0" if cuda_available else "cpu"
        torch_dtype = torch.float16 if cuda_available else torch.float32
        self.batch_size = batch_size
        # Honour the caller's model choice; previously `lang_model` was ignored
        # and large-v3 was always loaded.
        model_id = lang_model or "openai/whisper-large-v3"

        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        model.to(self.device)

        processor = AutoProcessor.from_pretrained(model_id)

        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype,
            device=self.device,
            chunk_length_s=chunk_length,
        )

        if forced_decoder_ids is not None:
            # Best effort: if the config rejects the ids, keep the default decoder mode.
            try:
                self.pipe.model.config.forced_decoder_ids = forced_decoder_ids
            except Exception as exception_text:
                logger.info(
                    "Unable to set forced_decoder_ids parameter for whisper model. "
                    f"Text of exception: {exception_text}. "
                    "Therefore whisper model will use default mode for decoder"
                )

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Transcribe one audio blob and yield a single Document.

        Args:
            blob: audio blob to transcribe.

        Yields:
            One Document whose ``page_content`` is the transcribed text and whose
            metadata carries the blob's ``source``.

        Raises:
            ImportError: if `librosa` is not installed.
        """

        try:
            import librosa
        except ImportError:
            raise ImportError(
                "librosa package not found, please install it with "
                "`pip install librosa`"
            )

        audio = _get_audio_from_blob(blob)

        # export() hands back an open file object; copy its bytes into memory and
        # close it so the handle is not leaked on every transcription.
        exported = audio.export(format="mp3")
        try:
            file_obj = io.BytesIO(exported.read())
        finally:
            exported.close()

        # Transcribe
        print(f"Transcribing part  new {blob.path}!")  # noqa: T201

        # Resample to 16 kHz on load; the returned sample rate is not needed.
        y, _ = librosa.load(file_obj, sr=16000)

        prediction = self.pipe(y.copy(), batch_size=self.batch_size)["text"]

        yield Document(
            page_content=prediction,
            metadata={"source": blob.source},
        )


In [None]:
# Step 2: download the audio and transcribe it locally when no transcript is available yet.
from langchain_community.document_loaders import YoutubeAudioLoader
from langchain.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParserLocal
from langchain_community.document_loaders.blob_loaders.file_system import FileSystemBlobLoader

# model = whisper.load_model("turbo")
# result = model.transcribe("./test/🔥COME MIGLIORARE TUTTE LE HALF LAY？🔥.m4a")
# print(result["text"])

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


# Videos whose audio is downloaded into `save_dir` and transcribed locally.
urls_to_process = ["https://www.youtube.com/watch?v=NyjXMMBPvSA"]
save_dir = './test'

# Currently disabled: collect the URLs from step 1 that came back without a transcript.
# for v in loaded_videos:
#     if v.get('transcript',None) is None:
#         urls_to_process.append(v.get('url', None))

# The audio loader supplies the blobs; the custom Whisper parser turns each blob
# into a Document.
loader = GenericLoader(
    blob_loader=YoutubeAudioLoader(urls=urls_to_process, save_dir=save_dir),
    blob_parser=OpenAIWhisperParserLocalCustom(),
)
loader.load()

Device set to use cpu


[youtube] Extracting URL: https://www.youtube.com/watch?v=NyjXMMBPvSA
[youtube] NyjXMMBPvSA: Downloading webpage
[youtube] NyjXMMBPvSA: Downloading tv client config
[youtube] NyjXMMBPvSA: Downloading tv player API JSON
[youtube] NyjXMMBPvSA: Downloading ios player API JSON
[youtube] NyjXMMBPvSA: Downloading m3u8 information
[info] NyjXMMBPvSA: Downloading 1 format(s): 140
[download] ./test/🔥COSA FARE NELLE TRAZIONI SE HAI LE LEVE LUNGHE？🔥.m4a has already been downloaded
[download] 100% of    3.77MiB
[ExtractAudio] Not converting audio ./test/🔥COSA FARE NELLE TRAZIONI SE HAI LE LEVE LUNGHE？🔥.m4a; file is already in target format m4a
Transcribing part  new test/🔥COSA FARE NELLE TRAZIONI SE HAI LE LEVE LUNGHE？🔥.m4a!


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [None]:



# Standalone check: build a Whisper speech-recognition pipeline and transcribe one
# audio file that is already on disk.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3"

# Bound to `whisper_model`, not `model`: the first cell defines `model` as the LLM
# id string ("qwen/qwen3-30b-a3b"), and rebinding it here would silently replace it.
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
whisper_model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)


result = pipe("./test/🔥COME MIGLIORARE TUTTE LE HALF LAY？🔥.m4a",  return_timestamps=True)

print(result["text"])

Device set to use cpu
Using custom `forced_decoder_ids` from the (generation) config. This is deprecated in favor of the `task` and `language` flags/config options.
Transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English. This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`. See https://github.com/huggingface/transformers/pull/28687 for more details.


 ciao ragazzi benvenuti sul mio canale youtube sulla mia pagina facebook oggi vi voglio parlare della flay soprattutto per quello che riguarda va bene nel front lever ok ma soprattutto nella planche vabbè anche la bandiera volendo ma anche nella verticale perché la flay è una variante così scomoda prima di continuare con il bellissimo argomento di questo video voglio invitarti ad ad attivare la campanellina, a lasciare un commento, a lasciare un like, solo così potete aiutarmi a far crescere questo canale che è sempre stato gratuito e che ho sempre voluto coltivare con la mia passione. L'aumento delle visualizzazioni, l'aumento degli iscritti, quanto può diffondere questo canale è solo merito vostro. Oltre a questo vi invito a seguirmi sul mio canale Instagram dove condivido i risultati dei miei allievi perché io ci tengo ad essere conosciuto per il risultato dei miei allievi sono importantissimi e sotto questi video vado a condividere anche molte tips e molti suggerimenti visitate il 

In [2]:
# %pip installs into the running kernel's environment; the version is pinned to the
# one this notebook was last run with.
%pip install ipywidgets==8.1.7

Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting jupyterlab_widgets~=3.0.15
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.6/216.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting widgetsnbextension~=4.0.14
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
Successfully installed ipywidgets-8.1.7 jupyterlab_widgets-3.0.15 widgetsnbextension-4.0.14

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m25.1.1

In [5]:
# Force a garbage-collection pass; gc.collect() returns the number of unreachable
# objects it found, which is the value displayed as this cell's output.
# NOTE(review): presumably meant to release memory after the transcription cells;
# objects still bound to notebook globals are not freed by this call.
import gc 
gc.collect()

273