In [None]:
from langchain.document_loaders import PyPDFLoader

def documents_loader(path: str):
    """
    Read a PDF with ``PyPDFLoader`` and hand back what it loads.

    Args:
        path (str): Location of the PDF, passed unchanged to ``PyPDFLoader``.

    Returns:
        list: Whatever ``PyPDFLoader(path).load()`` returns.
    """
    # Build the loader and load in one expression; no intermediate state is kept.
    return PyPDFLoader(path).load()


In [8]:
# Load the encyclopedia PDF and report how many documents the loader returned.
pdf_path = "./Data/PDF_Files/the-gale-encyclopedia-of-medicine_compress.pdf"
documents = documents_loader(pdf_path)
print(len(documents))

637


In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Cut the given documents into smaller, overlapping chunks.

    Args:
        documents (list): Documents that should be chunked.
        chunk_size (int): Chunk size handed to the splitter.
        chunk_overlap (int): Overlap between chunks handed to the splitter.

    Returns:
        list: The chunks produced by the splitter's ``split_documents``.
    """
    # Lengths are measured with the built-in ``len``.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

In [17]:
# Chunk the loaded documents and report how many chunks were produced.
chunks = split_documents(
    documents,
    chunk_size=1000,
    chunk_overlap=200,
)
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 3426


In [None]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline


# Use the first CUDA device with half precision when CUDA is available;
# otherwise fall back to the CPU with full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model_id = "openai/whisper-large-v3-turbo"

# Load the speech-to-text checkpoint, then move it to the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies the tokenizer and feature extractor passed to the pipeline.
processor = AutoProcessor.from_pretrained(model_id)

# Assemble the automatic-speech-recognition pipeline from the pieces above.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


In [5]:
# Run the speech-recognition pipeline on the sample recording and print the text.
audio_path = 'guess_age_gender.wav'
result = pipe(audio_path)
print(result["text"])




 I heard that you can understand what people say and even know their age and gender. So can you guess my age and gender from my voice?
