In [4]:
from langchain.document_loaders import PyPDFLoader

def documents_loader(path: str):
    """
    Load documents from a given path.
    Args:
        path (str): The path to the document.
    Returns:
        list: A list of loaded documents.
    """
    loader = PyPDFLoader(path)
    documents = loader.load()
    return documents


In [5]:
documents = documents_loader("./Data/PDF_Files/D_Strange-Girl_Meets_Boy_Penguin_Readers-1-min.pdf")
print(len(documents))

34


In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Split documents into smaller chunks.
    Args:
        documents (list): List of documents to split.
        chunk_size (int): Size of each chunk.
        chunk_overlap (int): Overlap between chunks.
    Returns:
        list: List of split documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len
    )
    text_chunks = text_splitter.split_documents(documents)
    return text_chunks

In [7]:
chunks = split_documents(documents, chunk_size=1000, chunk_overlap=200)
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 35


In [8]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_huggingface import HuggingFaceEmbeddings
import uuid
import os


def store_documents_in_faiss(chunks, document_name):
    """
    Store documents in a FAISS vector store.
    Args:
        chunks (list): List of document chunks to be stored.
    """
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    index = faiss.IndexFlatL2(len(embedding_model.embed_query("hello world")))

    vector_store = FAISS(
        embedding_function=embedding_model,
        index=index,
        docstore= InMemoryDocstore(),
        index_to_docstore_id={}
    )

    ids = [str(uuid.uuid4()) for _ in chunks]
    vector_store.add_documents(documents=chunks, ids=ids)
    vector_store.save_local(os.path.join('Data/vector_db', document_name))
    vector_store = FAISS.load_local(
        os.path.join('Data/vector_db', document_name),
        embedding_model,
        allow_dangerous_deserialization=True
    )
    return vector_store


In [9]:
def query_faiss_vector_store(vector_store, query, k=3):
    """
    Query the FAISS vector store for similar documents.
    Args:
        vector_store (FAISS): The FAISS vector store instance.
        query (str): The query string to search for.
        k (int): The number of similar documents to return.
    Returns:
        list: List of similar documents.
    """
    results = vector_store.similarity_search(query=query, k=k)
    return results


In [10]:
vector_store = store_documents_in_faiss(chunks, "boy_meet_girl")

results = query_faiss_vector_store(vector_store, query="how to meet boy", k=3)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]\n\n\n")

* It was a big new boat with cafes, shops, a cinema 
and a disco. In the evening Louise and 1 went to the 
disco together. We had a Coke and listened to the 
music and watched the dancers. But he wasn't there. 
Then suddenly a tall boy in a black and white shirt 
came in — it was him! He was with a friend. They 
stopped and looked slowly at all the people in the 
disco. It was dark in there and he stood and looked for 
a long time. 
10 
He's looking for me! 
He wants to see me! [{'producer': '3-Heights(TM) PDF Optimization Shell 5.9.1.5 (http://www.pdf-tools.com)', 'creator': 'PyPDF', 'creationdate': '2007-03-26T22:47:34+00:00', 'moddate': '2021-01-27T15:00:06+01:00', 'source': './Data/PDF_Files/D_Strange-Girl_Meets_Boy_Penguin_Readers-1-min.pdf', 'total_pages': 34, 'page': 10, 'page_label': '11'}]



* Girl Meets Boy 
And suddenly there he was, this tall, quiet boy in a blue and 
white shirt . . . I'll always remember the first time I saw him. 
Donna is on a boat. She is going to Spai

In [11]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline


device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3-turbo"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

Device set to use cpu


In [14]:
result = pipe('./Data/audio_files/guess_age_gender.wav')
print(result["text"])


Using custom `forced_decoder_ids` from the (generation) config. This is deprecated in favor of the `task` and `language` flags/config options.
Transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English. This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`. See https://github.com/huggingface/transformers/pull/28687 for more details.


 I heard that you can understand what people say and even know their age and gender. So can you guess my age and gender from my voice?


In [None]:
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GORQ_API"))

speech_file_path = "speech.wav" 
model = "playai-tts"
voice = "Fritz-PlayAI"
text = "I love building and shipping new features for our users!"
response_format = "wav"

response = client.audio.speech.create(
    model=model,
    voice=voice,
    input=text,
    response_format=response_format
)
response.write_to_file(speech_file_path)

In [16]:
from groq import Groq

def text_to_speech(text, speech_file_path = "speech.wav", model="playai-tts", voice="Fritz-PlayAI"):
    """
    Converts text to speech using the Groq API.
    """
    client = Groq(api_key=os.environ.get("GORQ_API"))
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=text,
        response_format="wav"
    )
    audio_file = response.write_to_file(os.path.join('Data/audio_files', speech_file_path))
    return audio_file

# Example usage
text = "Hello, this is a test of the text-to-speech conversion."
audio_file = text_to_speech(text, speech_file_path="test_speech.wav")

In [None]:
from groq import Groq
import base64
import os

# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

# Path to your image
image_path = "./Data/Images/OIP.webp"

# Getting the base64 string
base64_image = encode_image(image_path)

client = Groq(api_key=os.environ.get("GORQ_API"))

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Which desease this image shows?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                    },
                },
            ],
        }
    ],
    model="meta-llama/llama-4-scout-17b-16e-instruct",
)

print(chat_completion.choices[0].message.content)

The image shows a person with white flakes on their scalp, which is a common symptom of a skin condition known as **dandruff** or **seborrheic dermatitis**. However, the presence of small, white flakes that resemble tiny scales or patches, particularly on the scalp, is more indicative of **dandruff** or **seborrheic dermatitis**, but most specifically **dandruff**.

A more severe and specific condition characterized by a similar appearance is **head lice** infestation, but the image does not show any moving objects (lice or nits close to the scalp) but rather flakes. 

However, the flakes are also a symptom of **psoriasis**. Considering the typical appearance of the condition (white scales), I'd say the image most likely shows **dandruff**.


In [1]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
    ),
)

chat_model = ChatHuggingFace(llm=llm)

  from .autonotebook import tqdm as notebook_tqdm


OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Check your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [None]:
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

messages = [
    SystemMessage(content="You're a helpful assistant"),
    HumanMessage(
        content="What happens when an unstoppable force meets an immovable object?"
    ),
]

ai_msg = chat_model.invoke(messages)

In [2]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="meta-llama/Llama-3.1-8B-Instruct")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Check your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [None]:
from huggingface_hub import snapshot_download

snapshot_download(repo_id="meta-llama/Llama-2-7b-hf", repo_type="model")

Fetching 17 files:   0%|          | 0/17 [00:00<?, ?it/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


GatedRepoError: 403 Client Error. (Request ID: Root=1-6867b1b7-72c788b62aee652a0e6607bc;f06c94a0-47aa-4816-a81a-01b3f2df0937)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/01c7f73d771dfac7d292323805ebc428287df4f9/.gitattributes.
Access to model meta-llama/Llama-2-7b-hf is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Llama-2-7b-hf to ask for access.

In [30]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.messages import SystemMessage, HumanMessage

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3-8b-chat-hf",  # Use a chat-capable model
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

chat = ChatHuggingFace(llm=llm, verbose=True)

messages = [
    SystemMessage(content="You are a helpful translator. Translate the usersentence to French."),
    HumanMessage(content="I love programming."),
]

print(chat.invoke(messages))

RepositoryNotFoundError: 404 Client Error. (Request ID: Root=1-6867b7eb-7c71ab9a6aaf62b01bf41667;0edda38b-b874-498b-9959-84c635e22929)

Repository Not Found for url: https://huggingface.co/api/models/meta-llama/Llama-3-8b-chat-hf?expand=inferenceProviderMapping.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated. For more details, see https://huggingface.co/docs/huggingface_hub/authentication

In [6]:
from groq import Groq
import os

def text_to_text(text: str):
    """
    Converts text to text using the Groq API.
    Args:
        text (str): The input text to be processed.
    Returns:
        str: The response from the Groq API.
    """
    client = Groq(api_key=os.environ.get("GORQ_API"))
    completion = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        messages=[
        {
            "role": "user",
            "content": text
        }
        ],
        temperature=1,
        max_completion_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )
    response = ""
    for chunk in completion:
        content = chunk.choices[0].delta.content or ""
        response += content

    return response.strip()

# Example usage
text = "can you tell me what is desease and how to treat it?" 
response = text_to_text(text)
print(response)

I'd be happy to explain what a disease is and provide general information on how to approach treatment.

**What is a disease?**

A disease is a condition that impairs normal body functions, causing harm or abnormal changes to the body's structure or function. It's a pathological condition that can affect any part of the body, including organs, tissues, or cells. Diseases can be caused by various factors, such as:

1. **Infectious agents**: Bacteria, viruses, fungi, parasites, or prions (e.g., tuberculosis, influenza, malaria)
2. **Genetic mutations**: Inherited or acquired changes in DNA (e.g., sickle cell anemia, cystic fibrosis)
3. **Environmental factors**: Exposure to toxins, radiation, or pollutants (e.g., lead poisoning, radiation sickness)
4. **Lifestyle factors**: Poor diet, lack of exercise, smoking, or excessive stress (e.g., obesity, diabetes)
5. **Immunological disorders**: Abnormal immune responses (e.g., autoimmune diseases like rheumatoid arthritis)

**Types of diseases*