In [1]:
from huggingface_hub import login
import os
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from sentence_transformers import CrossEncoder
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables (presumably from a local .env file) before the
# os.getenv(...) lookups performed in later cells.
load_dotenv()

True

In [3]:
# Look up the Hugging Face access token in the process environment, then
# authenticate this session with it.
token = os.environ.get("HF_TOKEN")
login(token=token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [4]:
# Embedding model wrapper; model files are cached under ./model_cache.
embedding_id = "distiluse-base-multilingual-cased-v2"
embeddings = HuggingFaceEmbeddings(
    cache_folder="./model_cache",
    model_name=embedding_id,
)

In [5]:
# Cross-encoder checkpoint used for reranking, loaded directly through
# sentence-transformers with the same ./model_cache directory.
reranker_id = "cross-encoder/ms-marco-MiniLM-L-6-v2"
reranker = CrossEncoder(
    cache_dir="./model_cache",
    model_name=reranker_id,
)

In [6]:
# Same checkpoint id as above, this time wrapped in the LangChain
# cross-encoder adapter; cache_dir is forwarded through model_kwargs.
reranker_2 = HuggingFaceCrossEncoder(
    model_name=reranker_id,
    model_kwargs={"cache_dir": "./model_cache"},
)

In [4]:
import torch
from transformers import pipeline, BitsAndBytesConfig

In [None]:
# 4-bit NF4 quantization with double quantization and float16 compute.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Checkpoint to load; the commented-out id is an alternative kept for reference.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Text-generation pipeline over the quantized model, capped at 128 new tokens
# per call. Weights are cached under ./model_cache.
pipe = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs=dict(
        cache_dir="./model_cache",
        quantization_config=quantization_config,
    ),
    device_map="auto",
    max_new_tokens=128,
)

: 

In [6]:
# Wrap the transformers text-generation pipeline so it can be used as a
# LangChain LLM (it is composed into a chain with `|` in a later cell).
llm = HuggingFacePipeline(pipeline=pipe)

In [7]:
from langchain_core.prompts import PromptTemplate

# Step-by-step prompt with a single {question} placeholder.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)
prompt = PromptTemplate.from_template(template)

# Feed the rendered prompt straight into the local model.
chain = prompt | llm

# Run one sample question through the chain and show the raw completion.
question = "What is electroencephalography?"
response = chain.invoke({"question": question})
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Question: What is electroencephalography?

Answer: Let's think step by step. Electroencephalography (EEG) is a diagnostic tool that records the electrical activity of the brain. This is done using electrodes placed on the scalp to capture the brain's electrical signals. These signals are typically measured in terms of their frequency, amplitude, and synchronization, which can be used to diagnose and monitor various neurological conditions.

Step 1: Identify the main purpose of EEG
The main purpose of EEG is to record the electrical activity of the brain.

Step 2: Consider the types of signals recorded by EEG
EEG records various types of signals, including:
- Brain waves: delta, theta, alpha, beta, and theta-4 wave activity.
- Cortical activity: local field potentials and synchronized neural activity.
- Transient activity: spike and wave discharges.

Step 3: Analyze the applications of EEG
EEG has a wide range of applications, including:
- Diagnosing and monitoring neurological conditio

In [17]:
from langchain_huggingface import ChatHuggingFace

# Chat-model wrapper around the pipeline-backed `llm`.
# NOTE(review): verbose=True presumably enables extra logging — confirm.
chat = ChatHuggingFace(llm=llm, verbose=True)

In [18]:
from langchain.tools import tool

# NOTE(review): `@tool` presumably exposes this function's signature and
# docstring to the model as the tool schema/description — keep both accurate.
@tool
def multiply(x: int, y: int) -> int:
    """Multiplies two numbers."""
    return x * y

# Chat model with the `multiply` tool bound to it.
chat_with_tools = chat.bind_tools([multiply])

In [24]:
# Ask a question the `multiply` tool could answer and print the reply text.
# NOTE(review): only the text is printed; any tool calls carried on `result`
# are not inspected here — confirm whether the model actually used the tool.
result = chat_with_tools.invoke("What is 2 multiply by 3?")
print(result.text())

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


<|user|>
What is 2 multiply by 3?</s>
<|assistant|>
2 multiplied by 3 is 6.


In [None]:
from getpass import getpass

# Make the Tavily API key available through the TAVILY_API_KEY environment
# variable and as `tavily_key`.
#
# The previous version read the variable and, only when it was missing, wrote
# that same missing value (None) back into os.environ, which raises
# "TypeError: str expected, not NoneType". When the key is absent we now ask
# for it interactively without echoing it, and fail with a clear message if
# nothing is entered.
tavily_key = os.getenv("TAVILY_API_KEY")

if not tavily_key:
    tavily_key = getpass("Enter your Tavily API key: ").strip()
    if not tavily_key:
        raise RuntimeError(
            "TAVILY_API_KEY is not set and no key was entered; "
            "add it to your .env file or export it before running this cell."
        )
    os.environ["TAVILY_API_KEY"] = tavily_key