In [1]:
from operator import itemgetter
import yaml
from loguru import logger

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.vectorstores import FAISS
from langchain.llms.huggingface_hub import HuggingFaceHub
from langchain.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

import torch

In [2]:
# Authenticate this kernel session with the Hugging Face Hub. Called without
# arguments, `login()` prompts for the token interactively (see the widget output).
# NOTE(review): presumably needed for the gated meta-llama checkpoint loaded later — confirm.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# Location of the local credentials YAML; a relative path, so it is resolved
# against the kernel's current working directory when opened.
PATH_TO_CREDS = "../conf/local/credentials.yml"

In [4]:
# Read the "hugging_face" section of the local credentials file.
# `yaml.safe_load` is used so the file cannot construct arbitrary Python objects.
# The encoding is pinned so the read does not depend on the platform default.
with open(PATH_TO_CREDS, "r", encoding="utf-8") as f:
    hf_creds = yaml.safe_load(f)["hugging_face"]
logger.info(f"Successfully loaded HuggingFace credentials from '{PATH_TO_CREDS}'")

[32m2023-11-06 02:29:56.766[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mSuccessfully loaded HuggingFce credentials from '../conf/local/credentials.yml'[0m


In [5]:
from langchain.embeddings import HuggingFaceEmbeddings

# Candidate BGE checkpoints: "BAAI/bge-base-en" or "BAAI/bge-small-en".
# The small variant is the one selected here.
model_name = "BAAI/bge-small-en"

# Forwarded as `model_kwargs`; requests device index 0
# (presumably the first CUDA GPU — confirm on the target machine).
model_kwargs = dict(device=0)

# Forwarded as `encode_kwargs`; embedding normalization is explicitly turned off.
encode_kwargs = dict(normalize_embeddings=False)

# Embedding model shared by the vector store built later in the notebook.
hf_embed = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [6]:
# OpenAIEmbeddings??

In [7]:
# FAISS.from_texts??

Inspired by [https://python.langchain.com/docs/expression_language/cookbook/retrieval](https://python.langchain.com/docs/expression_language/cookbook/retrieval)

In [8]:
# --- Retriever ---------------------------------------------------------------
# Build a small FAISS index over three example sentences, embedded with the
# HuggingFace model `hf_embed` (OpenAIEmbeddings() is the hosted alternative,
# not used here).
vectorstore = FAISS.from_texts(
    [
        "harrison worked at kensho",
        "Alain worked at Fieldbox",
        "Suzie worked at FamilyMart",
    ],
    embedding=hf_embed,
)
retriever = vectorstore.as_retriever()

# --- Prompt ------------------------------------------------------------------
# The model is told to answer only from the retrieved context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# --- LLM ---------------------------------------------------------------------
# A transformers text-generation pipeline wrapped for LangChain.
# Alternatives that are not used here: ChatOpenAI(), HuggingFaceHub(...) with
# hf_creds["hf_hub_token"], or HuggingFacePipeline.from_model_id(...).
model_id = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_auto = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # load the weights in half precision
)

# `max_new_tokens` is deliberately not set, so the pipeline's default
# generation settings apply. The pipeline is placed on device index 0.
pipe = pipeline(
    task="text-generation",
    model=model_auto,
    tokenizer=tokenizer,
    device=0,
)
model = HuggingFacePipeline(pipeline=pipe)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [39]:
# Chain for plain-string input: the incoming string is sent to the retriever
# (filling {context}) and is also passed through unchanged as {question}.
rag_inputs = {"context": retriever, "question": RunnablePassthrough()}

# inputs -> prompt -> LLM -> plain string
chain = rag_inputs | prompt | model | StrOutputParser()

In [41]:
# Smoke test: the raw string is used both as the retriever query and as the
# prompt's {question}.
chain.invoke("where did Alain work?")

'\nAlain worked at Fieldbox.'

In [34]:
# Re-declare the prompt used by this chain (answer only from the retrieved context).
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Variant that takes a dict as input. The "question" field is extracted once:
# it is piped into the retriever to produce {context}, and also used directly
# as {question}.
pick_question = itemgetter("question")
chain = (
    {"context": pick_question | retriever, "question": pick_question}
    | prompt
    | model
    | StrOutputParser()
)

In [38]:
# Query the dict-input chain. The chain reads only the "question" key (both of
# its branches use itemgetter("question")), so no other keys are passed:
# an extra key such as "language" would be silently ignored.
chain.invoke({"question": "where did Suzie work"})

'\nAnswer: Suzie worked at FamilyMart.'