In [None]:
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import ElasticVectorSearch
from langchain.text_splitter import CharacterTextSplitter
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.chains import ConversationChain, RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.schema import SystemMessage
from langchain.agents import Tool
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,  pipeline
import os

In [None]:
# --- Models -------------------------------------------------------------
# Hugging Face model id handed to the embeddings wrapper.
HF_SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-mpnet-base-v2"
# Alternative sentence-similarity model, currently disabled:
# ST_SIM_MODEL = "hkunlp/instructor-base"
# Text-to-text (seq2seq) model id used for answer generation.
T2T_MODEL = "google/flan-t5-base"
# Directory passed as ``cache_dir`` when loading the text-to-text model.
CACHE_DIR = "./cache"

# --- Services -----------------------------------------------------------
# Elasticsearch endpoint and index that back the vector store.
ELASTIC_URL = "http://localhost:9200/"
ELASTIC_INDEX = "genshinpedia_small"
# NOTE(review): not referenced in the visible cells. MongoDB clients usually
# expect a ``mongodb://`` URI rather than ``http://`` — confirm before use.
MONGO_URL = "http://localhost:27017/"

# --- Data ---------------------------------------------------------------
# Presumably the folder holding the source documents; it is not referenced in
# the visible cells — verify against the ingestion code.
DB_FILES = "en_datasets"

### Sección de DB Retriever

In [None]:
# Embedding model handed to the Elasticsearch vector store.
# NOTE(review): HF_SENTENCE_TRANSFORMER_MODEL is a plain sentence-transformers
# checkpoint, yet it is loaded through the Instructor wrapper (the file also
# imports HuggingFaceEmbeddings). Confirm which wrapper was used when the index
# was populated before changing this — query and document embeddings must match.
instructor_embeddings = HuggingFaceInstructEmbeddings(model_name=HF_SENTENCE_TRANSFORMER_MODEL)

In [None]:
# Vector store bound to the configured Elasticsearch URL and index, using the
# embedding model defined earlier in the notebook.
elastic_db = ElasticVectorSearch(
    elasticsearch_url=ELASTIC_URL,
    index_name=ELASTIC_INDEX,
    embedding=instructor_embeddings,
)

In [None]:
# Retriever view of the vector store; k=2 is passed as the search setting, so
# each query is limited to two documents.
db_retriever = elastic_db.as_retriever(search_kwargs=dict(k=2))

### Sección del modelo base para T2T

In [None]:
# Load the tokenizer and the seq2seq weights for the text-to-text model.
# Both calls use CACHE_DIR so every downloaded artifact lands in the same
# place (previously only the model weights honoured it).
tokenizer = AutoTokenizer.from_pretrained(T2T_MODEL, cache_dir=CACHE_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(T2T_MODEL, cache_dir=CACHE_DIR)

In [None]:
# Text-to-text generation pipeline built from the model and tokenizer loaded
# in the previous cell.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=1000,
    # `temperature` only influences generation when sampling is enabled;
    # without `do_sample=True` decoding is greedy and the value is ignored.
    do_sample=True,
    temperature=0.3,
    repetition_penalty=1.15,
)
# Wrap the pipeline so LangChain chains can use it as their LLM.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Prompt templates with two placeholders, {context} and {question}.
# They are written as explicit string pieces so the leading/trailing newlines
# and the trailing spaces inside the text stay visible.

# Baseline wording.
ORIGINAL_PROMPT = (
    "\n"
    "I am a helpful AI that answers questions. When I don't know the answer I say I don't know. \n"
    "I know context: {context} when asked: {question} my response using only information in the context is: \n"
)

# Variant that asks for a more detailed answer.
TUNED_PROMPT = (
    "\n"
    "I am a helpful AI that answers questions providing details from the context in my answer. "
    "When I don't know the answer I say \"I don't know\" without the quotes.\n"
    "I know the context: {context} when user ask me the {question}. My detailed response is:\n"
)

### Sección de QA Chain

In [None]:
# Retrieval QA chain: documents come from `db_retriever` and the answer from
# `llm`, using the "stuff" chain type. The chain is asked to return the source
# documents next to the answer and to log verbosely.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db_retriever,
    chain_type="stuff",
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Smoke test: a single, self-contained question for the QA chain.
workplace_question = "Where does Hu Tao works?"
qa_chain(workplace_question)

In [None]:
# Follow-up question that relies on the previous one ("she").
# `qa_chain` was built without a memory argument, so presumably nothing links
# this call to the earlier question — useful as a baseline before adding memory.
followup_question = "What does she do at there?"
qa_chain(followup_question)

### Creación de una Tool para el buscador de GI

In [None]:
def _genshinpedia_answer(question: str) -> str:
    """Answer a question with the Genshinpedia QA chain and return only the text.

    `qa_chain` is created with `return_source_documents=True`, so it yields two
    output keys ("result" and "source_documents"). `Chain.run` only supports
    chains with exactly one output key and raises otherwise, so the chain is
    called directly and the answer is picked out of the returned mapping.

    Args:
        question: Natural-language question supplied by the agent.

    Returns:
        The value stored under the chain's "result" key.
    """
    return qa_chain({"query": question})["result"]


# Tool wrapper that lets an agent query the Genshinpedia QA chain.
genshin_tool = Tool(
    name="Genshinpedia",
    func=_genshinpedia_answer,
    description="Useful for when you need to answer questions about Genshin Impact characters",
)

### Intento de Conversación (Memoria)

In [None]:
# Conversation buffer exposed to chains under the "chat_history" key, returned
# as message objects rather than a single string.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    # The conversational retrieval chain this memory is attached to is created
    # with return_source_documents=True and therefore returns more than one
    # output key. The memory must be told which one to store, otherwise saving
    # the context fails with "One output key expected".
    output_key="answer",
    return_messages=True,
)

In [None]:
memory_blocks = []

# `TEST_PROMPT` was referenced here without ever being defined (NameError).
# It is built from TUNED_PROMPT, whose placeholders are {context} and
# {question}. NOTE(review): confirm TUNED_PROMPT (not ORIGINAL_PROMPT) is the
# wording intended for this experiment.
TEST_PROMPT = PromptTemplate(
    template=TUNED_PROMPT,
    input_variables=["context", "question"],
)

# Conversational retrieval chain: retrieves with `db_retriever`, answers with
# `llm`, and keeps the dialogue in `memory`.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db_retriever,
    memory=memory,
    # A {context}/{question} prompt belongs to the answer-generation
    # (combine-documents) step. `condense_question_prompt` instead expects a
    # template over the chat history and the follow-up question, so the default
    # condense prompt is kept.
    combine_docs_chain_kwargs={"prompt": TEST_PROMPT},
    return_source_documents=True,
    verbose=False,
)

### Intento de Agente

In [None]:
# Tool list for the agent experiment; currently only the Genshinpedia tool.
tools = [genshin_tool]

In [None]:
# Hugging Face id of the instruction-tuned causal LM used in the agent experiment.
# Previously tried alternative, kept for reference:
#INSTRUCTOR_MODEL = "tiiuae/falcon-7b-instruct"
INSTRUCTOR_MODEL = "syzymon/long_llama_3b_instruct"

In [1]:
from transformers import BitsAndBytesConfig
from transformers import LlamaTokenizer, AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

  warn("The installed version of bitsandbytes was compiled without GPU support. "


/home/ralvarez22/anaconda3/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


In [None]:
# 4-bit NF4 quantization settings with double quantization and float16 compute.
# NOTE(review): in the visible cells the model load keeps its
# `quantization_config` argument commented out, so this object looks unused.
_nf4_settings = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
quantization_config = BitsAndBytesConfig(**_nf4_settings)

In [None]:
# Load the instruction-tuned causal LM in float32.
# NOTE(review): despite the variable name, no quantization is applied here —
# the device-map, offload and quantization options are all commented out.
model_4bit = AutoModelForCausalLM.from_pretrained(
    INSTRUCTOR_MODEL,
    #device_map="auto",
    trust_remote_code=True,  # runs the modelling code shipped with the checkpoint
    torch_dtype=torch.float32,
    #offload_folder="./offloads",
    #quantization_config=quantization_config,
)

# The tokenizer must come from the same checkpoint as the model. The original
# cell referenced an undefined `model_id` (NameError); INSTRUCTOR_MODEL is the
# id used for the model above.
# Note: this rebinds `tokenizer`, which earlier in the notebook held the
# text-to-text model's tokenizer.
#tokenizer = AutoTokenizer.from_pretrained(INSTRUCTOR_MODEL)
tokenizer = LlamaTokenizer.from_pretrained(INSTRUCTOR_MODEL)

In [2]:
# Environment check: the CUDA version torch was built with, then whether a
# CUDA device is usable. Each value is printed on its own line.
for cuda_fact in (torch.version.cuda, torch.cuda.is_available()):
    print(cuda_fact)

None
False
