In [31]:
import os
import getpass

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import OpenSearchVectorSearch
from langchain.document_loaders import TextLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA

In [17]:
# Hugging Face model id of the sentence-embedding model used to vectorise the chunks.
embeddings_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"

In [18]:
# Embedding function handed to the vector store; it wraps the model named in
# `embeddings_model_name`.
embeddings = HuggingFaceEmbeddings(
    model_name=embeddings_model_name,
)

In [19]:

# Load the speech transcript and split it into chunks for embedding.
# The encoding is given explicitly so the load does not depend on the platform's
# default text encoding (the transcript contains non-ASCII punctuation such as
# curly apostrophes).
loader = TextLoader("../data/state_of_the_union.txt", encoding="utf-8")
documents = loader.load()

# chunk_size=1000 with no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)


In [38]:
# Connection settings are read from the environment so that no credentials are
# stored in the notebook. The defaults reproduce the original local setup; the
# password falls back to an interactive prompt when OPENSEARCH_PASSWORD is unset.
opensearch_url = os.environ.get("OPENSEARCH_URL", "https://localhost:9200")
opensearch_user = os.environ.get("OPENSEARCH_USER", "admin")
opensearch_password = os.environ.get("OPENSEARCH_PASSWORD") or getpass.getpass(
    f"OpenSearch password for {opensearch_user!r}: "
)

# Embed the split chunks and index them in OpenSearch (faiss engine,
# inner-product space, with the index parameters ef_construction=256 and m=48).
docsearch = OpenSearchVectorSearch.from_documents(
    docs,
    embeddings,
    engine="faiss",
    space_type="innerproduct",
    ef_construction=256,
    m=48,
    opensearch_url=opensearch_url,
    http_auth=(opensearch_user, opensearch_password),
    # NOTE(review): use_ssl=False next to an https:// URL looks contradictory, and
    # certificate/hostname verification is switched off below. Presumably this is
    # meant for a local development cluster only; confirm before pointing this
    # at anything else.
    use_ssl=False,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

In [50]:
# Question used both for the direct similarity search here and for the QA chain
# further down.
query = "How much does the president want to cut the cancer death rate?"

# Fetch the 10 closest chunks for the question.
# NOTE(review): this rebinds `docs`, which until now held the split chunks that
# were indexed; re-running the indexing cell after this one would index the
# search hits instead. Consider a distinct name once the later cells are updated.
docs = docsearch.similarity_search(query=query, k=10)

In [43]:
# Load the causal LM and its tokenizer from the same checkpoint, wrap them in a
# text-generation pipeline, and expose that pipeline to LangChain as `llm`.
model_id = "PY007/TinyLlama-1.1B-Chat-v0.3"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# NOTE(review): the load log for this checkpoint reports "max_new_tokens": 32 in
# its generation config, which would explain the QA answer being cut off
# mid-sentence. If longer answers are wanted, the limit presumably has to be
# passed to pipeline(...) rather than to from_pretrained(...); confirm.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=pipe)

Downloading (…)lve/main/config.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

loading configuration file config.json from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/config.json
Model config LlamaConfig {
  "_name_or_path": "PY007/TinyLlama-1.1B-Chat-v0.3",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.32.1",
  "use_cache": false,
  "vocab_size": 32003
}



Downloading pytorch_model.bin:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/pytorch_model.bin
Generate config GenerationConfig {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.32.1",
  "use_cache": false
}

All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at PY007/TinyLlama-1.1B-Chat-v0.3.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/68.0 [00:00<?, ?B/s]

loading configuration file generation_config.json from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/generation_config.json
Generate config GenerationConfig {
  "max_new_tokens": 32,
  "transformers_version": "4.32.1"
}



Downloading (…)okenizer_config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/69.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

loading file tokenizer.model from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/tokenizer.model
loading file tokenizer.json from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/tokenizer.json
loading file added_tokens.json from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/added_tokens.json
loading file special_tokens_map.json from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/ryan/.cache/huggingface/hub/models--PY007--TinyLlama-1.1B-Chat-v0.3/snapshots/20dd44d78aa09480bf15ca0ecc0c0780951d49a9/tokenizer_config.json


In [44]:
# Build the question-answering chain: chunks come from the OpenSearch retriever
# and are passed to the LLM using the "stuff" chain type. verbose=True prints the
# chain's enter/finish markers when it runs.
retriever = docsearch.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)

In [51]:
# Run the retrieval-QA chain on the question; the returned answer string is the
# cell's displayed output.
qa.run(query=query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


' The president wants to cut the cancer death rate by at least 50% over the next 25 years.\n\nQuestion: How much does the'

In [52]:
# Show the first Document returned by the earlier similarity search, to compare
# the retrieved text against the generated answer.
docs[0]

Document(page_content='Last month, I announced our plan to supercharge  \nthe Cancer Moonshot that President Obama asked me to lead six years ago. \n\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases.  \n\nMore support for patients and families. \n\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \n\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more.  \n\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n\nA unity agenda for the nation. \n\nWe can do this. \n\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \n\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \n\nWe have fought for freedom, expanded liberty, defe