Necessary Installations

In [None]:
!pip install llama-index llama-index-llms-huggingface llama-index-embeddings-huggingface llama-index-llms-langchain faiss-gpu transformers accelerate bitsandbytes trulens-eval

Collecting llama-index
  Downloading llama_index-0.10.33-py3-none-any.whl (6.9 kB)
Collecting llama-index-llms-huggingface
  Downloading llama_index_llms_huggingface-0.1.4-py3-none-any.whl (7.2 kB)
Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.2.0-py3-none-any.whl (7.1 kB)
Collecting llama-index-llms-langchain
  Downloading llama_index_llms_langchain-0.1.3-py3-none-any.whl (4.6 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [

In [None]:
# Install the exact litellm pre-release (1.0.0.dev1) used by this notebook.
# NOTE(review): in the recorded run this also downgraded certifi from 2024.2.2 to 2023.11.17.
!pip install --upgrade litellm==1.0.0.dev1

Collecting litellm==1.0.0.dev1
  Downloading litellm-1.0.0.dev1-py3-none-any.whl.metadata (9.5 kB)
Collecting certifi<2024.0.0,>=2023.7.22 (from litellm==1.0.0.dev1)
  Downloading certifi-2023.11.17-py3-none-any.whl.metadata (2.2 kB)
Downloading litellm-1.0.0.dev1-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading certifi-2023.11.17-py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.5/162.5 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: certifi, litellm
  Attempting uninstall: certifi
    Found existing installation: certifi 2024.2.2
    Uninstalling certifi-2024.2.2:
      Successfully uninstalled certifi-2024.2.2
Successfully installed certifi-2023.11.17 litellm-1.0.0.dev1
[0m

In [None]:
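# Disabled workaround: makes locale.getpreferredencoding() always report "UTF-8".
# Uncomment only if the environment reports a non-UTF-8 preferred encoding.
# import locale
# def getpreferredencoding(do_setlocale = True):
#     return "UTF-8"
# locale.getpreferredencoding = getpreferredencoding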

Necessary Imports

In [None]:
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory
import accelerate
import transformers
import torch
import textwrap
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    TextStreamer,
    pipeline,
)
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from trulens_eval import Tru
from trulens_eval import Feedback, LiteLLM, TruLlama, Select, TruChain, FeedbackMode
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.hugs import Huggingface
import numpy as np
import pandas as pd
import os

[nltk_data] Downloading package stopwords to
[nltk_data]     /usr/local/lib/python3.10/dist-
[nltk_data]     packages/llama_index/legacy/_static/nltk_cache...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     /usr/local/lib/python3.10/dist-
[nltk_data]     packages/llama_index/legacy/_static/nltk_cache...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Required Functions

In [None]:
def readDocument(filepath, chunk_size=1000, chunk_overlap=30):
  """Load a PDF and split it into overlapping text chunks.

  Args:
    filepath: Path of the PDF file, handed to ``PyPDFLoader``.
    chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``
      (default 1000, the value that used to be hard-coded).
    chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``
      (default 30, the value that used to be hard-coded).

  Returns:
    The chunked documents returned by ``split_documents``.
  """
  loader = PyPDFLoader(filepath)
  documents = loader.load()
  # Split the loaded pages into chunks, using the newline as separator.
  text_splitter = CharacterTextSplitter(
      chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator="\n"
  )
  return text_splitter.split_documents(documents=documents)

In [None]:
def createLLM(model_name, hf_token):
  """Build a LangChain LLM backed by a 4-bit quantized Hugging Face model.

  Args:
    model_name: Hugging Face Hub model id to load.
    hf_token: Hugging Face access token; exported as the ``HF_TOKEN``
      environment variable before anything is downloaded.

  Returns:
    A ``HuggingFacePipeline`` wrapping a streaming ``text-generation``
    pipeline for the loaded model.
  """
  # Function-scope import so this cell does not depend on editing the imports cell.
  from transformers import BitsAndBytesConfig

  os.environ["HF_TOKEN"]=hf_token
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
      device_map="auto",
      torch_dtype=torch.float16,
      # Passing `load_in_4bit=True` directly is deprecated (see the warning in the
      # recorded output); the quantization settings belong in a BitsAndBytesConfig.
      quantization_config=BitsAndBytesConfig(load_in_4bit=True),
  )

  # Start from the model's published generation defaults and override a few fields.
  generation_config = GenerationConfig.from_pretrained(model_name)
  generation_config.max_new_tokens = 1024
  # Sampling stays enabled but with a tiny temperature.
  # NOTE(review): presumably meant to approximate greedy decoding -- confirm.
  generation_config.temperature = 0.0001
  generation_config.do_sample = True
  # Streams generated text as it is produced, skipping the prompt and special tokens.
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

  pipe = pipeline(
      "text-generation",
      model=model,
      tokenizer=tokenizer,
      return_full_text=True,
      generation_config=generation_config,
      num_return_sequences=1,
      eos_token_id=tokenizer.eos_token_id,
      # The EOS token id is reused as the padding token id.
      pad_token_id=tokenizer.eos_token_id,
      streamer=streamer,
  )
  llm = HuggingFacePipeline(pipeline=pipe)
  return llm

In [None]:
def get_build_index(documents):
  """Embed ``documents`` and store them in a FAISS vector index.

  The embeddings come from ``sentence-transformers/all-MiniLM-L6-v2`` with
  the device set to ``cpu``.

  Args:
    documents: Documents to index, passed straight to ``FAISS.from_documents``.

  Returns:
    The FAISS vector store built from the documents.
  """
  embedder = HuggingFaceEmbeddings(
      model_name='sentence-transformers/all-MiniLM-L6-v2',
      model_kwargs={'device': 'cpu'},
  )
  return FAISS.from_documents(documents, embedder)

In [None]:
def createQueryEngine(documents,llm):
  """Create a ``RetrievalQA`` chain over a fresh vector index of ``documents``.

  Args:
    documents: Documents to index via ``get_build_index``.
    llm: Language model used by the chain.

  Returns:
    A ``RetrievalQA`` chain of type ``"stuff"`` whose retriever is backed by
    the newly built index.
  """
  # Build the vector index first, then expose it as a retriever for the chain.
  retriever = get_build_index(documents).as_retriever()
  return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [None]:
def evaluationQuestions(ques_path):
  """Read evaluation questions from a text file, one question per line.

  Args:
    ques_path: Path of the questions file; it is read as UTF-8.

  Returns:
    A list of question strings with surrounding whitespace removed.
    Blank lines are skipped so no empty question is sent to the chain.
  """
  # Explicit encoding keeps the result independent of the platform locale.
  with open(ques_path, 'r', encoding='utf-8') as file:
      stripped_lines = (line.strip() for line in file)
      return [question for question in stripped_lines if question]

In [None]:
def getFeedbackMetrices(hf_token, qa, ques_path):
  """Run the evaluation questions through ``qa`` and collect TruLens feedback.

  Args:
    hf_token: Hugging Face token; exported as ``HUGGINGFACE_API_KEY`` for the
      feedback providers.
    qa: The LangChain chain to evaluate (wrapped in ``TruChain``).
    ques_path: Path of the questions file read by ``evaluationQuestions``.

  Returns:
    A ``(tru, metrices)`` tuple: the ``Tru`` handle and the ``input``,
    ``output`` and feedback columns of the recorded results.
  """
  tru = Tru()
  # Note: this wipes any previously recorded results in the TruLens database.
  tru.reset_database()
  os.environ["HUGGINGFACE_API_KEY"] = hf_token

  # Initialize provider classes (created after the API key is exported above).
  provider = LiteLLM(model_engine="huggingface/mistralai/Mistral-7B-Instruct-v0.2")
  hugs = Huggingface()

  f_qa_relevance = Feedback(
    provider.relevance_with_cot_reasons,
    name="Answer Relevance"
  ).on_input_output()

  # This feedback was referenced in the `feedbacks` list but never defined,
  # which raised a NameError; it compares the language of question and answer.
  f_lang_match = Feedback(
    hugs.language_match,
    name="Language Match"
  ).on_input_output()

  # `qa` is a LangChain chain, so the retrieved context is located by letting
  # TruChain find the chain's retriever instead of assuming an
  # `app.retrieve(...)` method on the app.
  context = TruChain.select_context(qa)

  f_context_relevance = (
      Feedback(
          provider.qs_relevance_with_cot_reasons,
          name="Context Relevance",
      )
      .on_input()
      .on(context)
      .aggregate(np.mean)
  )
  grounded = Groundedness(groundedness_provider=provider)

  # Define a groundedness feedback function
  f_groundedness = (
      Feedback(
          grounded.groundedness_measure_with_cot_reasons,
          name="Groundedness",
      )
      .on(context.collect())  # all retrieved chunks as a single list
      .on_output()
      .aggregate(grounded.grounded_statements_aggregator)
  )
  tru_recorder = TruChain(
      qa,
      app_id="App_1",
      feedbacks=[
          f_qa_relevance,
          f_context_relevance,
          f_groundedness,
          f_lang_match
      ]
  )
  eval_ques = evaluationQuestions(ques_path)
  for question in eval_ques:
    # Calls made inside the recorder context are recorded for evaluation.
    with tru_recorder:
        qa.run(question)

  # `feedback` is used below as a list of column names to select from `records`.
  records, feedback = tru.get_records_and_feedback(app_ids=[])
  # pd.set_option("display.max_colwidth", None)
  metrices = records[["input", "output"] + feedback]

  return tru, metrices

In [None]:
# Hugging Face settings: the model to load and the access token used to fetch it.
model_name = "meta-llama/Llama-2-7b-chat-hf"
hf_token = "your-huggingface-key"  # placeholder: supply your own token, never commit a real one

# Input locations (placeholders): the PDF to index and the evaluation questions file.
# Example: filepath = "/content/mydata/insured_document.pdf"
filepath = "document_path"
ques_path = "questions_path"

In [None]:
# Load the PDF at `filepath` and split it into chunks.
documents = readDocument(filepath)

In [None]:
# Load the tokenizer and model named by `model_name` and wrap them as a LangChain LLM.
llm = createLLM(model_name, hf_token)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/628 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
# Index the document chunks and connect the index to the LLM as a RetrievalQA chain.
qa = createQueryEngine(documents,llm)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Run every question from `ques_path` through the chain under TruLens recording.
# `tru` is the TruLens handle; `result` holds the input, output and feedback columns.
tru, result = getFeedbackMetrices(hf_token, qa, ques_path)

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Language Match, input text1 will be set to __record__.main_input or `Select.RecordInput` .
✅ In Language Match, input text2 will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.app.retrieve.args.query .
✅ In Context Relevance, input context will be set to __record__.app.retrieve.rets.collect() .


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


✅ In Groundedness, input source will be set to __record__.app.retrieve.rets.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .




The policy provides insurance coverage for your trip through its relationship with Travel Guard. The policy includes a Schedule Page that shows the benefits that are offered, and a Declarations Page that shows the base policy benefits and any additional benefits you elected. Each of these benefits will pay up to the limit shown for covered losses. The policy also includes a General Exclusions section that applies to all benefits, and some benefits will include an exclusions section that will only apply to that particular benefit. If you have any questions about the coverages included in this policy, please refer to the policy carefully.
The Covered reasons for TRIP CANCELLATION COVERAGE are:
- Sickness, Injury, or death of an Insured, Family Member, Traveling Companion, or Business Partner.
- Sickness or Injury of an Insured, Traveling Companion, Family Member traveling with the Insured, which results in medically imposed travel restrictions as certified by a Physician at the time of L

In [None]:
# Remove the column-width limit so long answers are not truncated when displayed.
# Note: this changes the pandas display option for the rest of the session.
pd.set_option("display.max_colwidth", None)
result

In [None]:
# Show the per-app summary of feedback scores, latency and cost.
# NOTE(review): an empty `app_ids` list appears to mean "all apps" (the recorded output lists App_1) -- confirm.
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Language Match,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
App_1,0.963648,0.883333,24.307692,0.0
