In [61]:
import os

from torch import cuda, bfloat16
import transformers
from dotenv import load_dotenv
from pathlib import Path

import psycopg2
from pgvector.psycopg2 import register_vector
import numpy as np


from sentence_transformers import SentenceTransformer
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [2]:
env_path = Path('..') / '.env'
load_dotenv(dotenv_path=env_path)
hf_auth = os.environ.get('HUGGING_FACE_TOKEN');

In [3]:
model_id = 'meta-llama/Llama-2-13b-chat-hf'

Quantization config with Bits and Bytes

In [4]:
bnb_config = transformers.BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=bfloat16
)

Model config

In [6]:
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

Initialize the model and move to GPU -- This can take about ~10 mins to download if the local HF cache doesn't have the model

In [7]:
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [8]:
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

In [9]:
model.eval()
print(f"Model loaded on {device}")

Model loaded on cuda:0


Get the corresponding llama2 13B tokenizer and initialize it

In [10]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)

In [11]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    task='text-generation',
    # model params
    temperature=0.01,
    max_new_tokens=512, # max tokens for output
    repetition_penalty=1.1
)

In [12]:
res = generate_text("What is the difference between a desert and dessert?")
print(res[0]["generated_text"])

What is the difference between a desert and dessert?

Answer: A desert is a region that receives very little rainfall, typically less than 25 centimeters (10 inches) of precipitation per year. Dessert, on the other hand, is a sweet dish or course served after a meal.

So, the main difference between a desert and dessert is that one is a type of landscape and the other is a type of food. Deserts are dry and barren regions with limited vegetation and wildlife, while desserts are sweet and enjoyable treats that are often served as a conclusion to a meal.


In [13]:
res

[{'generated_text': 'What is the difference between a desert and dessert?\n\nAnswer: A desert is a region that receives very little rainfall, typically less than 25 centimeters (10 inches) of precipitation per year. Dessert, on the other hand, is a sweet dish or course served after a meal.\n\nSo, the main difference between a desert and dessert is that one is a type of landscape and the other is a type of food. Deserts are dry and barren regions with limited vegetation and wildlife, while desserts are sweet and enjoyable treats that are often served as a conclusion to a meal.'}]

In [14]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

In [15]:
llm(prompt="What is the difference between a desert and dessert?")

'\n\nAnswer: A desert is a region that receives very little rainfall, typically less than 25 centimeters (10 inches) of precipitation per year. Dessert, on the other hand, is a sweet dish or course served after a meal.\n\nSo, the main difference between a desert and dessert is that one is a type of landscape and the other is a type of food. Deserts are dry and barren regions with limited vegetation and wildlife, while desserts are sweet and enjoyable treats that are often served as a conclusion to a meal.'

### Initializing a Retrieval QA chain

Redo-ing the embeddings as huggingface in a separate table to find if there's much of a functional difference

In [45]:
embed_model = SentenceTransformer("all-MiniLM-L6-v2")


In [59]:
conn = psycopg2.connect(connection_string)

In [38]:
query = "Are property setbacks for pools different in East Hampton?"

In [67]:
def get_top3_similar_docs(query_embedding, conn):
    embedding_array = np.array(query_embedding)
    # Register pgvector extension
    register_vector(conn)
    cur = conn.cursor()
    # Get the top 3 most similar documents
    cur.execute("SELECT chunk_text FROM documentchunk ORDER BY embedding <=> %s LIMIT 3", (embedding_array,))
    top3_docs = cur.fetchall()
    return top3_docs

In [53]:
def get_embeddings(user_input):
    return embed_model.encode(user_input)

In [71]:
get_top3_similar_docs(get_embeddings(query), conn)

[("commercial swimming pools; and upgrade standards for fuel storage tanks. East Hampton's waterways are among the Town's most prized features. They support bountiful shellfish and finfish resources, offer prime habitat for local wildlife and offer residents and visitors a place to swim, fish, hunt, boat, observe wildlife and enjoy scenic beauty and tranquillity. The overlay district will help preserve these important benefits for future generations. \n 255-3-73. Boundaries. [Added 10-6-1995 by L.L.",),
 ('12-10-2019 by L.L. No. 36-2019] \nUnless indicated by specific language to the contrary, the provisions of this chapter shall apply to all properties located in the Town of East Hampton. Notwithstanding the foregoing, the installation of a pool house shall not be required to install or connect to a low-nitrogen sanitary system if the Building Inspector determines that such connection cannot be reasonably accomplished without significant additional costs to the property owner. \n 210-

In [73]:
generate_text(query)

[{'generated_text': "Are property setbacks for pools different in East Hampton?\nYes, property setbacks for pools are different in East Hampton than for other structures. The Town of East Hampton has specific regulations regarding pool construction and location. Pools must be located at least 10 feet away from any property line, and the total area of the pool and surrounding deck cannot exceed 25% of the lot size. Additionally, pools must be surrounded by a fence or other enclosure that meets certain height and safety requirements. It is important to consult with the Town's Building Department before beginning any pool construction project to ensure compliance with these regulations."}]

In [74]:
def process_input_with_retrieval(user_input):
    delimiter = "```"

    #Step 1: Get documents related to the user input from database
    related_docs = get_top3_similar_docs(get_embeddings(user_input), conn)

    system_message = f"""
    You are a friendly chatbot. \
    You can answer questions about timescaledb, its features and its use cases. \
    You respond in a concise, technically credible tone. \
    """

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"{delimiter}{user_input}{delimiter}"},
        {"role": "assistant", "content": f"Relevant Timescale case studies information: \n {related_docs[0][0]} \n {related_docs[1][0]} {related_docs[2][0]}"}   
    ]

    res = generate_text(messages)
    return res

In [75]:
rag_test = process_input_with_retrieval(query)

TypeError: can only concatenate str (not "dict") to str

In [34]:
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id
)

In [19]:
from langchain.vectorstores.pgvector import PGVector

In [20]:
connection_string = "postgresql://postgres:password@localhost:55432/ai_experiments"

In [69]:
conn.rollback()