# LLM flow using RAG

### Install dependencies

In [9]:
# Install the notebook's third-party dependencies into the kernel's environment
# (%pip targets the running kernel, unlike !pip).
# NOTE(review): only chromadb is version-pinned; the other packages resolve to
# whatever is latest at install time, so a fresh run may not reproduce the
# recorded outputs. Consider pinning them once known-good versions are confirmed.
%pip install -q langchain transformers sentence_transformers peft
# NOTE(review): chromadb and psycopg2 are not imported in the cells visible here;
# presumably they are needed by code elsewhere. Confirm before removing.
%pip install chromadb==0.3.29
%pip install psycopg2
# Upgrades huggingface_hub after the installs above, quietly.
%pip install --upgrade --quiet  huggingface_hub

Collecting transformers
  Downloading transformers-4.37.2-py3-none-any.whl (8.4 MB)
[K     |████████████████████████████████| 8.4 MB 4.1 MB/s eta 0:00:01
[?25hCollecting sentence_transformers
  Downloading sentence_transformers-2.3.1-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 3.2 MB/s eta 0:00:01
Collecting regex!=2019.12.17
  Downloading regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (777 kB)
[K     |████████████████████████████████| 777 kB 3.7 MB/s eta 0:00:01
[?25hCollecting safetensors>=0.4.1
  Downloading safetensors-0.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 3.3 MB/s eta 0:00:01
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 4.4 MB/s eta 0:00:01
Collecting scikit-learn
  Downloading scikit_learn-1.3.2-cp38-cp38-m

### Import packages

In [17]:
import os
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import logging
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, pipeline
from peft import TaskType

from config import DATABASE_NAME, USERNAME, PASSWORD, TABLES

### Set up logging

In [2]:
# Send records from the 'langchain' logger to the notebook output at INFO level.
logger = logging.getLogger('langchain')
logger.setLevel(logging.INFO)
# logging.getLogger returns the same logger object on every call, so re-running
# this cell would stack one more handler each time and print every record
# repeatedly. Attach a console handler only if none is present yet.
# The exact-type check keeps subclasses such as FileHandler from being mistaken
# for the console handler.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
    logger.addHandler(logging.StreamHandler())

### Load LLM

In [3]:
# Not used by the loaders below, which read from disk.
# NOTE(review): presumably the Hub repository the local checkpoint was
# downloaded from. Confirm.
repo_id = 'defog/sqlcoder-7b-2'

# Single source of truth for the checkpoint directory, so the tokenizer and the
# weights cannot drift to different locations.
model_dir = './model'

# local_files_only=True on both loads: neither call should fall back to the
# network, and both artifacts must come from the same local checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    local_files_only=True,
    use_safetensors=True,
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:12<00:00,  4.01s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32016, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head):

### Build the text-generation pipeline

In [29]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# Per the transformers generation docs, max_length caps prompt tokens plus
# generated tokens together, so a long schema leaves less room for the query.
pipe = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=1024,
)

# SECURITY: an API token must never be written into the notebook. The token that
# was previously hardcoded here should be treated as leaked and revoked.
# The model is loaded from the local ./model directory, so this pipeline does not
# need a Hub token. If a later step does, export HUGGINGFACEHUB_API_TOKEN in the
# environment before starting the kernel.
if not os.getenv('HUGGINGFACEHUB_API_TOKEN'):
    logging.getLogger('langchain').info(
        'HUGGINGFACEHUB_API_TOKEN is not set; Hugging Face Hub API calls will be unauthenticated.'
    )

# Expose the local pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

### Set up prompt and schema

In [37]:
# Concatenate the entries of TABLES into one schema string, each entry followed
# by ';' and a newline.
# NOTE(review): presumably each entry is a DDL statement. Confirm in config.
schema = ''.join(table_ddl + ';\n' for table_ddl in TABLES)

user_question = 'Which is the best sushi place in New York with the funniest review?'

# Deliberately a plain string, not an f-string: {user_question} and {schema} must
# reach PromptTemplate as real placeholders. Interpolating them here would bake
# one question into the prompt, so the declared input variable would be ignored,
# and any '{' or '}' inside the schema or question would be misread as a
# template field.
template = """
### Task
Generate a Postgres SQL query to answer [QUESTION]{user_question}[/QUESTION]

### Database Schema
The query will run on a database with the following schema:
{schema}

### Answer
Given the database schema, here is the SQL query that [QUESTION]{user_question}[/QUESTION]
[SQL]
"""

# The schema is fixed for the session, so it is bound once as a partial variable.
# The question stays the only runtime input, so callers still pass just
# {'user_question': ...}.
prompt = PromptTemplate(
    template=template,
    input_variables=['user_question'],
    partial_variables={'schema': schema},
)

### Create chain and retrieve query

In [38]:
# Combine the prompt and the local LLM into a single runnable chain.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Echo the question and a divider first, then print the chain's output.
separator = '*' * 60
print(user_question)
print(separator)
chain_inputs = {'user_question': user_question}
print(llm_chain.run(chain_inputs))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Which is the best sushi place in New York with the funniest review?
************************************************************
 SELECT b.name, AVG(r.stars) AS average_stars, COUNT(r.funny) AS funny_reviews FROM businesses b JOIN reviews r ON b.business_id = r.business_id WHERE b.city ilike '%New York%' AND r.stars > 4 AND r.funny > 0 GROUP BY b.name ORDER BY average_stars DESC, funny_reviews DESC LIMIT 1;
