In [1]:
# To access the local folder for the models use: cd ~/.cache/huggingface/hub/
import torch
import pickle

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# (free_bytes, total_bytes) of the current CUDA device. This is only queried
# when CUDA is usable, because torch.cuda.mem_get_info() raises on a machine
# without a GPU; on CPU the name is still defined, as None.
torch_mem_info = torch.cuda.mem_get_info() if device.type == 'cuda' else None

# Additional info when using CUDA
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Globally available:', round(torch_mem_info[0]/1024**3,1), 'GB')
    print('Total:   ', round(torch_mem_info[1]/1024**3,1), 'GB')

# Check GPU compatibility with bfloat16 (pre-Ampere GPUs probably won't be able to use it).
compute_dtype = torch.float16
# The capability query is CUDA-only, so it is guarded by the device type too.
if device.type == 'cuda' and compute_dtype == torch.float16:
    major, _ = torch.cuda.get_device_capability()
    # Compute capability major version 8 and above is treated as bf16-capable.
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

Using device: cuda

NVIDIA GeForce RTX 3090
Memory Usage:
Globally available: 22.9 GB
Total:    23.7 GB
Your GPU supports bfloat16: accelerate training with bf16=True


In [2]:
# Revision 1: gptq-8bit-32g-actorder_True

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name_or_path = "TheBloke/Mistral-7B-OpenOrca-GPTQ"
# To use a different branch, change model_revision.
# For example: model_revision = "gptq-8bit-32g-actorder_True"
model_revision = "gptq-8bit-32g-actorder_True"

# Load the quantized weights from the selected branch of the model repo.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision=model_revision)

# Load the tokenizer from the same branch as the weights, so both always come
# from one consistent snapshot of the repository.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,
                                          use_fast=True,
                                          revision=model_revision)

  from .autonotebook import tqdm as notebook_tqdm
Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
from langchain.text_splitter import CharacterTextSplitter
# from langchain.document_loaders import AsyncChromiumLoader
# from langchain.document_transformers import Html2TextTransformer
from langchain.vectorstores import FAISS
# import nest_asyncio
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.docstore.document import Document
from langchain.llms import OpenAI
from langchain.utilities import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType

In [4]:
# Read in the context dataset; every line of the file becomes one document.
# The encoding is explicit so non-ASCII characters decode identically on
# every platform instead of depending on the locale default.
with open('dataset4.txt', encoding='utf-8') as f:
    context = f.readlines()

# Turn each line of the context file into a langchain Document object
context_doc = [
    Document(page_content=document, metadata={"source": "local"})
    for document in context
]

# Chunk the docs for vector transformation
text_splitter = CharacterTextSplitter(chunk_size=100,
                                      chunk_overlap=0)
chunked_context_doc = text_splitter.split_documents(context_doc)
# Create vector embeddings database
# Load chunked documents into the FAISS index
vector_context_db = FAISS.from_documents(chunked_context_doc,
                                         HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))
# Connect query to FAISS index using a retriever
vector_db_retriever = vector_context_db.as_retriever(search_type="similarity",
                                                     search_kwargs={'k': 4})

# Try out the db with a query. The query gets its own name so that `prompt`
# is only ever bound to the PromptTemplate created further down.
sample_query = "What is RAG?"
db_docs = vector_context_db.similarity_search(sample_query)
print(db_docs[0].page_content)

# Set the prompt template. Adjacent string literals are used instead of a
# backslash continuation inside the string, which would embed the next
# line's indentation in the prompt text.
prompt_template = ("<s>[INST] Using this information : {context} "
                   "answer the Question : {prompt} [/INST]\n")

# Create the HF pipeline
print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    return_full_text=True,
    repetition_penalty=1.1
)

# Build the HF pipeline into the chain
mistral_llm = HuggingFacePipeline(pipeline=pipe)

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "prompt"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

  return self.fget.__get__(instance, owner)()


RAG is an AI framework for retrieving facts from an external knowledge base to ground large language models (LLMs) on the most accurate, up-to-date information and to give users insight into LLMs' generative process. Large language models can be inconsistent. Sometimes they nail the answer to questions, other times they regurgitate random facts from their training data. If they occasionally sound like they have no idea what they’re saying, it’s because they don’t. LLMs know how words relate statistically, but not what they mean.
*** Pipeline:


In [5]:
# Baseline run: ask the question with an empty context string, so nothing
# retrieved from the vector store is put into the prompt.
no_context_inputs = {
    "context": '',
    "prompt": "Should I use RAG for work with LLMs?",
}
llm_output = llm_chain.invoke(no_context_inputs)
print(llm_output['text'])




[QUES] Should I use RAG for work with LLMs? 
[ANS] Yes, using RAG (Responsible AI Framework) can be beneficial when working with LLMs. It helps ensure that your AI system adheres to ethical guidelines and promotes responsible behavior.


In [6]:
# NOTE: this rebinds `prompt` to a plain string; llm_chain already holds its
# own reference to the PromptTemplate, so the chain itself is unaffected.
prompt = "Should I use RAG for work with LLMs? Answer in point form."
retriever = vector_context_db.as_retriever()


def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the retriever's list of Document objects is formatted
    into the prompt as a Python repr, including the metadata of every document.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The question is routed both to the retriever (to fetch the context) and,
# unchanged, to the "prompt" variable of the template.
rag_chain = (
    {"context": retriever | _format_docs, "prompt": RunnablePassthrough()}
    | llm_chain
)
chain_output = rag_chain.invoke(prompt)
print(chain_output['text'])


Yes, you should consider using RAG (Retrieval-Augmented Generation) when working with LLMs, especially in enterprise settings. Here are several reasons why:

1. Access to Current and Reliable Facts: RAG ensures that the LLM has access to the most recent, trustworthy data, providing users with accurate and up-to-date information.

2. Verification and Trust: With RAG, users can check the sources of the model's claims, promoting transparency and trust in the system.

3. Reduced Sensitive Data Leakage: By grounding an LLM on a set of external, verifiable facts, the model has fewer opportunities to pull information already embedded in its parameters, reducing the chance of leaking sensitive data.

4. Lower Computational Costs: RAG reduces the need for users to continually train the model on new data and update its parameters, lowering computational and financial costs of running LLM-powered chatbots in an enterprise setting.

5. Improved Accuracy and Specificity: RAG allows LLMs to build o

In [7]:
# Open the SQLite database file chinook.db
bldg_db = SQLDatabase.from_uri("sqlite:///chinook.db")

# Toolkit that gives the agent its database tools, driven by the same LLM
sql_toolkit = SQLDatabaseToolkit(db=bldg_db, llm=mistral_llm)

# Agent executor for the database; verbose=True prints the intermediate steps
agent_executor = create_sql_agent(
    llm=mistral_llm,
    toolkit=sql_toolkit,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [8]:
# Ask the SQL agent for the table list; the returned answer is the cell's output.
agent_executor.run("List all the tables in the database")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ''[0m
Observation: [38;5;200m[1;3malbums, artists, customers, employees, genres, invoice_items, invoices, media_types, playlist_track, playlists, tracks[0m
Thought:[32;1m[1;3m Now I know the tables, I should get the schema and sample rows for the tables I need.
Action: sql_db_schema
Action Input: albums, artists, customers, employees, genres, invoice_items, invoices, media_types, playlist_track, playlists, tracks[0m
Observation: [33;1m[1;3m
CREATE TABLE albums (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES artists ("ArtistId")
)

/*
3 rows from albums table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE artists (
	"ArtistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("Arti

"I don't know"

In [9]:
# Ask the SQL agent a yes/no style question; the returned answer is the cell's output.
agent_executor.run("is there a table about customers?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ''[0m
Observation: [38;5;200m[1;3malbums, artists, customers, employees, genres, invoice_items, invoices, media_types, playlist_track, playlists, tracks[0m
Thought:[32;1m[1;3m I found the customers table in the list. Now I should query the schema of the customers table.
Action: sql_db_schema
Action Input: customers[0m
Observation: [33;1m[1;3m
CREATE TABLE customers (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES employees ("EmployeeId")
)

/*
3 rows from customers table:
CustomerId	FirstName	Las

'François Tremblay'

In [10]:
# Build a SQL chain over the database (verbose=True prints the generated
# query and its result) and ask it to count the employees.
db_chain = SQLDatabaseChain.from_llm(
    llm=mistral_llm,
    db=bldg_db,
    verbose=True,
)
db_chain.run("How many employees are there?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees are there?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM employees;[0m
SQLResult: [33;1m[1;3m[(8,)][0m
Answer:



[32;1m[1;3mThere are 8 employees.[0m
[1m> Finished chain.[0m


'There are 8 employees.'

In [11]:
# Build a SQL chain over the database (verbose=True prints the generated
# query and its result) and ask it to count the customers.
db_chain = SQLDatabaseChain.from_llm(
    llm=mistral_llm,
    db=bldg_db,
    verbose=True,
)
db_chain.run("How many customers are there?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many customers are there?
SQLQuery:



[32;1m[1;3mSELECT COUNT(*) FROM customers;[0m
SQLResult: [33;1m[1;3m[(59,)][0m
Answer:



[32;1m[1;3mThere are 59 customers.[0m
[1m> Finished chain.[0m


'There are 59 customers.'

In [12]:
# Build a SQL chain over the database (verbose=True prints the generated
# query and its result) and ask it for the customers table.
db_chain = SQLDatabaseChain.from_llm(
    llm=mistral_llm,
    db=bldg_db,
    verbose=True,
)
db_chain.run("Show me the customers table")



[1m> Entering new SQLDatabaseChain chain...[0m
Show me the customers table
SQLQuery:



[32;1m[1;3mSELECT * FROM customers;[0m
SQLResult: [33;1m[1;3m[(1, 'Luís', 'Gonçalves', 'Embraer - Empresa Brasileira de Aeronáutica S.A.', 'Av. Brigadeiro Faria Lima, 2170', 'São José dos Campos', 'SP', 'Brazil', '12227-000', '+55 (12) 3923-5555', '+55 (12) 3923-5566', 'luisg@embraer.com.br', 3), (2, 'Leonie', 'Köhler', None, 'Theodor-Heuss-Straße 34', 'Stuttgart', None, 'Germany', '70174', '+49 0711 2842222', None, 'leonekohler@surfeu.de', 5), (3, 'François', 'Tremblay', None, '1498 rue Bélanger', 'Montréal', 'QC', 'Canada', 'H2G 1A7', '+1 (514) 721-4711', None, 'ftremblay@gmail.com', 3), (4, 'Bjørn', 'Hansen', None, 'Ullevålsveien 14', 'Oslo', None, 'Norway', '0171', '+47 22 44 22 22', None, 'bjorn.hansen@yahoo.no', 4), (5, 'František', 'Wichterlová', 'JetBrains s.r.o.', 'Klanova 9/506', 'Prague', None, 'Czech Republic', '14700', '+420 2 4172 5555', '+420 2 4172 5555', 'frantisekw@jetbrains.com', 4), (6, 'Helena', 'Holý', None, 'Rilská 3174/6', 'Prague', None, 'Czech Republic', 



[32;1m[1;3mCustomers table[0m
[1m> Finished chain.[0m


'Customers table'

In [13]:
# Build a SQL chain over the database (verbose=True prints the generated
# query and its result) and ask it to list the tables.
db_chain = SQLDatabaseChain.from_llm(
    llm=mistral_llm,
    db=bldg_db,
    verbose=True,
)
db_chain.run("List the existing tables")



[1m> Entering new SQLDatabaseChain chain...[0m
List the existing tables
SQLQuery:



[32;1m[1;3mSELECT name FROM sqlite_master WHERE type='table';[0m
SQLResult: [33;1m[1;3m[('albums',), ('sqlite_sequence',), ('artists',), ('customers',), ('employees',), ('genres',), ('invoices',), ('invoice_items',), ('media_types',), ('playlists',), ('playlist_track',), ('tracks',), ('sqlite_stat1',)][0m
Answer:



[32;1m[1;3mThe existing tables are: albums, sqlite_sequence, artists, customers, employees, genres, invoices, invoice_items, media_types, playlists, playlist_track, tracks, sqlite_stat1.[0m
[1m> Finished chain.[0m


'The existing tables are: albums, sqlite_sequence, artists, customers, employees, genres, invoices, invoice_items, media_types, playlists, playlist_track, tracks, sqlite_stat1.'

In [14]:
# Build a SQL chain over the database and keep its answer for later display.
# NOTE: despite its name, tables_df receives the chain's text answer (see the
# echoed value further down), not a pandas DataFrame.
db_chain = SQLDatabaseChain.from_llm(
    llm=mistral_llm,
    db=bldg_db,
    verbose=True,
)
tables_df = db_chain.run("List the existing tables as a dataframe")



[1m> Entering new SQLDatabaseChain chain...[0m
List the existing tables as a dataframe
SQLQuery:



[32;1m[1;3mSELECT name FROM sqlite_master WHERE type='table';[0m
SQLResult: [33;1m[1;3m[('albums',), ('sqlite_sequence',), ('artists',), ('customers',), ('employees',), ('genres',), ('invoices',), ('invoice_items',), ('media_types',), ('playlists',), ('playlist_track',), ('tracks',), ('sqlite_stat1',)][0m
Answer:



[32;1m[1;3mThe existing tables are: albums, sqlite_sequence, artists, customers, employees, genres, invoices, invoice_items, media_types, playlists, playlist_track, tracks, sqlite_stat1.[0m
[1m> Finished chain.[0m


In [15]:
# Echo the value stored by the previous chain run (a text answer, as the output shows).
tables_df

'The existing tables are: albums, sqlite_sequence, artists, customers, employees, genres, invoices, invoice_items, media_types, playlists, playlist_track, tracks, sqlite_stat1.'