In [1]:
import openai 
import tiktoken 
import chromadb 
import langchain

In [2]:
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser

In [3]:
# Repo path: root directory that the loader scans for source files.
repo_path = "./zepo"

In [6]:
# Load every file under repo_path whose name ends in ".sql" as a LangChain document.
# NOTE(review): only ".sql" files are selected, yet the parser is configured for
# Language.PYTHON — confirm this pairing is intentional.
source_parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".sql"],
    parser=source_parser,
)
documents = loader.load()

# Show how many documents were loaded.
len(documents)

8

In [25]:
# Split the loaded documents into chunks of up to 2000 characters with a
# 200-character overlap between neighbouring chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=2000,
    chunk_overlap=200,
)
texts = python_splitter.split_documents(documents)

# Show how many chunks were produced.
len(texts)

124

Create the Embedding DB and persist it

In [None]:
# Directory in which the Chroma vector store is persisted (relative to the working directory).
persist_directory = 'db'

In [26]:
# Embed the text chunks and store them in a Chroma vector store that is
# configured with persist_directory.
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings  # any other embedding (e.g. Hugging Face) can be used instead

embedding = OpenAIEmbeddings(disallowed_special=())
db = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)
# At this point the embeddings have been created in db.

In [None]:
# Persist the embedding DB to disk, then drop the in-memory reference to it.
db.persist()
db = None

In [None]:
# Re-open the persisted database from disk; from here on it is used as normal.
db = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

Setting Up Retriever

In [None]:
# Expose the vector store as a retriever.
retriever_search_kwargs = {"k": 8}  # number of relevant documents to return
retriever = db.as_retriever(
    search_type="mmr",  # "similarity" is worth testing as well
    search_kwargs=retriever_search_kwargs,
)

In [None]:
# Fetch the documents the retriever returns for a sample question.
ques = "Who am I?"
relevant_documents = retriever.get_relevant_documents(query=ques)

Setting up our Local Model

In [9]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain 
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [11]:
# Location of the local GGUF model file that is passed to LlamaCpp.
# Written as a raw string: the Windows path contains backslashes (\P, \m, \c)
# that a normal string literal treats as invalid escape sequences, which emits a
# DeprecationWarning/SyntaxWarning and is slated to become an error in future Python.
# The resulting value is unchanged.
model_path = r"C:\ProjectLFG\models\codellama-13b-instruct.Q4_K_M.gguf"

In [12]:
# Route callbacks through a handler that streams to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Settings for the local llama.cpp model, collected in one place and then
# forwarded to LlamaCpp unchanged.
llm_settings = dict(
    model_path=model_path,
    n_ctx=5000,
    n_gpu_layers=1,
    n_batch=512,
    f16_kv=True,
    callback_manager=callback_manager,
    verbose=True,
)
llm = LlamaCpp(**llm_settings)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [14]:
# Standard format: ask the model directly, without passing any retrieved documents.
standard_prompt = '''Question: Only list the tables that are being used to create the table "#AllProductTransactionsNoCalculation"? Answer:'''
llm(standard_prompt)

Llama.generate: prefix-match hit


 #AllProductTransactionFactTable
1. The All Product Transaction Table is created by combining data from three fact tables (#SalesLine, #PurchaseLine and #TransferOrderLine) with an aggregate function of SumQuantitySold that uses the Unit Cost of Sale to calculate Average Cost.
2. This table is used as a basis for calculations in the Supply Chain Management business process and is available for use in any reports in Supply Chain Management.
3. It is also important to note that this table is not created with the intention to replace the other three fact tables (#SalesLine, #PurchaseLine and #TransferOrderLine). These tables will still be used as a basis for calculations in the Supply Chain Management business process but are now considered a summary of the data contained in each of those tables.
4. The All Product Transaction Table can be compared to the Sales Order Lines table, which aggregates the data from the #SalesOrderLine table with an aggregate function that uses the Unit Cost of

' #AllProductTransactionFactTable\n1. The All Product Transaction Table is created by combining data from three fact tables (#SalesLine, #PurchaseLine and #TransferOrderLine) with an aggregate function of SumQuantitySold that uses the Unit Cost of Sale to calculate Average Cost.\n2. This table is used as a basis for calculations in the Supply Chain Management business process and is available for use in any reports in Supply Chain Management.\n3. It is also important to note that this table is not created with the intention to replace the other three fact tables (#SalesLine, #PurchaseLine and #TransferOrderLine). These tables will still be used as a basis for calculations in the Supply Chain Management business process but are now considered a summary of the data contained in each of those tables.\n4. The All Product Transaction Table can be compared to the Sales Order Lines table, which aggregates the data from the #SalesOrderLine table with an aggregate function that uses the Unit Co

In [18]:
from langchain.chains.question_answering import load_qa_chain

# Prompt text for question answering: it has two placeholders, {context} and
# {question}, which are declared as the template's input variables below.
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible. 
{context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

In [27]:
# Retrieve the documents relevant to the question that will be put to the chain.
question = "List all the subdependent tables for the table #AllProductTransactionsNoCalculation"
docs = retriever.get_relevant_documents(query=question)

In [28]:
# Display the retrieved documents for inspection.
docs

[Document(page_content='AND @RevRecEndDate\n\t\t\t\tINNER JOIN [mss].[PricingLevelHierarchy] PLH WITH (NOLOCK)\n\t\t\t\t\tON VT.[DetailPricingLevelID] = PLH.[DetailPricingLevelID]\n\t\t\t\tINNER JOIN [dbo].[AddonOpportunityProduct] OP WITH (NOLOCK)\n\t\t\t\t\tON OP.[OpportunityKey] = TP.[OpportunityKey]\n\t\t\t\t\t\tAND OP.[ProductKey] = TP.[ProductKey]\n\t\t\tEND\n\t\tEND\n\t\tELSE \n\t\tBEGIN\n\t\t\tINSERT INTO #AllProductTransactionsNoCalculation\n\t\t\tSELECT TP.[OpportunityKey]\n\t\t\t\t, VT.[SubscriptionID]\n\t\t\t\t, TP.[TenantID]\n\t\t\t\t, TP.[AgreementID]\n\t\t\t\t, TP.[OrgID]\n\t\t\t\t, TP.[ProductID]\n\t\t\t\t, TP.[ProductKey]\n\t\t\t\t, VT.[SalesDate]\n\t\t\t\t, VT.[OriginalPurchaseAmount]\n\t\t\t\t, VT.[LicenseTransactionItemId]\n\t\t\t\t, VT.[RevSumCategoryID]\n\t\t\t\t, OP.[ExtendedAmount]\n\t\t\tFROM (\n\t\t\t\tSELECT DISTINCT [ProductID]\n\t\t\t\t\t, [OpportunityKey]\n\t\t\t\t\t, [SubscriptionID]\n\t\t\t\t\t, [TenantID]\n\t\t\t\t\t, [AgreementID]\n\t\t\t\t\t, [OrgID]\

In [1]:
# Build a "stuff"-type question-answering chain from the LLM and the custom prompt.
# Requires load_qa_chain, llm and QA_CHAIN_PROMPT to be defined already.
chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    prompt=QA_CHAIN_PROMPT,
)

NameError: name 'load_qa_chain' is not defined

In [29]:
# Run the chain on the retrieved documents and the question, returning only its outputs.
chain_inputs = {"input_documents": docs, "question": question}
chain(chain_inputs, return_only_outputs=True)

Llama.generate: prefix-match hit


Deleting the DB
DO NOT PROCEED
https://colab.research.google.com/drive/1gyGZn_LZNrYXYXa-pltFExbptIe7DAPe?usp=sharing#scrollTo=RWulTG0eKCfk

In [None]:
# Archive the persisted vector-store directory as db.zip (entries are stored under "db/").
# The original line `zip -r db.zip ./db` is a shell command: it is not valid Python and
# `zip` is not an IPython magic, so the cell could not run as written. shutil.make_archive
# produces the same archive without needing an external `zip` binary.
import shutil

shutil.make_archive("db", "zip", root_dir=".", base_dir="db")

In [None]:
# To clean up, delete the collection from the vector store and persist that change.
db.delete_collection()
db.persist()

# Delete the persistence directory. shutil.rmtree replaces the bare shell command
# `rm -rf db/`, which only works where IPython provides an `rm` alias (not on Windows).
# ignore_errors=True mirrors `-f`: no failure if the directory is already gone.
import shutil

shutil.rmtree("db", ignore_errors=True)