In [1]:
import openai 
import tiktoken 
import chromadb 
import langchain

In [2]:
# NOTE(review): `langchain.custom_text_splitter` is not a module I recognise from stock LangChain
# (the stock module is `langchain.text_splitter`) — presumably a locally added/patched module; confirm it is installed.
from langchain.custom_text_splitter import Language
# Loader and parser used further down to read the source files from disk.
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser

In [3]:
# Load environment variables from a local .env file.
# Presumably this is where the OpenAI API key lives (nothing else in the notebook sets it) — verify.
import dotenv
dotenv.load_dotenv()

True

In [9]:
# Repo path: root folder containing the source files to index.
# Raw string so the Windows backslashes are never interpreted as escape sequences
# (the plain literal relied on "\P", "\O", "\A", "\d" being invalid escapes, which Python warns about).
repo_path = r"C:\ProjectLFG\OCPInsights_MSSalesCloudMigration\ADF\dataset"

In [10]:
# Load: walk repo_path recursively and turn the files found into documents.
# No `language` is passed to the parser and `suffixes` is left empty, exactly as before.
language_parser = LanguageParser(parser_threshold=10)
loader = GenericLoader.from_filesystem(repo_path, glob="**/*", suffixes=[], parser=language_parser)
documents = loader.load()
# Show how many documents were loaded.
len(documents)

54

In [11]:
# Split the loaded documents into chunks of up to 3000 characters with 300 characters of overlap.
# NOTE(review): `langchain.custom_text_splitter` (and `Language.SQL`) do not look like stock LangChain —
# presumably a locally patched splitter module that adds SQL separators; confirm before re-running elsewhere.
from langchain.custom_text_splitter import RecursiveCharacterTextSplitter

code_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.SQL,
    chunk_size=3000,
    chunk_overlap=300,
)
texts = code_splitter.split_documents(documents)
# Show how many chunks were produced.
len(texts)

55

Create the Embedding DB and persist it

In [12]:
# Persist directory: folder where the Chroma embedding database is stored on disk.
# Raw string so the Windows backslashes are never interpreted as escape sequences
# (the plain literal relied on "\P" being an invalid escape, which Python warns about).
persist_directory = r'C:\ProjectLFG\PersistedEmbeddings'

In [8]:
# Embed every chunk and store the vectors in a Chroma collection backed by persist_directory.
from langchain.embeddings.openai import OpenAIEmbeddings  # any other embedding backend (e.g. Hugging Face) could be used instead
from langchain.vectorstores import Chroma

embedding = OpenAIEmbeddings(disallowed_special=())
# This call hits the OpenAI embeddings API for all chunks and can be slowed down by rate limiting.
db = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-epylv2teiFtWSFAAJfOO4l7Y on tokens per min. Limit: 150000 / min. Current: 1 / min. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..


KeyboardInterrupt: 

In [9]:
# Persist the embedding DB to disk, then drop the in-memory reference
# so that the next cell demonstrates loading it back from persist_directory.
db.persist()
db = None

In [None]:
# Re-open the persisted Chroma database from disk; from here on it is used like the freshly built one.
db = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

Setting Up Retriever

In [None]:
# Retriever over the vector store.
#   search_type="mmr"  -> the alternative worth testing is "similarity"
#   k=8                -> number of relevant documents to return per query
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 8})

In [85]:
# Chat setup: an OpenAI chat model, a summarising conversation memory, and a retrieval QA chain on top.
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory

llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k-0613")

# The memory is stored under the "chat_history" key and uses the same LLM.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
# Natural-language question to put to the QA chain.
question = (
    'Get me list of all tables from which '
    '"#AllProductTransactionsNoCalculation" table is created'
)
# To inspect the chunks the retriever would hand to the LLM for this question, run:
# docs = retriever.get_relevant_documents(question)
# docs

In [88]:
# Run the question through the chain and show the raw answer string (newlines appear escaped in the repr).
result = qa(question)
result["answer"]

'The "#AllProductTransactionsNoCalculation" table is created by inserting data from the following tables:\n\n- #CurrentMOSPOpportunities\n- #ValidTransactions\n- [mss].[PricingLevelHierarchy]\n- [dbo].[AddonOpportunityProduct]'

In [87]:
# Print the answer so its line breaks render as an actual bulleted list.
print(result["answer"])

The "#AllProductTransactionsNoCalculation" table is created from the following tables:

- #CurrentMOSPOpportunities
- #AllValidTransactions
- [dbo].[ProductHierarchy]
- [dbo].[AddonOpportunityProduct]
- [mss].[PricingLevelHierarchy]
