diff --git a/llm_api/app/documents/__pycache__/urls.cpython-311.pyc b/llm_api/app/documents/__pycache__/urls.cpython-311.pyc
index 34c7661..b1e1c4f 100644
Binary files a/llm_api/app/documents/__pycache__/urls.cpython-311.pyc and b/llm_api/app/documents/__pycache__/urls.cpython-311.pyc differ
diff --git a/llm_api/app/documents/__pycache__/views.cpython-311.pyc b/llm_api/app/documents/__pycache__/views.cpython-311.pyc
index 827fcbc..e2ff93c 100644
Binary files a/llm_api/app/documents/__pycache__/views.cpython-311.pyc and b/llm_api/app/documents/__pycache__/views.cpython-311.pyc differ
diff --git a/llm_api/app/documents/docs/example.pdf b/llm_api/app/documents/docs/example.pdf
new file mode 100644
index 0000000..3248a16
Binary files /dev/null and b/llm_api/app/documents/docs/example.pdf differ
diff --git a/llm_api/app/documents/docs/example1.pdf b/llm_api/app/documents/docs/example1.pdf
new file mode 100644
index 0000000..3f46be9
Binary files /dev/null and b/llm_api/app/documents/docs/example1.pdf differ
diff --git a/llm_api/app/documents/docs/views.py b/llm_api/app/documents/docs/views.py
new file mode 100644
index 0000000..5518eea
--- /dev/null
+++ b/llm_api/app/documents/docs/views.py
@@ -0,0 +1,56 @@
+import os
+import time
+
+import requests
+from rest_framework.response import Response
+from rest_framework.decorators import api_view
+from neomodel import db
+
+
+@api_view(['GET'])
+def getData(request):
+    # Fetch knowledge-base articles from the ServiceNow search API for each
+    # category and upsert them into the Neo4j graph as Document nodes.
+    categories = ["SAP", "ServiceNow", "General", "MDE", "Guideline"]
+    # Credentials must come from the environment, never from source control.
+    auth = (os.environ["SERVICENOW_USER"], os.environ["SERVICENOW_PASSWORD"])
+
+    for search_query in categories:
+        # Build the API URL with the search query as a path parameter
+        api_url = f'https://bfgtest.service-now.com/api/besag/search_knowledge_baselhack2023/searchkb/{search_query}'
+
+        response = requests.get(api_url, auth=auth)
+
+        # Check whether the request was successful (status code 200)
+        if response.status_code == 200:
+            data = response.json()
+            # Upsert each article. neomodel expects the query parameters as a
+            # dict, so the result map is wrapped under the name the query uses.
+            for result in data["result"]:
+                db.cypher_query(
+                    '''
+                    MERGE (d:Document {id: $result.number})
+                    SET d += $result, d.context = 'tbd', d.tags = 'tbd'
+                    ''',
+                    {"result": result},
+                )
+        else:
+            # Handle the case where the API request fails
+            print(f"API call for '{search_query}' failed with status {response.status_code}")
+
+        # Throttle requests between categories
+        time.sleep(7)
+
+    return Response({"status": "ok"})
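Note on the upsert above: neomodel's db.cypher_query takes its parameters as a dict whose keys become the Cypher $name bindings, which is why the result map is passed as {"result": result}. A minimal sketch of exercising it in isolation from a Django shell, with a hypothetical article payload (only the `number` field is guaranteed by the loop above; the rest is illustrative):

    from neomodel import db

    # Hypothetical payload mirroring one entry of data["result"]
    result = {"number": "KB0010001", "short_description": "Example article"}

    db.cypher_query(
        '''
        MERGE (d:Document {id: $result.number})
        SET d += $result, d.context = 'tbd', d.tags = 'tbd'
        ''',
        {"result": result},
    )

    # cypher_query returns (rows, meta)
    rows, meta = db.cypher_query("MATCH (d:Document) RETURN d.id")
    print(rows)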
diff --git a/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/data_level0.bin b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/data_level0.bin
new file mode 100644
index 0000000..ea3192e
Binary files /dev/null and b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/data_level0.bin differ
diff --git a/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/header.bin b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/header.bin
new file mode 100644
index 0000000..3e0932a
Binary files /dev/null and b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/header.bin differ
diff --git a/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/length.bin b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/length.bin
new file mode 100644
index 0000000..1dc89f8
Binary files /dev/null and b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/length.bin differ
diff --git a/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/link_lists.bin b/llm_api/app/documents/mydata/4113db52-1d4a-415d-bd03-fef154bdf0e1/link_lists.bin
new file mode 100644
index 0000000..e69de29
diff --git a/llm_api/app/documents/mydata/chroma.sqlite3 b/llm_api/app/documents/mydata/chroma.sqlite3
new file mode 100644
index 0000000..f1a9c23
Binary files /dev/null and b/llm_api/app/documents/mydata/chroma.sqlite3 differ
diff --git a/llm_api/app/documents/urls.py b/llm_api/app/documents/urls.py
index b01dfd3..2067dc3 100644
--- a/llm_api/app/documents/urls.py
+++ b/llm_api/app/documents/urls.py
@@ -2,6 +2,6 @@
 from . import views
 
 urlpatterns = [
-    path('', views.getData),
+    path('', views.load_data),
     # path('post/', views.postData),
 ]
\ No newline at end of file
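For reference, after this rename the full documents/urls.py presumably reads as follows (a sketch: the `path` import sits above the hunk and is assumed):

    from django.urls import path
    from . import views

    urlpatterns = [
        path('', views.load_data),
        # path('post/', views.postData),
    ]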
diff --git a/llm_api/app/documents/views.py b/llm_api/app/documents/views.py
index b45d129..7812138 100644
--- a/llm_api/app/documents/views.py
+++ b/llm_api/app/documents/views.py
@@ -2,16 +2,141 @@
 from rest_framework.response import Response
 from rest_framework.decorators import api_view
 from neomodel import db
+from django.conf import settings
+import os
+
+from langchain.document_loaders import PyPDFLoader
+from langchain.llms import OpenAI
+from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.retrievers.self_query.base import SelfQueryRetriever
+from langchain.chains.query_constructor.base import AttributeInfo
 
 # Create your views here.
 @api_view(['GET'])
-def getData(request):
-    user = db.cypher_query(
-        '''
-        MATCH (n:User)
-        RETURN n
-        '''
-    )[0]
-    print(user)
-    return Response()
\ No newline at end of file
+def load_data(request):
+    # Build (or rebuild) the vector store from the PDFs on disk
+    vector_db = vectorize_the_context()
+    # get_answer(vector_db)
+    return Response()
+
+
+def get_context_tag(text):
+    # Split LangChain Documents into overlapping chunks for tagging
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=150,
+        length_function=len
+    )
+    return text_splitter.split_documents(text)
+
+
+def vectorize_the_context():
+    # The OpenAI key must come from the environment, never from source control.
+    openai_key = os.environ["OPENAI_API_KEY"]
+
+    # Load every PDF in documents/docs/
+    directory = os.path.join(settings.BASE_DIR, "documents/docs/")
+    loaders = [PyPDFLoader(os.path.join(directory, f)) for f in os.listdir(directory) if f.endswith('.pdf')]
+    docs = []
+    for loader in loaders:
+        docs.extend(loader.load())
+
+    # Split the documents into overlapping chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1500,
+        chunk_overlap=150
+    )
+    splits = text_splitter.split_documents(docs)
+
+    # Embed the chunks and persist the vector store
+    embedding = OpenAIEmbeddings(openai_api_key=openai_key)
+    persist_directory = os.path.join(settings.BASE_DIR, "mydata")
+    vectordb = Chroma.from_documents(
+        documents=splits,
+        embedding=embedding,
+        persist_directory=persist_directory
+    )
+    return vectordb
+
+
+def get_answer(vectordb):
+    def pretty_print_docs(docs):
+        print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))
+
+    openai_key = os.environ["OPENAI_API_KEY"]
+
+    question = "is there an email i can ask for help"
+    docs = vectordb.similarity_search(question, k=5)
+
+    # Inspect the retrieved documents
+    pretty_print_docs(docs)
+
+    # Persist the database so it can be reloaded later
+    vectordb.persist()
+
+    # Self-query retrieval: let the LLM translate the question into a
+    # filtered vector search over the document metadata.
+    # NOTE: these field descriptions still reference the cs229 lecture PDFs
+    # from the original example; adjust them to the files in documents/docs/.
+    metadata_field_info = [
+        AttributeInfo(
+            name="source",
+            description="The lecture the chunk is from, should be one of `docs/cs229_lectures/MachineLearning-Lecture01.pdf`, `docs/cs229_lectures/MachineLearning-Lecture02.pdf`, or `docs/cs229_lectures/MachineLearning-Lecture03.pdf`",
+            type="string",
+        ),
+        AttributeInfo(
+            name="page",
+            description="The page from the lecture",
+            type="integer",
+        ),
+    ]
+    document_content_description = "Lecture notes"
+    llm = OpenAI(temperature=0, openai_api_key=openai_key)
+    retriever = SelfQueryRetriever.from_llm(
+        llm,
+        vectordb,
+        document_content_description,
+        metadata_field_info,
+        verbose=True
+    )
+    question = "what did they say about regression in the third lecture?"
+    docs = retriever.get_relevant_documents(question)
+    pretty_print_docs(docs)
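Because vectorize_the_context persists the Chroma store under BASE_DIR/mydata, later requests can reload it instead of re-embedding the PDFs on every call. A minimal sketch under the same assumptions (same embedding model, same persist directory, OPENAI_API_KEY set in the environment):

    import os
    from django.conf import settings
    from langchain.vectorstores import Chroma
    from langchain.embeddings.openai import OpenAIEmbeddings

    embedding = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
    persist_directory = os.path.join(settings.BASE_DIR, "mydata")

    # Reload the persisted collection and run the same search get_answer performs
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
    docs = vectordb.similarity_search("is there an email i can ask for help", k=5)
    print(len(docs))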
diff --git a/llm_api/app/mydata/chroma.sqlite3 b/llm_api/app/mydata/chroma.sqlite3
new file mode 100644
index 0000000..f09efe6
Binary files /dev/null and b/llm_api/app/mydata/chroma.sqlite3 differ
diff --git a/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/data_level0.bin b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/data_level0.bin
new file mode 100644
index 0000000..fefb82b
Binary files /dev/null and b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/data_level0.bin differ
diff --git a/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/header.bin b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/header.bin
new file mode 100644
index 0000000..a2a2df9
Binary files /dev/null and b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/header.bin differ
diff --git a/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/index_metadata.pickle b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/index_metadata.pickle
new file mode 100644
index 0000000..38057e3
Binary files /dev/null and b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/index_metadata.pickle differ
diff --git a/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/length.bin b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/length.bin
new file mode 100644
index 0000000..1dc89f8
Binary files /dev/null and b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/length.bin differ
diff --git a/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/link_lists.bin b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/link_lists.bin
new file mode 100644
index 0000000..55ecdca
Binary files /dev/null and b/llm_api/app/mydata/e29d41b1-7cb0-442e-81d6-b599157c716a/link_lists.bin differ
diff --git a/llm_api/app/querys/views.py b/llm_api/app/querys/views.py
index f8c4a57..42507ea 100644
--- a/llm_api/app/querys/views.py
+++ b/llm_api/app/querys/views.py
@@ -4,20 +4,58 @@
 from neomodel import db
 from langchain.llms import OpenAI
 from langchain.chat_models import ChatOpenAI
 from django.conf import settings
+from langchain.vectorstores import Chroma
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+import os
+
 
 @api_view(['GET'])
 def get_title_(request):
     # openai_key = settings.openai_key
-    openai_key = ""
-    print(openai_key)
+    # The OpenAI key must come from the environment, never from source control.
+    openai_key = os.environ["OPENAI_API_KEY"]
     llm = OpenAI(openai_api_key=openai_key)
-    # question = request.query_params.get('question')
-    question = "Can you find the account details for the user with the name 'John Smith'?"
+    question = request.query_params.get('question')
+    # e.g. "Can you find the account details for the user with the name 'John Smith'?"
     title = llm.predict(f"can you write 10 titles for this question: '{question}'")
-    print(title)
-    return Response({'title': title})
\ No newline at end of file
+    return Response({'title': title})
+
+
+@api_view(['GET'])
+def answer_prompt(request):
+    openai_key = os.environ["OPENAI_API_KEY"]
+
+    # Reload the vector store that was persisted earlier by load_data
+    persist_directory = os.path.join(settings.BASE_DIR, "mydata")
+    embedding = OpenAIEmbeddings(openai_api_key=openai_key)
+    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
+    llm = ChatOpenAI(temperature=0, openai_api_key=openai_key)
+
+    template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say "I don't know, can you provide more hints?"; don't try to make up an answer. Use three sentences maximum and keep the answer as concise as possible. Always say "Thanks" at the end of the answer.
+    {context}
+    Question: {question}
+    Helpful Answer:"""
+    QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
+
+    # Run the retrieval-augmented QA chain
+    qa_chain = RetrievalQA.from_chain_type(
+        llm,
+        retriever=vectordb.as_retriever(),
+        return_source_documents=True,
+        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
+    )
+    question = request.query_params.get('question')
+    result = qa_chain({"query": question})
+
+    # result["source_documents"] holds langchain Document objects, which are
+    # not JSON-serializable, so return only the answer text.
+    return Response({'results': result["result"]})
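To exercise both flows end to end once the server is running, something like the following should work (a sketch: the URL prefixes depend on the project's root urls.py, which is not part of this diff, so both paths are assumptions):

    import requests

    BASE = "http://localhost:8000"

    # 1) Build and persist the vector store from the PDFs in documents/docs/
    requests.get(f"{BASE}/documents/")  # routed to views.load_data

    # 2) Ask a question against the persisted store
    resp = requests.get(
        f"{BASE}/querys/answer/",  # hypothetical route to views.answer_prompt
        params={"question": "is there an email i can ask for help"},
    )
    print(resp.json()["results"])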