In [2]:
from git import Repo
from langchain.text_splitter import TokenTextSplitter
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
# Create the checkout directory in an idempotent, cross-platform way:
# unlike `!mkdir`, this does not fail when the directory already exists
# (e.g. on Restart & Run All).
os.makedirs("test_repo", exist_ok=True)

In [4]:
# Location of the local checkout and the remote it is cloned from.
repo_path = "test_repo/"
repo_url = "https://github.com/mkhnoori/complete-medical-chatbot.git"

# Cloning into a non-empty directory fails, so reuse an existing checkout
# when the notebook is re-run; only clone when no repository is present yet.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(repo_url, to_path=repo_path)

In [5]:
# Build a filesystem loader rooted at the cloned repository. The glob matches
# every path under the root; `suffixes` then restricts loading to `.py` files,
# which are handed to a Python-configured LanguageParser.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    # NOTE(review): parser_threshold presumably sets the minimum file size
    # before language-aware parsing is applied — confirm in the LangChain docs.
    parser=LanguageParser(language="python", parser_threshold=500),
)

In [6]:
# Run the loader and keep the resulting Document objects for later splitting.
documents = loader.load()

In [7]:
# Display all loaded documents (bare expression -> shown as the cell output).
documents

[Document(page_content="from setuptools import setup, find_packages\n\nsetup(\n    name = 'Generative AI Project',\n    version = '0.0.0',\n    author = 'Mohammad Khnoori',\n    author_email = 'mkh_noori@yahoo.com',\n    packages = find_packages(),\n    install_requires = []\n)", metadata={'source': 'test_repo/setup.py', 'language': 'python'}),
 Document(page_content='from flask import Flask, render_template, jsonify, request\nfrom src.helper import download_hugging_face_embeddings\nfrom langchain_pinecone import PineconeVectorStore\nfrom pinecone import Pinecone\nfrom langchain_community.llms import CTransformers \nfrom langchain.prompts import PromptTemplate\nfrom langchain.chains import RetrievalQA\nfrom dotenv import load_dotenv\nfrom src.prompt import *\nimport os\n\napp = Flask(__name__)\n\nload_dotenv()\n\nPINECONE_API_KEY = os.environ.get(\'PINECONE_API_KEY\')\nPINECONE_ENVIRONMENT = os.environ.get(\'PINECONE_ENVIRONMENT\')\nOPENAI_API_KEY = os.environ.get(\'OPENAI_API_KEY\')\n

In [8]:
# Number of documents produced by the loader.
len(documents)

7

In [9]:
# Inspect the first loaded document: its page content and its metadata.
documents[0]

Document(page_content="from setuptools import setup, find_packages\n\nsetup(\n    name = 'Generative AI Project',\n    version = '0.0.0',\n    author = 'Mohammad Khnoori',\n    author_email = 'mkh_noori@yahoo.com',\n    packages = find_packages(),\n    install_requires = []\n)", metadata={'source': 'test_repo/setup.py', 'language': 'python'})

In [10]:
# Python-aware recursive splitter. Chunk sizes are measured with `len`, i.e.
# in characters: at most 500 per chunk with a 20-character overlap between
# neighbouring chunks. `add_start_index=True` records each chunk's offset in
# its metadata under 'start_index'.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language="python",
    chunk_size=500,
    chunk_overlap=20,
    length_function=len,
    add_start_index=True,
)

In [11]:
# Break the loaded documents into chunks with the splitter defined for Python code.
texts = documents_splitter.split_documents(documents)

In [12]:
# Display all chunks produced by the splitter (bare expression -> cell output).
texts

[Document(page_content="from setuptools import setup, find_packages\n\nsetup(\n    name = 'Generative AI Project',\n    version = '0.0.0',\n    author = 'Mohammad Khnoori',\n    author_email = 'mkh_noori@yahoo.com',\n    packages = find_packages(),\n    install_requires = []\n)", metadata={'source': 'test_repo/setup.py', 'language': 'python', 'start_index': 0}),
 Document(page_content='from flask import Flask, render_template, jsonify, request\nfrom src.helper import download_hugging_face_embeddings\nfrom langchain_pinecone import PineconeVectorStore\nfrom pinecone import Pinecone\nfrom langchain_community.llms import CTransformers \nfrom langchain.prompts import PromptTemplate\nfrom langchain.chains import RetrievalQA\nfrom dotenv import load_dotenv\nfrom src.prompt import *\nimport os\n\napp = Flask(__name__)\n\nload_dotenv()', metadata={'source': 'test_repo/app.py', 'language': 'python', 'start_index': 0}),
 Document(page_content='load_dotenv()\n\nPINECONE_API_KEY = os.environ.get(\

In [13]:
# Number of chunks produced by the splitter.
len(texts)

14

In [14]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process
# environment, then read the OpenAI key from it. The value is None when the
# variable is not defined.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [15]:
# Fail early with an actionable message when the key is missing; assigning
# None into os.environ would otherwise raise an opaque TypeError.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# Export the key so that the OpenAI clients created below can pick it up.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [16]:
# Embedding model used to vectorize the code chunks.
# NOTE(review): an empty `disallowed_special` presumably lets text that looks
# like special tokens be embedded instead of rejected — confirm in the docs.
embeddings = OpenAIEmbeddings(disallowed_special=())

In [18]:
# Embed every chunk and index it in a Chroma vector store backed by ./db.
# NOTE(review): re-running this cell against an existing ./db may add the same
# chunks again — confirm and clear the directory if duplicates matter.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory="./db",
)

In [19]:
# Flush the vector store to its persist directory.
# NOTE(review): newer Chroma releases may persist automatically and deprecate
# this call — confirm against the installed version.
vectordb.persist()

In [20]:
# Chat model used both for answering and for summarizing the conversation.
# Constructed with library defaults; pass model_name="gpt-4" to ChatOpenAI
# to select GPT-4 instead.
llm = ChatOpenAI()

In [22]:
from langchain.memory import ConversationSummaryMemory

In [23]:
# Conversation memory that keeps an LLM-generated running summary and exposes
# it to the chain under the "chat_history" key.
# The keyword is `return_messages` (plural); the previous `return_message`
# spelling was not a recognized option, so the setting never took effect.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

In [24]:
# NOTE(review): `search` is not used in this cell; it looks like an editor
# auto-import — confirm it is unused elsewhere before removing.
from re import search


# Conversational QA chain: retrieves chunks from the vector store with
# maximal-marginal-relevance search (k=8 results) and answers with `llm`,
# carrying the conversation state in `memory`.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 8},
    ),
    memory=memory,
)

In [25]:
# Question about the indexed codebase to send to the QA chain.
question = "what is download_hugging_face_embeddings function?"

In [26]:
# Run the question through the QA chain and print only the generated answer.
# (A stray `from unittest import result` was dropped: the name was rebound by
# the assignment below before it could ever be used.)
result = qa(question)
print(result['answer'])

Number of requested results 20 is greater than number of elements in index 14, updating n_results = 14


The `download_hugging_face_embeddings` function is a function that is used to download embeddings (representations of text data in a numerical format) from the Hugging Face model hub. In this specific case, the function is downloading embeddings from the "sentence-transformers/all-MiniLM-L6-v2" repository for the task of feature extraction. These embeddings are then used for tasks such as text similarity, clustering, and other natural language processing applications within the codebase where this function is defined.
