Skip to content

Commit

Permalink
Partial integration, split across multiple pull requests for langchain:
Browse files Browse the repository at this point in the history
- [Add a Wrapper vectorstore, compatible with SelfQueryRetriever](langchain-ai/langchain#13190)
- [Adds an in-memory implementation of RecordStore](langchain-ai/langchain#13200)
- [Add SQLDocStore](langchain-ai/langchain#13181)
  • Loading branch information
pprados committed Nov 10, 2023
1 parent eb1e6ec commit 8a5934b
Show file tree
Hide file tree
Showing 41 changed files with 1,769 additions and 1,217 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

.ruff_cache
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions .idea/aws.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

291 changes: 291 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions .idea/jupyter-settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions .idea/langchain-rag.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/other.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 53 additions & 22 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -134,33 +134,64 @@ else

endif


sync:
cp -rf ../langchain/libs/experimental/langchain_experimental/chains/qa_with_references/ \
LANGCHAIN_HOME=../langchain
TARGET:=langchain
SRC_PACKAGE=langchain_rag
DST_PACKAGE=langchain
SRC_MODULE:=langchain-rag
DST_MODULE:=langchain

define _push_sync
@$(eval TARGET=$(TARGET))
@$(eval SRC_PACKAGE=$(SRC_PACKAGE))
@$(eval DST_PACKAGE=$(DST_PACKAGE))
@$(eval WORK_DIR=$(shell mktemp -d --suffix ".rsync"))
@mkdir -p "${WORK_DIR}/libs/${TARGET}"
@mkdir -p "${WORK_DIR}/docs/docs"
@echo Copy and patch $(SRC_PACKAGE) to $(DST_PACKAGE) in $(LANGCHAIN_HOME)
@( \
cd $(SRC_PACKAGE)/ ; \
rsync -a \
--exclude ".*" \
--exclude __pycache__ \
--exclude __init__.py \
. "${WORK_DIR}/libs/${TARGET}/$(DST_PACKAGE)" ; \
)
@( \
cd tests/ ; \
rsync -a \
--exclude ".*" \
--exclude __pycache__ \
--exclude __init__.py \
. "${WORK_DIR}/libs/${TARGET}/tests" ; \
)
@( \
cd docs/ ; \
rsync -a \
--exclude ".*" \
. "${WORK_DIR}/docs/docs" ; \
)
@find '${WORK_DIR}' -type f -a \
-exec sed -i "s/${SRC_PACKAGE}/${DST_PACKAGE}/g" {} ';' \
-exec sed -i "s/pip install -q '$(SRC_MODULE)'/pip install -q '$(DST_MODULE)'/g" {} ';'
@cp -R "${WORK_DIR}/libs" "${WORK_DIR}/docs" $(LANGCHAIN_HOME)/
@rm -Rf '${WORK_DIR}'
endef

push-sync:
$(call _push_sync)

pull-sync:
cp -rf $(TARGET)/langchain_experimental/chains/qa_with_references/ \
langchain_qa_with_references/chains/
cp -f ../langchain/libs/experimental/langchain_experimental/chains/__init__.py \
cp -f $(TARGET)/langchain_experimental/chains/__init__.py \
langchain_qa_with_references/chains/
cp -rf ../langchain/libs/experimental/langchain_experimental/chains/qa_with_references_and_verbatims/ \
cp -rf $(TARGET)/langchain_experimental/chains/qa_with_references_and_verbatims/ \
langchain_qa_with_references/chains/
cp -rf ../langchain/libs/experimental/tests/unit_tests/chains/ \
cp -rf $(TARGET)/tests/unit_tests/chains/ \
tests/unit_tests/
cp ../langchain/libs/experimental/docs/qa_with_reference*.ipynb .
cp $(TARGET)/docs/qa_with_reference*.ipynb .
find . -type f \( -name '*.py' -or -name '*.ipynb' \) | xargs sed -i 's/langchain_experimental/langchain_qa_with_references/g'
find . -type f -name '*.ipynb' | xargs sed -i 's/langchain\([_-]\)experimental/langchain\1qa_with_references/g'

reverse-sync:
( cd langchain_qa_with_references/chains/ ; \
find . -type f -not -iname '__init__.py' -exec cp '{}' '../../../langchain/libs/experimental/langchain_experimental/chains/{}' ';' \
)

( cd tests/unit_tests/chains ; \
find . -type f -not -iname '__init__.py' -exec cp '{}' '../../../../langchain/libs/experimental/tests/unit_tests/chains/{}' ';' \
)
( cd tests/integration_tests/chains ; \
find . -type f -not -iname '__init__.py' -name '*_qa_with_*' -exec cp '{}' '../../../../langchain/libs/experimental/tests/integration_tests/chains/{}' ';' \
)

cp *.ipynb ../langchain/libs/experimental/docs/qa_with_reference*.ipynb
find ../langchain/libs/experimental -type f \( -name '*.py' -or -name '*.ipynb' \) -exec sed -i 's/langchain_qa_with_references/langchain_experimental/g' '{}' ';'
find ../langchain/libs/experimental -type f \( -name '*.py' -or -name '*.ipynb' \) -exec sed -i 's/langchain_experimental\.pydantic_v1/langchain\.pydantic_v1/g' '{}' ';'
find ../langchain/libs/experimental/docs -type f -name '*.ipynb' -exec sed -i 's/langchain\([_-]\)qa_with_references/langchain\1experimental/g' '{}' ';'
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ The challenge lies in correctly managing the lifecycle of the three levels of do
The `RAGVectorStore`, in combination with other components, is designed to address this challenge.

# Demo
Read [this notebook](rag_vectorstore.ipynb)
Read [this notebook](docs/vectorstores/rag_vectorstore.ipynb)
Or :
- `poetry run python -m ipykernel install --user --name langchain-parent`
- `poetry run python -m ipykernel install --user --name langchain-rag`
- `jupyter lab`

# Tips
Expand Down
13 changes: 6 additions & 7 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# ruff: noqa
import logging
import os
import pathlib
import shutil
import tempfile
from typing import *
from typing import Any, Dict, List, Optional, Sequence, Union, cast

from dotenv import load_dotenv
from langchain.chains.query_constructor.schema import AttributeInfo
Expand Down Expand Up @@ -238,7 +239,8 @@ def print_doc(d: Document, i: int) -> str:
# chunk_transformer=chunk_transformer,
# search_kwargs={"k": 10}
# )
# engine = sqlalchemy.engine.create_engine(url=f"sqlite:///{ROOT_PATH}/record_manager.db")
# engine = sqlalchemy.engine.create_engine(
# url=f"sqlite:///{ROOT_PATH}/record_manager.db")
# index_kwargs = {
# "record_manager": SQLRecordManager(
# namespace="record_manager_cache",
Expand All @@ -252,7 +254,8 @@ def print_doc(d: Document, i: int) -> str:
# %% Import documents

documents = WikipediaRetriever(
top_k_results=nb_documents_to_import
top_k_results=nb_documents_to_import,
wiki_client=None,
).get_relevant_documents("mathematic")

index(docs_source=documents, cleanup="incremental", **index_kwargs)
Expand Down Expand Up @@ -288,10 +291,6 @@ def print_doc(d: Document, i: int) -> str:
base_compressor=DocumentCompressorPipeline(
transformers=[
EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.7),
CohereRerank(
top_n=top_k,
user_agent="langchain", # FIXME: bug avec version langchain
),
LongContextReorder(),
]
),
Expand Down
Loading

0 comments on commit 8a5934b

Please sign in to comment.