Skip to content

Commit

Permalink
langchain[patch]: Migrate document transformers (#21098)
Browse files Browse the repository at this point in the history
Migrate document transformers
  • Loading branch information
eyurtsev committed Apr 30, 2024
1 parent aec13a6 commit 9b6d04a
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 61 deletions.
57 changes: 39 additions & 18 deletions libs/langchain/langchain/document_transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,50 @@
Document
""" # noqa: E501
import warnings
from typing import Any
from typing import TYPE_CHECKING, Any

from langchain_core._api import LangChainDeprecationWarning
from langchain._api import create_importer

from langchain.utils.interactive_env import is_interactive_env
if TYPE_CHECKING:
from langchain_community.document_transformers import (
BeautifulSoupTransformer,
DoctranPropertyExtractor,
DoctranQATransformer,
DoctranTextTranslator,
EmbeddingsClusteringFilter,
EmbeddingsRedundantFilter,
GoogleTranslateTransformer,
Html2TextTransformer,
LongContextReorder,
NucliaTextTransformer,
OpenAIMetadataTagger,
get_stateful_documents,
)

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"BeautifulSoupTransformer": "langchain_community.document_transformers",
"DoctranQATransformer": "langchain_community.document_transformers",
"DoctranTextTranslator": "langchain_community.document_transformers",
"DoctranPropertyExtractor": "langchain_community.document_transformers",
"EmbeddingsClusteringFilter": "langchain_community.document_transformers",
"EmbeddingsRedundantFilter": "langchain_community.document_transformers",
"GoogleTranslateTransformer": "langchain_community.document_transformers",
"get_stateful_documents": "langchain_community.document_transformers",
"LongContextReorder": "langchain_community.document_transformers",
"NucliaTextTransformer": "langchain_community.document_transformers",
"OpenAIMetadataTagger": "langchain_community.document_transformers",
"Html2TextTransformer": "langchain_community.document_transformers",
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


# NOTE(review): this span is a rendered diff hunk whose +/- markers and
# indentation were stripped, so it shows two bodies for one function back to
# back. As displayed it is not a single runnable definition.
def __getattr__(name: str) -> Any:
# NOTE(review): presumably the pre-migration body (lines removed by this
# commit) -- TODO confirm against the upstream diff. It imports
# langchain_community.document_transformers at call time, emits a
# LangChainDeprecationWarning when is_interactive_env() is false, and returns
# the attribute with getattr().
from langchain_community import document_transformers

# If not in interactive env, raise warning.
if not is_interactive_env():
warnings.warn(
"Importing document transformers from langchain is deprecated. Importing "
"from langchain will no longer be supported as of langchain==0.2.0. "
"Please import from langchain-community instead:\n\n"
f"`from langchain_community.document_transformers import {name}`.\n\n"
"To install langchain-community run `pip install -U langchain-community`.",
category=LangChainDeprecationWarning,
)

return getattr(document_transformers, name)
# NOTE(review): presumably the post-migration body (lines added by this
# commit) -- TODO confirm. It hands the name to _import_attribute, the importer
# built above by create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP),
# which is the same shape every other module in this commit uses.
"""Look up attributes dynamically."""
return _import_attribute(name)


__all__ = [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from langchain_community.document_transformers.beautiful_soup_transformer import (
BeautifulSoupTransformer,
)
from typing import TYPE_CHECKING, Any

__all__ = ["BeautifulSoupTransformer"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import BeautifulSoupTransformer

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"BeautifulSoupTransformer": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``BeautifulSoupTransformer``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"BeautifulSoupTransformer",
]
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from langchain_community.document_transformers.doctran_text_extract import (
DoctranPropertyExtractor,
)
from typing import TYPE_CHECKING, Any

__all__ = ["DoctranPropertyExtractor"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import DoctranPropertyExtractor

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"DoctranPropertyExtractor": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``DoctranPropertyExtractor``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"DoctranPropertyExtractor",
]
28 changes: 24 additions & 4 deletions libs/langchain/langchain/document_transformers/doctran_text_qa.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from langchain_community.document_transformers.doctran_text_qa import (
DoctranQATransformer,
)
from typing import TYPE_CHECKING, Any

__all__ = ["DoctranQATransformer"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import DoctranQATransformer

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"DoctranQATransformer": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``DoctranQATransformer``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"DoctranQATransformer",
]
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from langchain_community.document_transformers.doctran_text_translate import (
DoctranTextTranslator,
)
from typing import TYPE_CHECKING, Any

__all__ = ["DoctranTextTranslator"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import DoctranTextTranslator

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"DoctranTextTranslator": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``DoctranTextTranslator``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"DoctranTextTranslator",
]
Original file line number Diff line number Diff line change
@@ -1,11 +1,44 @@
from langchain_community.document_transformers.embeddings_redundant_filter import (
EmbeddingsClusteringFilter,
EmbeddingsRedundantFilter,
_DocumentWithState,
_filter_similar_embeddings,
_get_embeddings_from_stateful_docs,
get_stateful_documents,
)
from typing import TYPE_CHECKING, Any

from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import (
EmbeddingsClusteringFilter,
EmbeddingsRedundantFilter,
get_stateful_documents,
)
from langchain_community.document_transformers.embeddings_redundant_filter import (
_DocumentWithState,
_filter_similar_embeddings,
_get_embeddings_from_stateful_docs,
)

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"EmbeddingsRedundantFilter": "langchain_community.document_transformers",
"EmbeddingsClusteringFilter": "langchain_community.document_transformers",
"_DocumentWithState": (
"langchain_community.document_transformers.embeddings_redundant_filter"
),
"get_stateful_documents": "langchain_community.document_transformers",
"_get_embeddings_from_stateful_docs": (
"langchain_community.document_transformers.embeddings_redundant_filter"
),
"_filter_similar_embeddings": (
"langchain_community.document_transformers.embeddings_redundant_filter"
),
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table. That table covers the two
    embeddings filters, ``get_stateful_documents`` and the private helpers
    from ``embeddings_redundant_filter``. The importer's result is returned
    unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"EmbeddingsRedundantFilter",
Expand Down
28 changes: 24 additions & 4 deletions libs/langchain/langchain/document_transformers/google_translate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from langchain_community.document_transformers.google_translate import (
GoogleTranslateTransformer,
)
from typing import TYPE_CHECKING, Any

__all__ = ["GoogleTranslateTransformer"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import GoogleTranslateTransformer

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"GoogleTranslateTransformer": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``GoogleTranslateTransformer``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"GoogleTranslateTransformer",
]
26 changes: 24 additions & 2 deletions libs/langchain/langchain/document_transformers/html2text.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
from langchain_community.document_transformers.html2text import Html2TextTransformer
from typing import TYPE_CHECKING, Any

__all__ = ["Html2TextTransformer"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import Html2TextTransformer

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"Html2TextTransformer": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``Html2TextTransformer``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"Html2TextTransformer",
]
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
from langchain_community.document_transformers.long_context_reorder import (
LongContextReorder,
)
from typing import TYPE_CHECKING, Any

__all__ = ["LongContextReorder"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import LongContextReorder

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {"LongContextReorder": "langchain_community.document_transformers"}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``LongContextReorder``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"LongContextReorder",
]
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from langchain_community.document_transformers.nuclia_text_transform import (
NucliaTextTransformer,
)
from typing import TYPE_CHECKING, Any

__all__ = ["NucliaTextTransformer"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import NucliaTextTransformer

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"NucliaTextTransformer": "langchain_community.document_transformers"
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``NucliaTextTransformer``),
    and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"NucliaTextTransformer",
]
36 changes: 31 additions & 5 deletions libs/langchain/langchain/document_transformers/openai_functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,32 @@
from langchain_community.document_transformers.openai_functions import (
OpenAIMetadataTagger,
create_metadata_tagger,
)
from typing import TYPE_CHECKING, Any

__all__ = ["OpenAIMetadataTagger", "create_metadata_tagger"]
from langchain._api import create_importer

if TYPE_CHECKING:
from langchain_community.document_transformers import OpenAIMetadataTagger
from langchain_community.document_transformers.openai_functions import (
create_metadata_tagger,
)

# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
"OpenAIMetadataTagger": "langchain_community.document_transformers",
"create_metadata_tagger": (
"langchain_community.document_transformers.openai_functions"
),
}

_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute that is not defined statically.

    Module-level ``__getattr__`` hook (PEP 562). Passes ``name`` to
    ``_import_attribute``, the importer built by ``create_importer`` from
    this module's ``DEPRECATED_LOOKUP`` table (``OpenAIMetadataTagger`` and
    ``create_metadata_tagger``), and returns the importer's result unchanged.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = [
"OpenAIMetadataTagger",
"create_metadata_tagger",
]

0 comments on commit 9b6d04a

Please sign in to comment.