Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core[minor]: Adds an in-memory implementation of RecordManager #13200

Merged
merged 41 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
249d8a1
Adds an in-memory implementation of RecordStore
pprados Nov 10, 2023
0d5f09f
Fix spell
pprados Nov 10, 2023
3d9e062
Update to last langchain version
pprados Nov 27, 2023
f667410
Fix a race condition in SQLAlchemyMd5Cache
pprados Jan 23, 2024
ec53014
Fix a race condition in SQLAlchemyMd5Cache
pprados Jan 23, 2024
cb37574
Add MemoryRecordManager
pprados Jan 23, 2024
6fe4072
Fix TU
pprados Jan 24, 2024
37099c8
Add __init__.py
pprados Jan 24, 2024
8c036b5
Merge remote-tracking branch 'upstream/master' into pprados/memory_re…
pprados May 6, 2024
2dd7366
Adds an in-memory implementation of RecordStore
pprados Nov 10, 2023
c83cb73
Fix spell
pprados Nov 10, 2023
4dff80b
Update to last langchain version
pprados Nov 27, 2023
8f4b258
Fix a race condition in SQLAlchemyMd5Cache
pprados Jan 23, 2024
a0bcc8b
Add MemoryRecordManager
pprados Jan 23, 2024
2cc8a3f
Fix TU
pprados Jan 24, 2024
2ee0721
Add __init__.py
pprados Jan 24, 2024
6670807
Merge remote-tracking branch 'upstream/master' into pprados/memory_re…
pprados Jun 10, 2024
a3a8a75
Merge remote-tracking branch 'upstream/master' into pprados/memory_re…
pprados Jun 10, 2024
04b55d0
Add the TU, like test_sql_record_manager
pprados Jun 10, 2024
e19d627
Merge remote-tracking branch 'origin/pprados/memory_recordmanager' in…
pprados Jun 10, 2024
17b370f
Fix lint and __init__
pprados Jun 10, 2024
074129d
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 10, 2024
3d79bbf
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 11, 2024
1850348
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 12, 2024
1e4bde6
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 12, 2024
5402b31
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 12, 2024
8205777
migrate to langchain-core
pprados Jun 13, 2024
4cf163b
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 13, 2024
0a41079
Fix
pprados Jun 13, 2024
67adcbb
Fix test_public_api.py
pprados Jun 13, 2024
de36e11
Fix long_context_reorder.py async
pprados Jun 13, 2024
90bb3d7
Fix
pprados Jun 13, 2024
c12c666
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 13, 2024
d97dcc1
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 13, 2024
bd24d2a
Merge remote-tracking branch 'upstream/master' into pprados/memory_re…
pprados Jun 14, 2024
1bf3821
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 14, 2024
1ce6aed
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 14, 2024
c62bb8c
qxqx
eyurtsev Jun 14, 2024
ca79b22
x
eyurtsev Jun 14, 2024
bfc83ea
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 18, 2024
f841067
Merge branch 'master' into pprados/memory_recordmanager
pprados Jun 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion libs/core/langchain_core/indexing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
if it's unchanged.
"""
from langchain_core.indexing.api import IndexingResult, aindex, index
from langchain_core.indexing.base import RecordManager
from langchain_core.indexing.base import InMemoryRecordManager, RecordManager

# Public API of this package: the names exported by a star-import.
__all__ = [
"aindex",
"index",
"IndexingResult",
"InMemoryRecordManager",
"RecordManager",
]
104 changes: 103 additions & 1 deletion libs/core/langchain_core/indexing/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import time
from abc import ABC, abstractmethod
from typing import List, Optional, Sequence
from typing import Dict, List, Optional, Sequence, TypedDict


class RecordManager(ABC):
Expand Down Expand Up @@ -183,3 +184,104 @@ async def adelete_keys(self, keys: Sequence[str]) -> None:
Args:
keys: A list of keys to delete.
"""


class _Record(TypedDict):
group_id: Optional[str]
updated_at: float


class InMemoryRecordManager(RecordManager):
    """An in-memory record manager for testing purposes.

    Records are kept in a plain dictionary on the instance (``self.records``),
    mapping each key to its group id and last-update timestamp. Every async
    method simply delegates to its synchronous counterpart.
    """

    def __init__(self, namespace: str) -> None:
        """Create an empty record manager.

        Args:
            namespace: Forwarded to the base class and stored on the instance.
        """
        super().__init__(namespace)
        # Each key points to a dictionary
        # of {'group_id': group_id, 'updated_at': timestamp}
        self.records: Dict[str, _Record] = {}
        self.namespace = namespace

    def create_schema(self) -> None:
        """Do nothing: the in-memory store is already set up by ``__init__``."""

    async def acreate_schema(self) -> None:
        """Do nothing: the in-memory store is already set up by ``__init__``."""

    def get_time(self) -> float:
        """Return the current time as a high resolution timestamp."""
        return time.time()

    async def aget_time(self) -> float:
        """Async variant of :meth:`get_time`."""
        return self.get_time()

    def update(
        self,
        keys: Sequence[str],
        *,
        group_ids: Optional[Sequence[Optional[str]]] = None,
        time_at_least: Optional[float] = None,
    ) -> None:
        """Insert or refresh records for ``keys``.

        The clock is read once per call, so every key in the batch receives
        the same ``updated_at`` value.

        Args:
            keys: Keys to upsert.
            group_ids: Optional group id for each key, aligned with ``keys``.
                When omitted (or empty) every key is stored with group ``None``.
            time_at_least: Optional lower bound that the manager's clock is
                expected to have reached already.

        Raises:
            ValueError: If ``group_ids`` is given with a different length than
                ``keys``, or if ``time_at_least`` is later than the current
                time.
        """
        if group_ids and len(keys) != len(group_ids):
            raise ValueError("Length of keys must match length of group_ids")

        # Validate once, before touching any record, instead of per key.
        now = self.get_time()
        if time_at_least is not None and time_at_least > now:
            raise ValueError("time_at_least must be in the past")

        resolved_groups = group_ids if group_ids else [None] * len(keys)
        for key, group_id in zip(keys, resolved_groups):
            self.records[key] = {"group_id": group_id, "updated_at": now}

    async def aupdate(
        self,
        keys: Sequence[str],
        *,
        group_ids: Optional[Sequence[Optional[str]]] = None,
        time_at_least: Optional[float] = None,
    ) -> None:
        """Async variant of :meth:`update`; same arguments and errors."""
        self.update(keys, group_ids=group_ids, time_at_least=time_at_least)

    def exists(self, keys: Sequence[str]) -> List[bool]:
        """Return, for each key in order, whether a record is stored for it."""
        return [key in self.records for key in keys]

    async def aexists(self, keys: Sequence[str]) -> List[bool]:
        """Async variant of :meth:`exists`."""
        return self.exists(keys)

    def list_keys(
        self,
        *,
        before: Optional[float] = None,
        after: Optional[float] = None,
        group_ids: Optional[Sequence[str]] = None,
        limit: Optional[int] = None,
    ) -> List[str]:
        """List stored keys that satisfy every supplied filter.

        Args:
            before: Keep only records updated strictly before this timestamp.
            after: Keep only records updated strictly after this timestamp.
            group_ids: Keep only records whose group id is in this collection.
                ``None`` or an empty collection disables the filter.
            limit: Maximum number of keys to return. ``None`` or ``0`` applies
                no cap.

        Returns:
            Matching keys, in the insertion order of the underlying dictionary.
        """
        matches: List[str] = []
        for key, record in self.records.items():
            updated_at = record["updated_at"]
            # Compare against None explicitly so that a bound of 0.0 is
            # honoured rather than being mistaken for "no bound".
            if before is not None and updated_at >= before:
                continue
            if after is not None and updated_at <= after:
                continue
            if group_ids and record["group_id"] not in group_ids:
                continue
            matches.append(key)
        if limit:
            return matches[:limit]
        return matches

    async def alist_keys(
        self,
        *,
        before: Optional[float] = None,
        after: Optional[float] = None,
        group_ids: Optional[Sequence[str]] = None,
        limit: Optional[int] = None,
    ) -> List[str]:
        """Async variant of :meth:`list_keys`; same filters and result."""
        return self.list_keys(
            before=before, after=after, group_ids=group_ids, limit=limit
        )

    def delete_keys(self, keys: Sequence[str]) -> None:
        """Remove the records for ``keys``; unknown keys are ignored."""
        for key in keys:
            self.records.pop(key, None)

    async def adelete_keys(self, keys: Sequence[str]) -> None:
        """Async variant of :meth:`delete_keys`."""
        self.delete_keys(keys)
105 changes: 0 additions & 105 deletions libs/core/tests/unit_tests/indexing/in_memory.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
import pytest_asyncio

from tests.unit_tests.indexing.in_memory import InMemoryRecordManager
from langchain_core.indexing import InMemoryRecordManager


@pytest.fixture()
Expand Down
3 changes: 1 addition & 2 deletions libs/core/tests/unit_tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
from langchain_core.document_loaders.base import BaseLoader
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.indexing import aindex, index
from langchain_core.indexing import InMemoryRecordManager, aindex, index
from langchain_core.indexing.api import _abatch, _HashedDocument
from langchain_core.vectorstores import VST, VectorStore
from tests.unit_tests.indexing.in_memory import InMemoryRecordManager


class ToyLoader(BaseLoader):
Expand Down
1 change: 1 addition & 0 deletions libs/core/tests/unit_tests/indexing/test_public_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ def test_all() -> None:
"aindex",
"index",
"IndexingResult",
"InMemoryRecordManager",
"RecordManager",
]
Loading