-
Notifications
You must be signed in to change notification settings - Fork 4.5k
/
firestore_kvstore.py
232 lines (200 loc) · 7.62 KB
/
firestore_kvstore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
from typing import Any, Dict, List, Optional, Tuple
from llama_index.legacy.storage.kvstore.types import (
DEFAULT_BATCH_SIZE,
DEFAULT_COLLECTION,
BaseKVStore,
)
# Firestore reserves field names of the form "__*__", but llama_index uses
# "__data__" and "__type__" (see llama_index/constants.py). Documents are
# therefore written with the plain names below and mapped back on read.
FIELD_NAME_REPLACE_SET = {"__data__": "data", "__type__": "type"}
# Inverse of the mapping above: stored field name -> llama_index field name.
FIELD_NAME_REPLACE_GET = {
    stored_name: original_name
    for original_name, stored_name in FIELD_NAME_REPLACE_SET.items()
}

# A Firestore collection ID cannot contain "/", so it is swapped for this.
SLASH_REPLACEMENT = "_"

# Raised (as an ImportError message) when the Firestore client is missing.
IMPORT_ERROR_MSG = (
    "`firestore` package not found, please run `pip3 install google-cloud-firestore`"
)

# User agent attached to the client info handed to the Firestore clients.
USER_AGENT = "LlamaIndex"

# Database name used when the caller does not choose one.
DEFAULT_FIRESTORE_DATABASE = "(default)"
class FirestoreKVStore(BaseKVStore):
    """Firestore Key-Value store.

    Each key is stored as one Firestore document whose ID is the key and
    whose fields are the value dict. A synchronous and an asynchronous
    Firestore client are created side by side; the ``a``-prefixed methods
    use the asynchronous one.

    Args:
        project (str): The project which the client acts on behalf of.
        database (str): The database name that the client targets.
    """

    def __init__(
        self,
        project: Optional[str] = None,
        database: str = DEFAULT_FIRESTORE_DATABASE,
    ) -> None:
        """Create the synchronous and asynchronous Firestore clients.

        Args:
            project (Optional[str]): project passed to both clients.
            database (str): database name passed to both clients.

        Raises:
            ImportError: if the Firestore client library cannot be imported.
        """
        import copy

        try:
            from google.cloud.firestore_v1.async_client import AsyncClient
            from google.cloud.firestore_v1.client import Client
            from google.cloud.firestore_v1.services.firestore.transports.base import (
                DEFAULT_CLIENT_INFO,
            )
        except ImportError as err:
            raise ImportError(IMPORT_ERROR_MSG) from err

        # Work on a copy: DEFAULT_CLIENT_INFO is a module-level object of the
        # Firestore library, and setting the user agent on it directly would
        # leak into every other user of that default in the same process.
        client_info = copy.copy(DEFAULT_CLIENT_INFO)
        client_info.user_agent = USER_AGENT
        self._adb = AsyncClient(
            project=project, database=database, client_info=client_info
        )
        self._db = Client(project=project, database=database, client_info=client_info)

    @staticmethod
    def _rename_fields(val: Dict[str, Any], mapping: Dict[str, str]) -> Dict[str, Any]:
        """Return a shallow copy of ``val`` with keys renamed per ``mapping``."""
        renamed = val.copy()
        for old_name, new_name in mapping.items():
            if old_name in renamed:
                renamed[new_name] = renamed.pop(old_name)
        return renamed

    def firestore_collection(self, collection: str) -> str:
        """Return ``collection`` with every "/" replaced by SLASH_REPLACEMENT.

        Args:
            collection (str): collection name
        """
        return collection.replace("/", SLASH_REPLACEMENT)

    def replace_field_name_set(self, val: Dict[str, Any]) -> Dict[str, Any]:
        """Rename fields for writing, following FIELD_NAME_REPLACE_SET.

        The input dict is not modified; a shallow copy is returned.

        Args:
            val (Dict[str, Any]): value about to be written
        """
        return self._rename_fields(val, FIELD_NAME_REPLACE_SET)

    def replace_field_name_get(self, val: Dict[str, Any]) -> Dict[str, Any]:
        """Rename fields after reading, following FIELD_NAME_REPLACE_GET.

        The input dict is not modified; a shallow copy is returned.

        Args:
            val (Dict[str, Any]): value read from Firestore
        """
        return self._rename_fields(val, FIELD_NAME_REPLACE_GET)

    def put(
        self,
        key: str,
        val: dict,
        collection: str = DEFAULT_COLLECTION,
    ) -> None:
        """Put a key-value pair into the Firestore collection.

        The document is written with ``merge=True``.

        Args:
            key (str): key
            val (dict): value
            collection (str): collection name
        """
        collection_id = self.firestore_collection(collection)
        val = self.replace_field_name_set(val)
        doc = self._db.collection(collection_id).document(key)
        doc.set(val, merge=True)

    async def aput(
        self,
        key: str,
        val: dict,
        collection: str = DEFAULT_COLLECTION,
    ) -> None:
        """Put a key-value pair into the Firestore collection.

        The document is written with ``merge=True``.

        Args:
            key (str): key
            val (dict): value
            collection (str): collection name
        """
        collection_id = self.firestore_collection(collection)
        val = self.replace_field_name_set(val)
        doc = self._adb.collection(collection_id).document(key)
        await doc.set(val, merge=True)

    def put_all(
        self,
        kv_pairs: List[Tuple[str, dict]],
        collection: str = DEFAULT_COLLECTION,
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> None:
        """Put a list of key-value pairs into the Firestore collection.

        Writes are grouped into batches that are committed every
        ``batch_size`` pairs.

        Args:
            kv_pairs (List[Tuple[str, dict]]): key-value pairs
            collection (str): collection name
            batch_size (int): number of writes per committed batch
        """
        # The target collection is the same for every pair; resolve it once.
        collection_ref = self._db.collection(self.firestore_collection(collection))
        batch = self._db.batch()
        for i, (key, val) in enumerate(kv_pairs, start=1):
            val = self.replace_field_name_set(val)
            batch.set(collection_ref.document(key), val, merge=True)
            if i % batch_size == 0:
                batch.commit()
                batch = self._db.batch()
        # Flush whatever is left since the last full batch.
        batch.commit()

    async def aput_all(
        self,
        kv_pairs: List[Tuple[str, dict]],
        collection: str = DEFAULT_COLLECTION,
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> None:
        """Put a list of key-value pairs into the Firestore collection.

        Writes are grouped into batches that are committed every
        ``batch_size`` pairs.

        Args:
            kv_pairs (List[Tuple[str, dict]]): key-value pairs
            collection (str): collection name
            batch_size (int): number of writes per committed batch
        """
        # The target collection is the same for every pair; resolve it once.
        collection_ref = self._adb.collection(self.firestore_collection(collection))
        batch = self._adb.batch()
        for i, (key, val) in enumerate(kv_pairs, start=1):
            val = self.replace_field_name_set(val)
            batch.set(collection_ref.document(key), val, merge=True)
            if i % batch_size == 0:
                await batch.commit()
                batch = self._adb.batch()
        # Flush whatever is left since the last full batch.
        await batch.commit()

    def get(self, key: str, collection: str = DEFAULT_COLLECTION) -> Optional[dict]:
        """Get a value from the Firestore.

        Args:
            key (str): key
            collection (str): collection name

        Returns:
            The stored value with its field names mapped back, or None when
            the document yields no data (missing or empty).
        """
        collection_id = self.firestore_collection(collection)
        result = self._db.collection(collection_id).document(key).get().to_dict()
        if not result:
            return None
        return self.replace_field_name_get(result)

    async def aget(
        self, key: str, collection: str = DEFAULT_COLLECTION
    ) -> Optional[dict]:
        """Get a value from the Firestore.

        Args:
            key (str): key
            collection (str): collection name

        Returns:
            The stored value with its field names mapped back, or None when
            the document yields no data (missing or empty).
        """
        collection_id = self.firestore_collection(collection)
        result = (
            await self._adb.collection(collection_id).document(key).get()
        ).to_dict()
        if not result:
            return None
        return self.replace_field_name_get(result)

    def get_all(self, collection: str = DEFAULT_COLLECTION) -> Dict[str, dict]:
        """Get all values from the Firestore collection.

        Args:
            collection (str): collection name

        Returns:
            A dict mapping each document ID to its value, with field names
            mapped back. Documents whose snapshot has no data are skipped.
        """
        collection_id = self.firestore_collection(collection)
        docs = self._db.collection(collection_id).list_documents()
        output: Dict[str, dict] = {}
        for doc in docs:
            data = doc.get().to_dict()
            # A listed reference may have no data behind it; to_dict() is
            # then None and must not reach replace_field_name_get().
            if data is None:
                continue
            output[doc.id] = self.replace_field_name_get(data)
        return output

    async def aget_all(self, collection: str = DEFAULT_COLLECTION) -> Dict[str, dict]:
        """Get all values from the Firestore collection.

        Args:
            collection (str): collection name

        Returns:
            A dict mapping each document ID to its value, with field names
            mapped back. Documents whose snapshot has no data are skipped.
        """
        collection_id = self.firestore_collection(collection)
        docs = self._adb.collection(collection_id).list_documents()
        output: Dict[str, dict] = {}
        async for doc in docs:
            # get() on the async client is a coroutine and must be awaited
            # before the snapshot can be converted to a dict.
            data = (await doc.get()).to_dict()
            if data is None:
                continue
            output[doc.id] = self.replace_field_name_get(data)
        return output

    def delete(self, key: str, collection: str = DEFAULT_COLLECTION) -> bool:
        """Delete a value from the Firestore.

        Args:
            key (str): key
            collection (str): collection name

        Returns:
            Always True; whether the document existed is not checked.
        """
        collection_id = self.firestore_collection(collection)
        doc = self._db.collection(collection_id).document(key)
        doc.delete()
        return True

    async def adelete(self, key: str, collection: str = DEFAULT_COLLECTION) -> bool:
        """Delete a value from the Firestore.

        Args:
            key (str): key
            collection (str): collection name

        Returns:
            Always True; whether the document existed is not checked.
        """
        collection_id = self.firestore_collection(collection)
        doc = self._adb.collection(collection_id).document(key)
        await doc.delete()
        return True