In [1]:
import pandas as pd

In [2]:
# Small hand-written sample frame used by the cells that follow.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4],
        "name": ["Alice", "Bob", "Charlie", "Alice 2"],
        "age": [25, 30, 35, 25],
    }
)

In [3]:
import sqlalchemy

# Use SQLite's in-memory database. Note that "sqlite:///memory" would instead
# be a relative path to an on-disk file literally named "memory" in the
# current working directory.
db = sqlalchemy.create_engine("sqlite:///:memory:")

In [4]:
# Write the sample frame to table "test", replacing the table if it already
# exists; the DataFrame index is not written as a column.
df.to_sql(
    name="test",
    con=db,
    if_exists="replace",
    index=False,
)

4

In [5]:
# Read the whole "test" table back into a new DataFrame.
df2 = pd.read_sql_table(table_name="test", con=db)

In [6]:
# Round-trip check: the frame read back from SQL should match the original.
df2.equals(df)

True

In [7]:
from sqlalchemy import MetaData

# Start with an empty MetaData object; it is filled in by reflect() in the next cell.
metadata = MetaData()

In [8]:
# Reflect the tables that exist in the database into `metadata`, then keep a
# handle on its table collection.
metadata.reflect(bind=db)
tables = metadata.tables

In [9]:
# Display the sample frame (it only has four rows, so showing it whole is fine).
df

Unnamed: 0,id,name,age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,35
3,4,Alice 2,25


In [10]:
# Keep only the rows whose id is 1 or 2 and return them as a list of dicts.
(
    df.loc[df['id'].isin([1, 2])]
    .to_dict(orient='records')
)

[{'id': 1, 'name': 'Alice', 'age': 25}, {'id': 2, 'name': 'Bob', 'age': 30}]

In [11]:
# Row count of the sample frame.
len(df)

4

In [12]:
def make_gen():
    """Yield the integers 0 through 9, then print 'done' once exhausted."""
    value = 0
    while value < 10:
        yield value
        value += 1
    # Reached only when the consumer asks for a value past the last one.
    print('done')

In [13]:
# Calling a generator function only builds the generator object; its body
# (including the final print) does not run until the generator is iterated.
g = make_gen()

In [14]:
# Show the repr of the generator object, which has not been iterated yet.
g

<generator object make_gen at 0x000001DE031D4AD0>

In [15]:
# Drain the generator. The loop variable has to stay named `i`: the f-string's
# `{i=}` specifier prints the variable name together with its value.
for i in g:
    print(f'{i=}')

i=0
i=1
i=2
i=3
i=4
i=5
i=6
i=7
i=8
i=9
done


In [16]:
def twowaygen():
    """Yield 0 through 9, printing the value sent back by the caller after each yield."""
    for number in range(10):
        # The yield expression evaluates to whatever was passed to .send()
        # (None when the generator is resumed with plain next()).
        even = yield number
        print(f'gen:: {even=}')
    
# Drive the generator by hand: prime it with next(), then answer every yielded
# number with a flag saying whether that number was even.
g = twowaygen()
n = next(g)

while True:
    print(n)
    even = n % 2 == 0
    try:
        n = g.send(even)
    except StopIteration:
        # Raised by send() once the generator's loop has run out of values.
        print('done')
        break
    

0
gen:: even=True
1
gen:: even=False
2
gen:: even=True
3
gen:: even=False
4
gen:: even=True
5
gen:: even=False
6
gen:: even=True
7
gen:: even=False
8
gen:: even=True
9
gen:: even=False
done


In [17]:
import tempfile

from simple_rag.knowledge_base.store.db_engine import DBEngine
from simple_rag.knowledge_base.store.default_store import Store


# Reserve a temporary file for the SQLite database. The context manager closes
# the file handle immediately (tempfile.mkstemp() would return an open OS-level
# descriptor that the caller must close itself); delete=False keeps the file
# on disk so the database engine can open it by name.
with tempfile.NamedTemporaryFile(delete=False) as tmp_db:
    db_fname = tmp_db.name
db_link = f"sqlite:///{db_fname}"
tbl_name = "sample_kbase"

# Forward slash on purpose: "\s" is an invalid escape sequence in a regular
# string literal, and "/" works as a path separator on Windows and POSIX alike.
df = pd.read_csv('assets/support_kbase.csv')

# Store the CSV contents in the relational table through the project's DBEngine.
engine = DBEngine({
    'db_link': db_link,
    'model_name': tbl_name
})
assert engine.is_configured

version, ids = engine.store_dataframe(df)
assert version == 1, "Incorrect version after saving DataFrame"
assert len(ids) == len(df), "Length of IDs doesn't match DataFrame size"

# Build a Store over the same database/table, backed by a Chroma collection
# persisted in a fresh temporary directory.
store = Store(
    db_cfg={
        "db_link": db_link,
        "model_name": tbl_name,
    },
    vectorstore_cfg={
        "type": "chroma",
        "collection_name": "support_knowledge_base",
        "persist_directory": tempfile.mkdtemp(),
    },
)

# NOTE(review): presumably embeds the rows that are not yet in the vector
# store -- confirm against Store.check_and_vectorize_unprocessed.
store.check_and_vectorize_unprocessed()

  from .autonotebook import tqdm as notebook_tqdm
[32m2025-04-18 15:27:54.866[0m | [1mINFO    [0m | [36msimple_rag.knowledge_base.store.db_engine[0m:[36mstore_dataframe[0m:[36m112[0m - [1mTable 'sample_kbase' created in relational DB[0m
[32m2025-04-18 15:27:54.889[0m | [34m[1mDEBUG   [0m | [36msimple_rag.knowledge_base.store.db_engine[0m:[36mstore_dataframe[0m:[36m137[0m - [34m[1mDataFrame saved to DB with version 1[0m
[32m2025-04-18 15:27:54.890[0m | [34m[1mDEBUG   [0m | [36msimple_rag.knowledge_base.store.default_store[0m:[36mbuild_db_manager[0m:[36m49[0m - [34m[1mCreate default DBEngine()[0m
[32m2025-04-18 15:27:55.237[0m | [34m[1mDEBUG   [0m | [36msimple_rag.knowledge_base.store.db_engine[0m:[36mload_dataframe[0m:[36m77[0m - [34m[1mmax_version: 1[0m
[32m2025-04-18 15:27:55.242[0m | [34m[1mDEBUG   [0m | [36msimple_rag.knowledge_base.store.db_engine[0m:[36mprocess_unvectorized_rows[0m:[36m183[0m - [34m[1mprocessing ro

In [18]:
# Fetch the vector store's contents; the cells below read the 'documents',
# 'ids' and 'metadatas' entries of the result.
data = store.vectorStore.get()

In [19]:
# Pull the individual fields out of the result. Note that `ids` now holds the
# vector-store IDs, replacing the value returned by engine.store_dataframe().
all_docs, ids, metadatas = data['documents'], data['ids'], data['metadatas']

In [20]:
# Look up a single document using the first ID from the fetched result.
store.vectorStore.get_by_ids([ids[0]])

[Document(id='2433386e-1541-4256-b640-8d6a48acf0b7', metadata={'_db_id': 1, '_version': 1}, page_content='Question: What is IaaS?\nDescription: Definition of Infrastructure as a Service\nSolution: IaaS is a cloud computing model where providers offer virtualized computing resources over the internet. It includes servers, storage, networking, and more.')]

In [23]:
# Distinct `_version` values found across the stored documents' metadata.
s = {meta['_version'] for meta in data['metadatas']}

In [24]:
# Number of distinct versions (1 means every document carries the same version).
len(s)

1