In [29]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import SQLiteVSS
import time

In [5]:
# load the document and split it into chunks
loader = TextLoader("lovecraft.txt")
documents = loader.load()

# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
texts = [" ".join(doc.page_content.split()) for doc in docs]

Created a chunk of size 2598, which is longer than the specified 1500
Created a chunk of size 2294, which is longer than the specified 1500
Created a chunk of size 1559, which is longer than the specified 1500
Created a chunk of size 2101, which is longer than the specified 1500
Created a chunk of size 2442, which is longer than the specified 1500
Created a chunk of size 1830, which is longer than the specified 1500
Created a chunk of size 1551, which is longer than the specified 1500
Created a chunk of size 1580, which is longer than the specified 1500
Created a chunk of size 1534, which is longer than the specified 1500
Created a chunk of size 1675, which is longer than the specified 1500
Created a chunk of size 2183, which is longer than the specified 1500
Created a chunk of size 1551, which is longer than the specified 1500
Created a chunk of size 1994, which is longer than the specified 1500
Created a chunk of size 2488, which is longer than the specified 1500
Created a chunk of s

In [10]:
embedding_function = SentenceTransformerEmbeddings(model_name="jinaai/jina-embedding-s-en-v1")


# load it in sqlite-vss in a table named state_union.
# the db_file parameter is the name of the file you want
# as your sqlite database.
db = SQLiteVSS.from_texts(
    texts=texts,
    embedding=embedding_function,
    table="mythos",
    db_file="lovecraft.db",
)


In [20]:
res = db.similarity_search("The moon was low over the ancient and timeless waters of foul Karkossa")

In [24]:
" ".join([x.page_content for x in res])

"well as the odd haze we had ourselves perceived around the rampart-crowned peak, might not be caused by the tortuous-channelled rising of some such vapour from the unfathomed regions of earth's core. Out of the South it was that the White Ship used to come when the moon was full and high in the heavens. Out of the South it would glide very smoothly and silently over the sea. And whether the sea was rough or calm, and whether the wind was friendly or adverse, it would always glide smoothly and silently, its sails distant and its long strange tiers of oars moving rhythmically. One night I espied upon the deck a man, bearded and robed, and he seemed to beckon me to embark for fair unknown shores. Many times aften/vard I saw him under the full moon, and ever did he beckon me. Very brightly did the moon shine on the night I answered the call, and I walked out over the waters to the White Ship on a bridge of moonbeams. The man who had beckoned now spoke a welcome to me in a soft language I 

In [25]:
help(db.similarity_search)

Help on method similarity_search in module langchain_community.vectorstores.sqlitevss:

similarity_search(query: 'str', k: 'int' = 4, **kwargs: 'Any') -> 'List[Document]' method of langchain_community.vectorstores.sqlitevss.SQLiteVSS instance
    Return docs most similar to query.



In [36]:
from sqlite_utils import Database
from datetime import datetime

db = Database("history.db")

In [37]:
examples = []
ts = []
for x in range(10): 
    time.sleep(3)
    examples.append("This is a test message" + str(x))
    ts.append(datetime())

TypeError: function missing required argument 'year' (pos 1)

In [40]:
help(datetime.now)

Help on built-in function now:

now(tz=None) method of builtins.type instance
    Returns new datetime object representing current time local to tz.
    
      tz
        Timezone object.
    
    If no tz is specified, uses local timezone.



In [31]:
db["history"].insert_all([{
    "text": text,
    "time_ingested_dt": t,
} for text,t in zip(examples,ts)])

<Table history (text, time_ingested_dt)>

In [44]:
db['history'].insert({'text': 'does this work', 'time_ingested_dt': time.time()})

<Table history (text, time_ingested_dt)>

In [45]:
result = db.query("""
    select text, time_ingested_dt
    from history
    order by time_ingested_dt desc
""")

In [46]:
list(result)

[{'text': 'does this work', 'time_ingested_dt': 1704054947.5767336},
 {'text': 'This is a test message9', 'time_ingested_dt': 1704054483.9196944},
 {'text': 'This is a test message8', 'time_ingested_dt': 1704054480.9166272},
 {'text': 'This is a test message7', 'time_ingested_dt': 1704054477.9159162},
 {'text': 'This is a test message6', 'time_ingested_dt': 1704054474.9128203},
 {'text': 'This is a test message5', 'time_ingested_dt': 1704054471.9117453},
 {'text': 'This is a test message4', 'time_ingested_dt': 1704054468.908634},
 {'text': 'This is a test message3', 'time_ingested_dt': 1704054465.9079719},
 {'text': 'This is a test message2', 'time_ingested_dt': 1704054462.9047947},
 {'text': 'This is a test message1', 'time_ingested_dt': 1704054459.9023907},
 {'text': 'This is a test message0', 'time_ingested_dt': 1704054456.900818}]

In [48]:
type(time.time())

float