In [21]:
from openai import OpenAI
import os
from dotenv import find_dotenv, load_dotenv
import json
import requests
import helpers
import config
from qdrant_client import QdrantClient
import uuid

In [5]:
qdrant_client = QdrantClient(url="http://localhost:6333")
openai_client = OpenAI()

In [17]:
COLLECTION_NAME = 'ai_devs'
qdrant_client.create_collection(COLLECTION_NAME, vectors_config={"size" : 1536,
                                                                 "distance" : "Cosine",
                                                                 "on_disk" : True})

True

In [19]:
collection_info = qdrant_client.get_collection(COLLECTION_NAME)
collection_info

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=0, indexed_vectors_count=0, points_count=0, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1536, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=True), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None), payload_schema={})

In [9]:
with open("memory.md", "r") as f:
    memory = f.read()
memory

'Adam has various skills but describes himself as "just curious."\n\nAdam have a dog named Alexa.\n\nAdam lives in Krakow with his fiancĂ©e and dog.\n\nAdam is involved in a couple of projects like eduweb.pl, ahoy.so, easy.tools, overment.com, heyalice.app, automation.house, and more.\n\nAdam knows JavaScript and Python very well. He\'s full-stack engineer.\n\nAdam loves music. He listens to Spotify all the time.\n\nAdam\'s nickname is \'overment\'.\n\nAdam has a youtube channel named \'overment\'.\n\nAdam is a big fan of Apple products.\n\nAdam is a big fan of Tesla cars.'

In [23]:
docs = memory.split("\n")
docs = [x for x in docs if x != ""] # remove empty lines
docs

['Adam has various skills but describes himself as "just curious."',
 'Adam have a dog named Alexa.',
 'Adam lives in Krakow with his fiancĂ©e and dog.',
 'Adam is involved in a couple of projects like eduweb.pl, ahoy.so, easy.tools, overment.com, heyalice.app, automation.house, and more.',
 "Adam knows JavaScript and Python very well. He's full-stack engineer.",
 'Adam loves music. He listens to Spotify all the time.',
 "Adam's nickname is 'overment'.",
 "Adam has a youtube channel named 'overment'.",
 'Adam is a big fan of Apple products.',
 'Adam is a big fan of Tesla cars.']

In [24]:
documents = [{"pageContent": doc} for doc in docs]
for document in documents:
    if 'metadata' not in document:
        document['metadata'] = {}
    document['metadata']['source'] = COLLECTION_NAME
    document['metadata']['content'] = document.get('pageContent', '')
    document['metadata']['uuid'] = str(uuid.uuid4())

In [25]:
documents

[{'pageContent': 'Adam has various skills but describes himself as "just curious."',
  'metadata': {'source': 'ai_devs',
   'content': 'Adam has various skills but describes himself as "just curious."',
   'uuid': '1809b4fe-cc9a-46f9-85f7-b64286080095'}},
 {'pageContent': 'Adam have a dog named Alexa.',
  'metadata': {'source': 'ai_devs',
   'content': 'Adam have a dog named Alexa.',
   'uuid': '8a004a1a-2161-4573-95ef-65fd9fa6aa63'}},
 {'pageContent': 'Adam lives in Krakow with his fiancĂ©e and dog.',
  'metadata': {'source': 'ai_devs',
   'content': 'Adam lives in Krakow with his fiancĂ©e and dog.',
   'uuid': 'db4bcf72-0dd0-4ff4-aeea-814f2340b6ac'}},
 {'pageContent': 'Adam is involved in a couple of projects like eduweb.pl, ahoy.so, easy.tools, overment.com, heyalice.app, automation.house, and more.',
  'metadata': {'source': 'ai_devs',
   'content': 'Adam is involved in a couple of projects like eduweb.pl, ahoy.so, easy.tools, overment.com, heyalice.app, automation.house, and more.

In [57]:
def get_embeddings(openai_client, text):
    response = openai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=text,
        encoding_format="float"
        )
    return response.data[0].embedding

points = []
for document in documents:
    embedding = get_embeddings(openai_client, document['pageContent'])
    points.append(
        {"id" : document['metadata']['uuid'],
         "payload" : document['metadata'],
         "vector" : embedding}
    )

In [61]:
qdrant_client.upsert(collection_name=COLLECTION_NAME, 
                     points=points,
                     wait=True,
                     )

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [73]:
question = "What is Adam's dog name?"
query_embedding = get_embeddings(openai_client, question)

In [74]:
qdrant_client.search(collection_name=COLLECTION_NAME,
                     query_vector=query_embedding,
                     limit=1
                     )

[ScoredPoint(id='8a004a1a-2161-4573-95ef-65fd9fa6aa63', version=0, score=0.9014549, payload={'content': 'Adam have a dog named Alexa.', 'source': 'ai_devs', 'uuid': '8a004a1a-2161-4573-95ef-65fd9fa6aa63'}, vector=None, shard_key=None)]