In [2]:
import psycopg2
import numpy as np
from langchain_openai import OpenAIEmbeddings

In [3]:
# Device descriptions used as the corpus to embed (one string per device).
texts = [
    'Type: Desktop, OS: Windows 10, CPU: Intel i7, RAM: 16GB, Storage: 512GB SSD',
    'Type: Laptop, OS: macOS Big Sur, CPU: M1, RAM: 8GB, Storage: 256GB SSD',
    'Type: Server, OS: Ubuntu 20.04, CPU: AMD EPYC, RAM: 64GB, Storage: 2TB NVMe',
    'Type: Tablet, OS: Android 11, CPU: Snapdragon 865, RAM: 6GB, Storage: 128GB',
    'Type: Smartphone, OS: iOS 14, CPU: A14 Bionic, RAM: 4GB, Storage: 128GB',
    'Type: Smartwatch, OS: watchOS 7, CPU: S6, RAM: 1GB, Storage: 32GB',
    'Type: Gaming Console, OS: Custom, CPU: AMD Ryzen, RAM: 16GB, Storage: 1TB SSD',
    'Type: Smart TV, OS: Tizen, CPU: Quad-core, RAM: 2GB, Storage: 16GB',
    'Type: VR Headset, OS: Custom, CPU: Snapdragon XR2, RAM: 6GB, Storage: 128GB',
    'Type: IoT Device, OS: Linux-based, CPU: ARM Cortex-M, RAM: 512MB, Storage: 4GB',
]

In [4]:
# Load the OpenAI API key from a .env file (current directory first, then services/ai-suite/.env in a parent directory)
import os
from pathlib import Path
from dotenv import load_dotenv

def _resolve_dotenv_path() -> Path | None:
    cwd = Path.cwd()
    candidate = cwd / '.env'
    if candidate.exists():
        return candidate

    # Fallback when the notebook is run from repo root or elsewhere.
    for parent in [cwd, *cwd.parents]:
        candidate = parent / 'services' / 'ai-suite' / '.env'
        if candidate.exists():
            return candidate

    return None

# Resolve the .env file and fail fast when it cannot be located.
dotenv_path = _resolve_dotenv_path()
if dotenv_path is None:
    raise FileNotFoundError('Could not find .env in services/ai-suite')

# Populate the process environment from the file, then read the key from it.
load_dotenv(dotenv_path=dotenv_path)
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')


In [5]:
# Client used for every embedding request in this notebook.
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

# Embed all sample texts through the batch API rather than issuing one
# embed_query call per text; the result is one vector per entry of `texts`,
# in the same order.
embeddings_list = embeddings.embed_documents(texts)

In [7]:
# Number of dimensions in the first embedding vector.
len(embeddings_list[0])

1536

In [9]:
# Store every sample text together with its embedding in the `items` table.
#
# The password is intentionally not part of the connection string: libpq reads
# it from the PGPASSWORD environment variable (or ~/.pgpass), which keeps the
# credential out of the notebook and its version history.
conn = psycopg2.connect('dbname=test user=postgres')
try:
    # `with conn` commits when the block succeeds and rolls back on an error;
    # it does not close the connection, hence the surrounding try/finally.
    with conn, conn.cursor() as cur:
        cur.executemany(
            'INSERT INTO items (content, embedding) VALUES (%s, %s)',
            # strict=True: refuse to insert anything if texts and embeddings
            # are not the same length (would otherwise mis-pair or drop rows).
            list(zip(texts, embeddings_list, strict=True)),
        )
finally:
    conn.close()

In [10]:
# Query description and its embedding, used for the similarity search against `items`.
new_text = 'Type: Laptop, OS: Windows 11, CPU: Intel i5, RAM: 8GB, Storage: 256GB SSD'
new_embedding = embeddings.embed_query(new_text)

In [12]:
# Similarity search: order `items` by the `<->` distance between each stored
# embedding and the query embedding, keeping the three closest rows.
#
# The password is intentionally not part of the connection string: libpq reads
# it from the PGPASSWORD environment variable (or ~/.pgpass), which keeps the
# credential out of the notebook and its version history.
conn = psycopg2.connect('dbname=test user=postgres')
cur = conn.cursor()

# The connection and cursor are left open on purpose: the results are
# fetched from `cur` in a following cell.
cur.execute(
    'SELECT id, content, embedding FROM items ORDER BY embedding <-> %s::vector LIMIT 3',
    (new_embedding,),
)

In [13]:
# Fetch the rows returned by the query previously executed on `cur`.
cur.fetchall()

[(1,
  'Type: Desktop, OS: Windows 10, CPU: Intel i7, RAM: 16GB, Storage: 512GB SSD',
  '[-0.005741825,-0.022321632,-0.012162259,-0.031861693,-0.0058604167,0.0064731417,-0.025549967,-0.025694912,-0.014112437,-0.02620881,-0.0043022507,0.028382996,-0.022637878,-0.015482833,-0.008966866,0.01662922,-0.005264163,-0.013822545,0.013941137,-0.020661345,-0.035814755,-0.0017739373,0.016365683,0.013328413,-0.009513706,-0.0063413726,0.009296288,-0.02840935,0.017894201,-0.0034721075,0.013315235,-0.023230838,-0.01004737,-0.050335675,-0.017670194,0.020345101,-0.0074778786,0.018223623,0.012254497,-0.0086110905,0.024390401,0.014178322,0.0014675749,0.0040123593,-0.0103767915,-0.003389752,-0.021333367,-0.009513706,-0.02367885,-0.0056034676,0.02001568,0.037211504,-0.0040288307,-0.00035989349,0.0033321031,-0.00206877,-0.01536424,0.0013843958,0.00021927149,-0.019185536,-0.0033337502,0.0040716557,-0.022940945,-0.0038212948,-0.028014043,-0.0080642495,-0.0069046845,0.025207369,-0.00097014767,-0.0013218056,0.03