In [1]:
import getpass
import os

# Abre input para incluir a key da OpenAI
os.environ["OPENAI_API_KEY"] = getpass.getpass()

# Instanciação do modelo de embeddings da OpenAI (text-embedding-3-small)

In [2]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=512)

# Embeddings com inputs de teste

In [3]:
data = [
    "Australian Open 2024: Jannik Sinner, Aryna Sabalenka crowned as Grand Slam singles champions at Melbourne Park"
    "Sinner and Sabalenka took down Daniil Medvedev and Qinwen Zheng in their respective finals",
    "Sinner, Sabalenka win Australian Open singles titles",
    "Jannik Sinner came back from two sets down to beat Daniil Medvedev 3-6, 3-6, 6-4, 6-4, 6-3 in the Australian Open men's singles final, earning him his first ever Grand Slam title",
    "Sinner was the champion in 2024"
]

data_embedded = embeddings.embed_documents(data)
len(data_embedded), len(data_embedded[0])

(4, 512)

In [4]:
%pip install psycopg2-binary

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:
host = 'localhost'
database = 'vectordb'
user = 'rag'
password = 'rag123'

In [6]:
import psycopg2

def connect():
  conn = None
  try:
    conn = psycopg2.connect(
                   host=host,
                   database=database,
                   user=user,
                   password=password)
  except (Exception, psycopg2.DatabaseError) as error:
    print(error)
    return

  print('Conectado!')
  return conn

In [7]:
connection = connect()
cursor = connection.cursor()
try:
    for text, embedding in zip(data, data_embedded):
        cursor.execute(
            "INSERT INTO embeddings (embedding, content) VALUES (%s, %s)",
            (embedding, text)
        )
    connection.commit()
except (Exception, psycopg2.Error) as error:
    print("Error while writing to DB", error)
finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()

Conectado!


# Busca da resposta da query inicial apenas para a vector database

In [15]:
query = "Who was the Australian Open champion in 2024?"
embedding = embeddings.embed_documents(query)

In [16]:
embedding[0]

[0.0979732079559255,
 0.05808515640603512,
 0.021775866551177026,
 -0.0033270370702651025,
 0.05226208046788294,
 0.08574474848580593,
 -0.016801992912629113,
 -0.020429282103124537,
 -0.04190186531183467,
 -0.07317661703654163,
 0.02355918355723613,
 -0.030207191528002792,
 -0.03229379373917399,
 0.0018727852102445699,
 0.04757936062624264,
 0.06313181849380993,
 0.03678241104954255,
 -0.06026880615755178,
 0.04051888477652353,
 0.06313181849380993,
 0.05323259312424164,
 0.020659778859009728,
 0.04369731000080786,
 -0.05279586242888023,
 0.013453725179514214,
 -0.008522310538359393,
 -0.019216141282676166,
 -0.016195422502404928,
 0.033992190887801706,
 -0.06638303589261156,
 0.04478913673921139,
 -0.0336282486416672,
 0.01432718657023704,
 -0.03188132586022154,
 -0.02033223083748867,
 0.001689297777567079,
 -0.017432826139262267,
 0.08341552556112587,
 -0.017093146709536725,
 0.049520385938960035,
 -0.012458950638069153,
 -0.08278469233449272,
 -0.005804876031877647,
 -0.00722425032

In [19]:
connection = connect()
cursor = connection.cursor()
try:
    cursor.execute(f"""
        SELECT content, 1 - (embedding <=> '{embedding[0]}') AS cosine_similarity
        FROM embeddings
        ORDER BY cosine_similarity desc
        LIMIT 3
    """)
    # cursor.execute(f"""
    #     SELECT content, cosine_distance(embedding, '{embedding[0]}') AS cosine_similarity
    #     FROM embeddings
    #     ORDER BY cosine_similarity desc
    #     LIMIT 3
    # """)
    for r in cursor.fetchall():
        print(f"Text: {r[0]}; Similarity: {r[1]}")

except Exception as error:
    print("Error..", error)
finally:
    cursor.close()
    connection.close()

Conectado!
Text: Sinner was the champion in 2024; Similarity: 0.12509186194168342
Text: Australian Open 2024: Jannik Sinner, Aryna Sabalenka crowned as Grand Slam singles champions at Melbourne ParkSinner and Sabalenka took down Daniil Medvedev and Qinwen Zheng in their respective finals; Similarity: 0.04973303674855989
Text: Sinner, Sabalenka win Australian Open singles titles; Similarity: 0.03695258083515607
