In [25]:
import numpy as np
import faiss
import psycopg2
import openai

# Set the module-wide API key used by every openai.* call below.
# NOTE(review): "xxxx" looks like a redacted placeholder — supply the real key
# from configuration (e.g. an environment variable) rather than committing it.
openai.api_key = "xxxx"

# Define the function to generate embeddings
def get_embedding(text, model="text-embedding-ada-002"):
    """Request an embedding for a single piece of text from OpenAI.

    Args:
        text: The string to embed. Newline characters are replaced with
            spaces before the request is sent.
        model: Name of the embedding model passed to the API.

    Returns:
        The "embedding" field of the first item in the response's "data"
        list (presumably a list of floats — confirm against the API docs).
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=model)
    first_item = response["data"][0]
    return first_item["embedding"]

# Define the texts for which we want to generate embeddings
texts = ["This is the first text.", "This is the second text.", "And this is the third text."]

# Generate embeddings for the texts.
# The matrix is built as float32 explicitly: Faiss works on float32 data, and
# the bytes written to PostgreSQL below are read back as float32 by search().
# NumPy's default for Python floats is float64, which would make the stored
# byte layout disagree with that decode (twice as many garbage rows).
embeddings = np.array([get_embedding(text) for text in texts], dtype=np.float32)
print(embeddings)

# Store the embeddings in a Faiss index.
# IndexFlatIP ranks by inner product (larger score = more similar).
# NOTE(review): treating the score as a cosine similarity assumes the
# embeddings are unit-length — confirm for the chosen model.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

# Connect to PostgreSQL and make sure the table exists
# (this does not create a database; it uses the existing "postgres" one).
conn = psycopg2.connect(
    host="localhost",
    database="postgres",
    user="postgres",
    password="xxxxxx"
)

cur = conn.cursor()
cur.execute("CREATE TABLE IF NOT EXISTS piidetection (id SERIAL PRIMARY KEY, embedding bytea);")
conn.commit()

# Insert the raw embedding matrix (row-major float32 bytes) into the database.
# Note that this stores the matrix itself, not a serialized Faiss index, and
# that every run of this cell appends one more row to the table.
embedding_bytes = embeddings.tobytes()
cur.execute("INSERT INTO piidetection (embedding) VALUES (%s);", (psycopg2.Binary(embedding_bytes),))
conn.commit()

# Define a function to perform search based on similarity score
def search(query, k=1):
    """Return the stored texts most similar to ``query`` with their scores.

    Args:
        query: Text to embed (via ``get_embedding``) and compare against the
            vectors held in the module-level Faiss ``index``.
        k: Number of nearest neighbours to request from the index.

    Returns:
        A list of ``(text, score)`` tuples in the order Faiss returns them.
        The list is shorter than ``k`` when the index holds fewer than ``k``
        vectors.

    Raises:
        LookupError: If the ``piidetection`` table has no row with id = 1.
    """
    # Read the stored embedding matrix back from the database.
    cur.execute("SELECT embedding FROM piidetection WHERE id = 1;")
    row = cur.fetchone()
    if row is None:
        # fetchone() yields None when nothing matches; fail with a clear
        # message instead of "'NoneType' object is not subscriptable".
        raise LookupError("no embedding row with id = 1 in piidetection")

    # Decode with the dtype the matrix was serialised with (embeddings.tobytes()).
    # A hard-coded float32 would misread float64 bytes as twice as many rows.
    retrieved_embeddings = np.frombuffer(row[0], dtype=embeddings.dtype).reshape(
        (-1, embeddings.shape[1])
    )
    # NOTE(review): retrieved_embeddings is not consulted below — the search
    # runs against the in-memory ``index``. Confirm whether the index should
    # be rebuilt from this matrix instead.

    # Search the Faiss index. Faiss expects a 2-D float32 query matrix.
    query_embedding = np.asarray([get_embedding(query)], dtype=np.float32)
    distances, indices = index.search(query_embedding, k)

    # Faiss pads missing neighbours with label -1; skipping them avoids
    # silently returning texts[-1] as if it were a match.
    return [
        (texts[i], distances[0][j])
        for j, i in enumerate(indices[0])
        if i >= 0
    ]

# Perform a search based on similarity score
try:
    results = search("This is a text.")
    top_score = results[0][1]
    print(top_score)

    # A best-match score above 0.85 is labelled as PII.
    if top_score > 0.85:
        print("PII")
    else:
        print("Non-PII")
finally:
    # Close the cursor and the connection, even if the search above failed.
    # The cursor variable is ``cur``; ``cursor`` was never defined, so the old
    # ``cursor.close()`` raised NameError and left the connection open.
    cur.close()
    conn.close()


[[-0.00928653  0.00186054 -0.02263714 ... -0.01674812 -0.00685327
  -0.01055494]
 [-0.01629999  0.00237545 -0.00908385 ... -0.01844198 -0.01731874
  -0.00446682]
 [-0.01924564  0.00535853 -0.0083033  ... -0.01169543 -0.0124807
  -0.02541197]]
0.94135267
PII
