In [None]:
%pip install openai openai"[embeddings]"

In [18]:
# OpenAI Settings

import os
import openai

# Static SDK settings: API flavour and the API version to call.
openai.api_type = "azure"
openai.api_version = "2023-03-15-preview"

# Endpoint and key come from the environment so no secret lives in the notebook.
# Either value is None when the corresponding variable is not set.
openai.api_base = os.environ.get("OPENAI_API_ENDPOINT")
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [19]:
# Load dataset

import pandas as pd

input_datapath = "data/Sample-Faq.csv"
df = pd.read_csv(input_datapath)

# Build one text per FAQ row, "Question: <q>; Answer: <a>", with surrounding
# whitespace stripped from both parts. This is the text that gets embedded.
df["Combined"] = (
    "Question: "
    + df["Question"].str.strip()
    + "; Answer: "
    + df["Answer"].str.strip()
)

# Preview the first two rows.
df.head(2)

Unnamed: 0,Question,Answer,Combined
0,How can I order,You can order easily using our online platfor...,Question: How can I order; Answer: You can ord...
1,Why should I buy online?,Speeding up the process. By ordering online yo...,Question: Why should I buy online?; Answer: Sp...


In [20]:
# Query Embeddings

from openai.embeddings_utils import get_embedding

# Embed every combined question/answer text (one get_embedding call per row)
# and keep the resulting vector alongside the text.
df["Embedding"] = df["Combined"].map(
    lambda text: get_embedding(text, engine="textembeddingada002")
)

# Persist the frame, embeddings included, so it can be inspected or reused.
df.to_csv("data/Sample-Faq-Embeddings.csv")

# Preview the first two rows.
df.head(2)

Unnamed: 0,Question,Answer,Combined,Embedding
0,How can I order,You can order easily using our online platfor...,Question: How can I order; Answer: You can ord...,"[0.008618628606200218, -0.012366986833512783, ..."
1,Why should I buy online?,Speeding up the process. By ordering online yo...,Question: Why should I buy online?; Answer: Sp...,"[-0.0024808107409626245, -0.00058461056323722,..."


In [None]:
# Store embeddings in redis
# Background / walkthrough:
# https://blog.baeke.info/2023/03/21/storing-and-querying-for-embeddings-with-redis/
#
# To start redis with Docker (redis-stack-server image). The command below
# publishes container port 6379 on host port 6380 and sets the password of the
# "default" user to password123:
# docker run \
#   -p 6380:6379 \
#   -e REDIS_ARGS="--requirepass 'password123' --user default on >password123 ~* allcommands" \
#   redis/redis-stack-server:latest
#
# The connection cell in this notebook reads REDIS_HOST, REDIS_PORT and
# REDIS_PASSWORD from the environment, so set them to match the command above.

# Install the Python Redis client into the kernel's environment.
%pip install redis

In [22]:
import redis

from redis.commands.search.field import VectorField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType

# Redis connection details, read from the environment.
# Host and port fall back to redis-py's own defaults (localhost:6379) when the
# variables are unset or empty, and the port is cast to int here so a malformed
# REDIS_PORT fails immediately with a clear ValueError instead of being handed
# to the client as a raw string / None.
# NOTE: the Docker example in this notebook publishes Redis on host port 6380,
# so set REDIS_PORT=6380 when using it.
redis_host = os.getenv("REDIS_HOST") or "localhost"
redis_port = int(os.getenv("REDIS_PORT") or 6379)
redis_password = os.getenv("REDIS_PASSWORD")  # None means "no password"

# Connect to the Redis server.
# decode_responses=True makes the client return str instead of bytes for replies.
conn = redis.Redis(
    host=redis_host,
    port=redis_port,
    password=redis_password,
    encoding="utf-8",
    decode_responses=True,
)

In [None]:
# Store embeddings

import numpy as np
import uuid


def store_embedding(text, embedding, client=None):
    """Write one text/embedding pair to Redis as a hash under a fresh ``qa:<uuid>`` key.

    Args:
        text: The text to store in the hash field ``qa``.
        embedding: Sequence of numbers; stored in the hash field ``embedding``
            as the raw bytes of a float32 array.
        client: Object exposing ``hset(name=..., mapping=...)`` — a Redis client
            or a pipeline. Defaults to the notebook-level ``conn`` connection,
            which preserves the original two-argument behaviour.
    """
    if client is None:
        client = conn

    # Redis vector fields are stored as raw bytes; the index is declared as
    # FLOAT32, so serialise the embedding with that dtype.
    vector = np.asarray(embedding, dtype=np.float32).tobytes()

    # One hash per FAQ entry; the "qa:" prefix is what the search index matches on.
    client.hset(
        name=f"qa:{uuid.uuid1()}",
        mapping={"qa": text, "embedding": vector},
    )


# Queue every HSET on a single non-transactional pipeline and send them together.
# (Previously the pipeline was created and executed but never used: each write
# went straight through `conn`, costing one network round trip per row.)
p = conn.pipeline(transaction=False)
for text, embedding in zip(df["Combined"], df["Embedding"]):
    store_embedding(text, embedding, client=p)
stored = p.execute()

# Number of hashes written in this run.
len(stored)

In [24]:
# Create the index

# Index schema: the stored text as a full-text field, plus the embedding as an
# HNSW vector field compared by cosine distance.
# DIM must equal the length of the stored embedding vectors
# (1536 is assumed to match the embedding model used above — TODO confirm).
SCHEMA = [
    TextField("qa"),
    VectorField(
        "embedding",
        "HNSW",
        {"TYPE": "FLOAT32", "DIM": 1536, "DISTANCE_METRIC": "COSINE"},
    ),
]

# Create the "qas" index over every hash whose key starts with "qa:".
# Re-running the cell is fine: an already-existing index is reported and
# skipped. Any other failure (connection, authentication, invalid schema, ...)
# is re-raised instead of being mislabelled as "Index already exists".
try:
    conn.ft("qas").create_index(
        fields=SCHEMA,
        definition=IndexDefinition(prefix=["qa:"], index_type=IndexType.HASH),
    )
except redis.ResponseError as e:
    if "Index already exists" not in str(e):
        raise
    print("Index already exists")

In [25]:
# Search vector

import numpy as np
from redis.commands.search.query import Query


def search_vectors(query_vector, client, top_k=5):
    """Run a K-nearest-neighbour search against the ``qas`` index.

    Args:
        query_vector: The query embedding as raw bytes (float32), bound to the
            ``$vector`` query parameter.
        client: Redis client used to reach the ``qas`` search index.
        top_k: Number of nearest neighbours to request. Defaults to 5.

    Returns:
        The search result returned by ``client.ft("qas").search``; each returned
        document carries the fields ``qa`` and ``vector_score``, sorted by
        ``vector_score`` ascending. Returns ``None`` if the search call raises
        (the error is printed).

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    top_k = int(top_k)
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    # Honour top_k in the KNN clause (it used to be hard-coded to 5).
    base_query = f"*=>[KNN {top_k} @embedding $vector AS vector_score]"
    query = (
        Query(base_query)
        .return_fields("qa", "vector_score")
        .sort_by("vector_score")
        # Without explicit paging the search returns at most 10 documents,
        # which would silently truncate any top_k above 10.
        .paging(0, top_k)
        .dialect(2)
    )

    try:
        results = client.ft("qas").search(query, query_params={"vector": query_vector})
    except Exception as e:
        # Best effort: report the failure and let the caller handle None.
        print("Error calling Redis search: ", e)
        return None

    return results


# Enter a query
query = input("Enter your query: ")

# Embed the query with the same deployment used for the stored texts, then
# serialise it as float32 bytes, which is the form the vector search expects.
print("Vectorizing query...")
query_vector = (
    np.array(get_embedding(query, engine="textembeddingada002"))
    .astype(np.float32)
    .tobytes()
)

# Perform the similarity search
print("Searching for similar texts...")
results = search_vectors(query_vector, conn)

if not results:
    # search_vectors returns None when the Redis call failed.
    print("No results found")
else:
    print(f"Found {results.total} results:")
    for i, result in enumerate(results.docs):
        # vector_score is a cosine distance; turn it into a similarity score.
        score = 1 - float(result.vector_score)
        print(f"\t{i}. {result.qa} (Score: {round(score, 3)})")

Vectorizing query...
Searching for similar texts...
Found 5 results:
	0. Question: How can I order; Answer: You can order easily using our online platform. When you find a product you need, you can add it to cart, login and go through the ordering process. After the order is ready, you will receive order summary to your email. Order summary will also be stored to your account.

You can also easily make reorders afterwards by clicking the “reorder” button on any of your previously made orders. After clicking the “reorder” button the cart will open and you can change quantities or products. (Score: 0.874)
	1. Question: Do I have to order online?; Answer: You can also send your order to hytest@hytest.fi. You can also order using FAX (+358 2 512 0909) or by calling. Online ordering is preferred in most cases because by ordering online, you will save time, you will have easier payment process and all the information about the order will be accessible for you anytime. Also if you want to mak