## VectorSearch.ipynb

### Written by Taiob Ali

@sqlworldwide

Reference: [Azure OpenAI Embeddings](https://github.com/AzureSQLDB/GenAILab/blob/main/docs/2-creating-embedding-and-storing-in-SQL-database.md)

Create a stored procedure that generates embeddings. You will need to change the URL and api-key values to match your own Azure OpenAI deployment.

An embedding is a special format of data representation that machine learning models and algorithms can easily use. The embedding is an information dense representation of the semantic meaning of a piece of text. Each embedding is a vector of floating-point numbers, such that the distance between two embeddings in the vector space is correlated with semantic similarity between two inputs in the original format. For example, if two texts are similar, then their vector representations should also be similar.

In [None]:
/*
dbo.create_embeddings
    Sends @inputText to the Azure OpenAI embeddings deployment configured in
    @url and hands the resulting embedding back through @embedding.

Parameters
    @inputText  text to embed.
    @embedding  OUT - receives $.result.data[0].embedding from the response,
                cast to vector(1536).

Returns
    The return code of sp_invoke_external_rest_endpoint (0 on the success path).

Raises
    Error 50000 when the endpoint call returns a non-zero code. The message
    carries the HTTP status and the error text found in the response.

Note
    The url and api-key below are placeholders and must be changed to match
    your own deployment.
*/
CREATE OR ALTER PROCEDURE dbo.create_embeddings
    @inputText nvarchar(max),
    @embedding vector(1536) OUT
AS
BEGIN
    DECLARE @url nvarchar(4000) = N'https://ta-openai.openai.azure.com/openai/deployments/ta-model-text-embedding-ada-002/embeddings?api-version=2023-05-15';

    DECLARE @headers nvarchar(300) = N'{"api-key": "*******************************************************************"}';

    -- Build the body with json_object instead of string concatenation so that
    -- quotes, backslashes and line breaks inside @inputText are escaped and the
    -- payload is always valid JSON.
    DECLARE @payload nvarchar(max) = json_object('input': @inputText);
    DECLARE @retval int, @response nvarchar(max);

    EXEC @retval = sp_invoke_external_rest_endpoint
        @url = @url,
        @method = 'POST',
        @headers = @headers,
        @payload = @payload,
        @timeout = 230,
        @response = @response OUTPUT;

    -- Guard clause: any non-zero return code is treated as a failed call.
    IF (@retval <> 0)
    BEGIN
        -- concat() treats NULL as an empty string, so a missing status code or
        -- error text in the response no longer turns the whole message into NULL.
        DECLARE @msg nvarchar(2048) = concat(
            'Error calling OpenAI API', char(13), char(10),
            '[HTTP Status: ', json_value(@response, '$.response.status.http.code'), '] ',
            json_value(@response, '$.result.error.message')
        );
        THROW 50000, @msg, 1;
    END

    SET @embedding = cast(json_query(@response, '$.result.data[0].embedding') AS vector(1536));

    RETURN @retval;
END
GO

In [None]:
/*
A function to clean up your data (my colleague Howard Dunn wrote the original).

dbo.cleanString(@str)
    Walks @str one character at a time and keeps only the characters that match
    the LIKE set [a-zA-Z0-9 .,!?]; everything else is dropped.

    @str     text to clean.
    Returns  the kept characters in their original order. A NULL input yields
             an empty string because the loop never runs.

    NOTE(review): LIKE ranges follow the collation in effect, so with some
    collations accented letters may also pass the a-z / A-Z ranges - confirm if
    strict ASCII output is required.
*/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- CREATE OR ALTER keeps the cell re-runnable (plain CREATE fails once the
-- function exists) and matches how dbo.create_embeddings is defined.
CREATE OR ALTER FUNCTION [dbo].[cleanString] (@str NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @i INT = 1;
    -- The length does not change inside the loop, so compute it once.
    DECLARE @len BIGINT = LEN(@str);
    DECLARE @ch NVARCHAR(2);
    DECLARE @cleaned NVARCHAR(MAX) = N'';

    WHILE @i <= @len
    BEGIN
        -- Read the current character once and reuse it for test and append.
        SET @ch = SUBSTRING(@str, @i, 1);

        IF @ch LIKE '[a-zA-Z0-9 .,!?]'
            SET @cleaned = @cleaned + @ch;

        SET @i = @i + 1;
    END

    RETURN @cleaned;
END
GO

-- Build dbo.walmartProductsNew: a copy of [dbo].[walmart-products] whose
-- description column has been passed through dbo.cleanString and trimmed.
-- Dropping first keeps the cell re-runnable; SELECT ... INTO fails when the
-- target table already exists.
DROP TABLE IF EXISTS dbo.walmartProductsNew;

SELECT
    sku,
    brand,
    review_count,
    trim(dbo.cleanString(description)) AS description,
    product_id,
    product_name,
    root_category_name,
    unit_price,
    unit,
    aisle,
    free_returns,
    discount,
    id
INTO dbo.walmartProductsNew
FROM [dbo].[walmart-products];
GO

In [None]:
-- Build the working table for the demo: the first 250 products (by ID) from the
-- cleaned table dbo.walmartProductsNew created earlier in this notebook.
-- IDs 2 and 7 are excluded; the reason is not recorded here.
DROP TABLE IF EXISTS dbo.vectortable;

SELECT TOP (250) ID, product_name, sku, brand, review_count, description
INTO dbo.vectortable
FROM [dbo].[walmartProductsNew]
WHERE ID NOT IN (2, 7)
ORDER BY [ID];
GO

-- Column that will hold the embedding of each product's text.
ALTER TABLE dbo.vectortable
ADD description_vector vector(1536) NULL;
GO

-- Generate one embedding per row. The loop walks the IDs that actually exist in
-- dbo.vectortable (smallest first, then "next ID greater than the current one")
-- rather than a fixed 1..1000 range, so no calls are wasted on missing IDs and
-- no row is skipped because its ID is above a hard-coded limit.
-- NOTE(review): assumes ID is an integer-typed, unique column - confirm.
DECLARE @id bigint;
DECLARE @text nvarchar(max);
DECLARE @vector vector(1536);

SET @id = (SELECT MIN(ID) FROM dbo.vectortable);

WHILE @id IS NOT NULL
BEGIN
    -- Text to embed: "<product_name>: <brand>: <description>", NULLs as ''.
    SET @text = (SELECT isnull([product_name],'') + ': ' + isnull([brand],'') + ': ' + isnull([description],'')
                 FROM dbo.vectortable
                 WHERE ID = @id);

    IF (@text <> '')
    BEGIN
        BEGIN TRY
            EXEC dbo.create_embeddings @text, @vector OUTPUT;
            UPDATE dbo.vectortable SET description_vector = @vector WHERE ID = @id;
        END TRY
        BEGIN CATCH
            -- Best effort: report the failing row and carry on with the next
            -- one. Rows left without a vector keep description_vector = NULL.
            SELECT @id AS ID,
                   ERROR_NUMBER() AS ErrorNumber,
                   ERROR_MESSAGE() AS ErrorMessage;
        END CATCH
    END

    -- Advance to the next existing ID; NULL ends the loop.
    SET @id = (SELECT MIN(ID) FROM dbo.vectortable WHERE ID > @id);
END

In [None]:
-- Remove rows that have no embedding stored in description_vector.
DELETE FROM dbo.vectortable WHERE description_vector IS NULL;

-- How many rows remain.
SELECT COUNT(*) AS row_count FROM dbo.vectortable;

-- Preview the first rows. ORDER BY makes the sample deterministic (TOP without
-- ORDER BY returns an arbitrary set), and the columns are listed explicitly.
SELECT TOP (10)
    ID,
    product_name,
    sku,
    brand,
    review_count,
    description,
    description_vector
FROM dbo.vectortable
ORDER BY ID;

In [None]:
-- Natural-language request to search for
DECLARE @query_text nvarchar(max) = 'help me plan a high school graduation party';

-- Embedding of the request, filled in by dbo.create_embeddings
DECLARE @query_embedding vector(1536);
EXEC dbo.create_embeddings @query_text, @query_embedding OUTPUT;

-- Score every product once in the derived table, then keep the ten rows with
-- the smallest cosine distance; rows whose distance is NULL are left out.
SELECT TOP (10)
    scored.product_name,
    scored.brand,
    scored.DESCRIPTION,
    scored.distance
FROM (
    SELECT
        product_name,
        brand,
        DESCRIPTION,
        vector_distance('cosine', @query_embedding, description_vector) AS distance
    FROM [dbo].[vectorTable]
) AS scored
WHERE scored.distance IS NOT NULL
ORDER BY scored.distance; -- closest match first

### Filtered Semantic Search with SQL

[](https://github.com/AzureSQLDB/GenAILab/blob/main/docs/4-filtered-semantic-search.md#filtered-semantic-search-with-sql)

This section explains how to implement a Filtered Search query in SQL. Hybrid Search combines traditional SQL queries with vector-based search capabilities to enhance search results.

### SQL Query for Hybrid Search

[](https://github.com/AzureSQLDB/GenAILab/blob/main/docs/4-filtered-semantic-search.md#sql-query-for-hybrid-search)

The following SQL script demonstrates a hybrid search in a SQL database. It uses vector embeddings to find the most relevant products based on a textual description and combines that with a filter on the availability of free returns.

In [8]:
-- Declare the search text
declare @search_text nvarchar(max) = 'help me plan a high school graduation party';

-- Declare a variable to hold the search vector
declare @search_vector vector(1536);

-- Generate the search vector using the 'create_embeddings' stored procedure
exec dbo.create_embeddings @search_text, @search_vector output;

-- Filtered (hybrid) search: rank products by cosine distance to the search
-- vector, but only among products offering free 30-day returns.
-- The free_returns attribute is read from dbo.walmartProductsNew, the cleaned
-- product table this notebook creates (hence the alias wpn).
SELECT TOP(10)
  vt.product_name, vt.brand, vt.DESCRIPTION,
  -- Calculate the cosine distance between the search vector and product description vectors
  vector_distance('cosine', @search_vector, vt.description_vector) AS distance
FROM [dbo].[vectorTable] AS vt
INNER JOIN dbo.walmartProductsNew AS wpn
  ON vt.id = wpn.id
WHERE vector_distance('cosine', @search_vector, vt.description_vector) IS NOT NULL
  AND wpn.free_returns = 'Free 30-day returns'
ORDER BY distance; -- Order by the closest distance

product_name,brand,DESCRIPTION,distance
Made by Johnny Women's Chic Palazzo Lounge Pants S HEATHER_DARK_GREY,Made by Johnny,"These palazzo pants are the perfect addition to any wardrobe! Comfortable and stylish, these pants offer an effortless and chic look. The lightweight fabric 95 Rayon 5 Spandex is airy and breathable, keeping you cool and comfortable all day long. The wide leg design creates a flattering silhouette that will flatter any figure. These pants are available in a variety of colors and tie dyes to suit any style. Whether youre heading to the office or a night out, these pants will be sure to complete your look. Get ready to make a statement in these stylish and comfortable pants!",0.2906389327601112
IBTOM CASTLE Baby Girls Formal Dress Lace Bowknot Baptism Embroidery Princess Birthday Wedding Flower Tutu Gown with Headwear 3-6 Months Red,Ibtom Castle,"Baby girls clothing Cute christening baptism flower dress for toddler baby little girl princess formal prom tutu ball gown party wedding birthday dress sleeveless round neck tulle evening formal gown, match with a bowknot and headband, 3pcs outfits clothes set. Baptism dress for girl Slim fitted bodice and dress hem adorned with lace flowers decoration, illusion vneckline, concealed backzip fastener. Elastic band head band for better wearing. A big removeable bowknot can decor back waist. Multlayers tulle adds to a fuller look. Flower girl dress Breathable and lace multilayer tutu design, bowknot tie on the back, sleeveless and sweet elegant hemline. Hidden zipper back easily to allow you to easily get inout of the dress. Bow is also removable, providing more optional autonomy design possibility. Popular element design, a best gift for your lovely baby girl. Wedding dress for baby girl Flower girl dress is suitable 36 months, 69 months, 912 months, 1218 months. Flower pattern print, gauze cover up, multilayer tulle skirt in knee length with soft lining, focus on protecting your childs delicate skin. Pretty design makes your baby be the princess. Occasion Prefect for special occasions, christening, photo shoot, pageant, birthday party, first name day, formal day, baptism, wedding party, vacation, christmas new year, valentines day, stage performance, holiday, first communion, ceremony, artistic photo shoot, beach, formal event, bride junior bridesmaids wedding guest, recital evening communication, family gathering and other special occasion.",0.2912040851656404
"WestinTrends Julia 10 Ft Outdoor Patio Cantilever Umbrella with Base Included, Market Hanging Offset Umbrella with 4-Pieces Fillable Base Weight, Coffee",WestinTrends,"The offset patio umbrella can provide you with a shade and protection that can keep you cool and comfortable outside. A best choice for your garden, yard, pool, deck, background beach, balcony, restaurant, and any other outdoor area. Durable long lasting 180 GSM polyester canopy fabric features fade resistant, water repellent and sun protection. Blocks up to 95 of UV rays, UV protection 30 Stay cool at all times, tilt the canopy from 90 to 180 degrees effortlessly with our ergonomically designed sliding handle, keeping the area protected throughout the day. while a wind vent cools air under the umbrella and enhance the stability. Easy openclose with manual crank lift mechanism puts the umbrella up in just seconds with minimal effort, wrapped with strap to secure and protect the patio umbrella in a closed state. The 8 long ribs are made of premium iron, and the 1.8 inch strong metal pole is sealed with a thick antioxidant coating to prevent corrosion and rust. We proudly offer a 1year warranty service and 247 customer service. If you have any questions about the product, please contact us.",0.2927422323744332
Men's The Little Mermaid Vintage Characters Graphic Tee Turquoise Medium,The Little Mermaid,"Enjoy comfort and fashion at the same time with this unique Mens graphic T Shirt from The Little Mermaid. Strut your stuff with this stylish new Mens Tee Shirt that is perfect for every occasion! Featuring your favorite characters like Ariel, Flounder, King Triton, Sebastian, Ursula. Find the perfect fit for you! Check out size charts for more information. Made from the highest quality materials on firstclass digital directtogarment printers. This item is made to order please consider this when ordering multiple sizes. Printed In the U.S.A with safe waterbased inks. Easy care machine wash cold inside out with similar colors, tumble dry low, hang dry for best results. Officially licensed apparel. Designed by an exclusive collaboration with top brands amp worldclass artists.",0.2947672290175656
Hanes Men's Perfect-T Tri-Blend Short Sleeve T-Shirt Solid Navy M,Hanes,"Perfect is a high bar, but our updated PerfectT TriBlend tee offers the look and feel of a fashion brand and more. Enjoy the comfort and great feel of U.S. grown cotton and recycled polyester in a lightweight, modern fit. Made with cotton sourced from American farms. Luxuriously soft, lightweight 60 recycled polyester30 U.S. grown ringspun cotton10 rayon. Features an easily removed tearaway neck label. Triblend performance fabric feels amazingly soft and light. Slightly tapered fit updates your casual look.",0.2951387735955416
Amay Blackout Grommet Curtain Panel Beige 42 Inch Wide by 108 Inch Long- 1Panel,Amay's,"PACKAGE CONTAINS Beige 42 Inch Wide by 108 Inch Long1 Panel. Along with set of tie back to gather and hold the drapes, along with hooks to hang the door window curtains straightaway NATURAL BLACKOUT Super heavy and soft Blackout Curtain Panels are very upmarket. Besides, 8599 light blocking, thermal insulated, soundproof, Faderesistant, energyefficient. Thread trimmed and wrinklefree, both sides are the same color. AMAZING MATERIAL Made of high quality durable 100 polyester fabric, our room darkening curtains are drapery, very pleasant to touch and free from odor. Decorative choice for your room. WIDELY TO USE These versatile curtains are available in a variety of colors and sizes. Simple solid pattern with aesthetic look, they will look great in bedroom, living room, dinning room, outdoor occasions as per requirements. PRIVACY PROTECTION These fabulous curtains can provide a real sense of separation amp privacy. The fabric is very soft and has a good sense of verticality. Help you have a good sleep and make you energetic every day. DESIGN and TEXTURE We offer innovative and affordable designs that are on trend yet unique. From neutral solids to colorful boho designs, our team of inhouse designers offers highquality products that everyone can enjoy.",0.2956878929595584
"V8 +Energy Black Cherry and Pomegranate Blueberry Juice Energy Drinks, 8 fl oz Can, 24 Count (4 Packs of 6)",V8 Juice,"Experience a delicious plantpowered energy boost with V8 Energy Black Cherry and Pomegranate Blueberry Juice Energy Drinks. These concentrated fruit and vegetable juice blends are infused with natural energy from black and green tea. Each 8 fl oz can contains 1 combined serving of veggies and fruit along with 80 milligrams of caffeine as much as leading energy drink brands. A healthy energy drink alternative, V8 Energy contains 4550 calories and 1112 grams of carbs per can. Its also an excellent source of B Vitamins. V8 Energy caffeinated juice drinks are nonGMO and glutenfree with no sugar added and no artificial colors. Enjoy V8 Energy as a coffee substitute in the morning, in place of soda during the afternoon, or as a mocktail or cocktail mixer in the evening. Its great anytime you need a plant based boost of natural caffeine. Not a low calorie food see nutrition panel for sugar and calorie content",0.2957408354433093
nuLOOM Vintage Erline Area Rug,nuLOOM,"100 Polypropylene,100 Polypropylene, made in Turkey,Designed with resilience against everyday wearandtear, this rug is kid and pet friendly and perfect for high traffic areas of your home such as living room, dining room, kitchen, and hallways,Sleek and functional 0.43 pile height allows for convenient placement in entryways, underneath furniture, and will not obstruct doorways,Brimming with artistic charm, bohemian rugs help you unleash your creativity,Easy to clean and maintain, we recommend vacuuming regularly and spot treating any mild stains with carpet cleaner",0.2970297592044099
NINA NEW YORK Womens Silver Goring Embellished Nadette Round Toe Wedge Slip On Dress Sandals Shoes 5.5 M,NINA NEW YORK,"Update your closet with fashion designs from NINA NEW YORK and discover all the stylish pieces they have to offer. Youll find versatile wardrobe trends that will look perfect with various outfits and occasions. Whether you are missing something modern, casual, or chic NINA NEW YORK has the selection your closet, and you, have been waiting for!",0.2972733516174449
Amay Grommet Top Blackout Curtain Panel Greyish White 60 Inch Wide by 120 Inch Long-1Panel,Amay's,"PACKAGE CONTAINS Greyish White 60 Inch Wide by 120 Inch Long1 Panel.Along with set of tie back to gather and hold the drapes, along with hooks to hang the door window curtains straightaway NATURAL BLACKOUT Super heavy and soft Blackout Curtain Panels are very upmarket. Besides, 8599 light blocking, thermal insulated, soundproof, Faderesistant, energyefficient. Thread trimmed and wrinklefree, both sides are the same color. AMAZING MATERIAL Made of high quality durable 100 polyester fabric, our room darkening curtains are drapery, very pleasant to touch and free from odor. Decorative choice for your room. WIDELY TO USE These versatile curtains are available in a variety of colors and sizes. Simple solid pattern with aesthetic look, they will look great in bedroom, living room, dinning room, outdoor occasions as per requirements. PRIVACY PROTECTION These fabulous curtains can provide a real sense of separation amp privacy. The fabric is very soft and has a good sense of verticality. Help you have a good sleep and make you energetic every day. DESIGN and TEXTURE We offer innovative and affordable designs that are on trend yet unique. From neutral solids to colorful boho designs, our team of inhouse designers offers highquality products that everyone can enjoy.",0.2974599098818783


### Azure OpenAI Recommendations

Copied and edited from [here](https://github.com/AzureSQLDB/GenAILab/blob/main/docs/5-azure-openai-recommendation.md).

In [None]:
DECLARE @search_text nvarchar(max) = 'help me plan a high school graduation party';

-- Embed the search text
DECLARE @search_vector vector(1536);
EXEC dbo.create_embeddings @search_text, @search_vector OUTPUT;

-- Store the 25 closest products, one row per product_name, in #t
DROP TABLE IF EXISTS #t;

WITH candidates AS
(
    -- Every product with its cosine distance to the search vector
    SELECT
        id,
        product_name,
        [description],
        vector_distance('cosine', @search_vector, description_vector) AS distance
    FROM [dbo].[vectorTable]
),
deduped AS
(
    -- Number the rows inside each product_name (lowest id first) so that
    -- duplicates can be removed; rows without a distance are excluded up front
    SELECT
        id,
        product_name,
        [description],
        distance,
        ROW_NUMBER() OVER (PARTITION BY product_name ORDER BY id) AS rn
    FROM candidates
    WHERE distance IS NOT NULL
)
SELECT TOP (25)
    id,
    product_name,
    [description],
    distance
INTO #t
FROM deduped
WHERE rn = 1
ORDER BY distance;

-- Aggregate the search results into one "Id=>Product=>Description" line per
-- product so they are easily consumable by the LLM.
--   * concat() treats NULL as '', so a product with a NULL name or description
--     is still listed instead of being silently dropped by string_agg.
--   * Casting to nvarchar(max) makes string_agg return a LOB and avoids its
--     8000-byte limit on non-LOB inputs.
--   * WITHIN GROUP lists the closest matches first, in a deterministic order.
declare @search_output nvarchar(max);
select
    @search_output = string_agg(
        concat(cast(t.[id] as nvarchar(max)), '=>', t.[product_name], '=>', t.[description]),
        char(13) + char(10)
    ) within group (order by t.[distance])
from
    #t as t;

-- Generate the chat-completions payload for the LLM. It carries three messages:
--   1. system: the shopping-assistant instructions,
--   2. user:   the retrieved products (@search_output) plus the required
--              answer format,
--   3. user:   the original question (@search_text).
-- isnull() keeps the second message intact when no products were found;
-- concatenating a NULL @search_output would turn the whole content into NULL.
declare @llm_payload nvarchar(max);
set @llm_payload = 
json_object(
    'messages': json_array(
            json_object(
                'role':'system',
                'content':'
                    You are an awesome AI shopping assistant  tasked with helping users find appropriate items they are looking for the occasion. 
                    You have access to a list of products, each with an ID, product name, and description, provided to you in the format of "Id=>Product=>Description". 
                    When users ask for products for specific occasions, you can leverage this information to provide creative and personalized suggestions. 
                    Your goal is to assist users in planning memorable celebrations using the available products.
                '
            ),
            json_object(
                'role':'user',
                'content': '## Source ##
                    ' + isnull(@search_output, N'') + '
                    ## End ##

                    Your answer needs to be a json object with the following format.
                    {
                        "answer": // the answer to the question, add a source reference to the end of each sentence. Source reference is the product Id.
                        "products": // a comma-separated list of product ids that you used to come up with the answer.
                        "thoughts": // brief thoughts on how you came up with the answer, e.g. what sources you used, what you thought about, etc.
                    }'
            ),
            json_object(
                'role':'user',
                'content': @search_text
            )
    ),
    'max_tokens': 800,
    'temperature': 0.3,
    'frequency_penalty': 0,
    'presence_penalty': 0,
    'top_p': 0.95,
    'stop': null
);

-- Invoke the LLM (chat completions deployment) to get the response.
-- The url and api-key are placeholders and must be changed to match your own
-- deployment.
declare @retval int, @response nvarchar(max);
declare @headers nvarchar(300) = N'{"api-key": "*******************************************************************", "content-type": "application/json"}';
exec @retval = sp_invoke_external_rest_endpoint
    @url = N'https://ta-openai.openai.azure.com/openai/deployments/ta-model-gpt-4/chat/completions?api-version=2024-08-01-preview',
    @headers = @headers,
    @method = 'POST',    
    @timeout = 120,
    @payload = @llm_payload,
    @response = @response output;

-- Show the raw outcome of the call.
select @retval as 'Return Code', @response as 'Response';

-- Stop here when the call failed, the same way dbo.create_embeddings does,
-- instead of letting later statements parse an error response.
if (@retval <> 0)
begin
    -- concat() treats NULL as '', so missing fields cannot null out the message.
    declare @llm_error nvarchar(2048) = concat(
        'Error calling OpenAI API', char(13), char(10),
        '[HTTP Status: ', json_value(@response, '$.response.status.http.code'), '] ',
        json_value(@response, '$.result.error.message')
    );
    throw 50000, @llm_error, 1;
end

-- Get the answer from the response.
-- Inner query: expand $.result.choices, open each choice's $.message object
-- and keep the value stored under the 'content' key.
-- Outer openjson: parse that content as JSON and return one row per top-level
-- key with its value.
-- NOTE(review): the inner query is used as a scalar subquery, so this
-- presumably expects a single choice in the response - confirm.
select [key], [value] 
from openjson(( 
    select t.value 
    from openjson(@response, '$.result.choices') c cross apply openjson(c.value, '$.message') t
    where t.[key] = 'content'
))