In [1]:
import csv
import json
import random
import openai
import time
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import edgedb

In [None]:
COLLECTION_NAME = 'reviews'  # Collection name
DIMENSION = 1536  # Embeddings size
MILVUS_HOST = 'localhost'  # Milvus server URI
MILVUS_PORT = '19530'
OPENAI_ENGINE = 'text-embedding-ada-002'  # Which engine to use
openai.api_key = 'ADD_YOUR_OPEN_API_KEY'  # Use your own Open AI API Key here
QUERY_PARAM = {
    "metric_type": "L2",
    "params": {"ef": 64},
}

In [3]:
connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)

if utility.has_collection(COLLECTION_NAME):
    utility.drop_collection(COLLECTION_NAME)

fields = [
    FieldSchema(name='id', dtype=DataType.VARCHAR, description='Ids', is_primary=True, auto_id=False,max_length=64000),
    FieldSchema(name='Business_name', dtype=DataType.VARCHAR, description='Business_name',max_length=64000),
    FieldSchema(name='zipcode', dtype=DataType.VARCHAR, description='Zipcode', max_length=64000),
    FieldSchema(name='business_community', dtype=DataType.VARCHAR, description='Business Community', max_length=64000),
    FieldSchema(name='rating', dtype=DataType.FLOAT, description='Rating'),
    FieldSchema(name='review', dtype=DataType.VARCHAR, description='Review',max_length=64000),
    FieldSchema(name='date_reviewed', dtype=DataType.VARCHAR,description='Date of the review' ,max_length=64000),
    FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='Embedding vectors', dim=DIMENSION)
]
schema = CollectionSchema(fields=fields, description='Review collection')
collection = Collection(name=COLLECTION_NAME, schema=schema)

index_params = {
    'index_type': 'IVF_FLAT',
    'metric_type': 'L2',
    'params': {'nlist': 1024}
}
collection.create_index(field_name="embedding", index_params=index_params)
collection.load()

In [4]:
client = edgedb.create_client()
def fetch_reviews():
    query = """
        SELECT Review {
            id,
            rating,
            review,
            review_date,
            reviews_business: {
            ID,
            name,
            in_communityArea:{
              name
            },
              has_address: {
                in_zipcode: {
                    digits
                },
            },

            },  
        };
    """
    result = client.query(query)
    return result

In [5]:
result=fetch_reviews()

In [6]:
result[0]

Object{id := UUID('7a72c492-8bf9-11ee-8969-c3e93274e364'), rating := 5.0, review := "Best rotisserie chicken I've ever had. Seasoned perfectly. Tender and delicious every time. Awesome job Chicken Man!", review_date := datetime.datetime(2020, 11, 12, 6, 0, tzinfo=datetime.timezone.utc), reviews_business := Object{ID := 'rdE3vHw6ojctlpYftl-Hkg', name := 'Country Grill Chicagoland', in_communityArea := Object{name := 'Chicago'}, has_address := Object{in_zipcode := Object{digits := '60450'}}}}

In [None]:
# Fetch embeddings for each Review to insert into Milvus
from openai import OpenAI
openAiClient = OpenAI(api_key='ADD_YOUR_OPEN_API_KEY')
def embed(text_to_embed):
    # Embed a line of text
    response = openAiClient.embeddings.create(input=text_to_embed, model=OPENAI_ENGINE)

    # Return the list of embeddings
    return response.data[0].embedding


In [8]:
from tqdm import tqdm
data = [[],[],[],[],[],[],[],[]]
BATCH_SIZE=100
for i in tqdm(range(0,len(result))):
    if  result[i]!=None and result[i].reviews_business!=None and result[i].review!='' and result[i].reviews_business.has_address!=None and result[i].reviews_business.in_communityArea!=None and result[i].reviews_business.has_address.in_zipcode!=None:
        data[0].append(result[i].reviews_business.ID)
        data[1].append(result[i].reviews_business.name or '')
        data[2].append(result[i].reviews_business.has_address.in_zipcode.digits or '')
        data[3].append(result[i].reviews_business.in_communityArea.name or '')
        data[4].append(result[i].rating)
        data[5].append(result[i].review or '')
        formatted_date = result[i].review_date.strftime("%Y-%m-%dT%H:%M:%SZ")
        data[6].append(formatted_date or '')
        data[7].append(embed(data[5]))
    if len(data[0])%BATCH_SIZE==0:
#         data[6].append(embed(data[4]))
        collection.insert(data)
        data = [[],[],[],[],[],[],[],[]]
    
if len(data[0])!=0:
#        data[6].append(embed(data[4]))
       collection.insert(data)
       data = [[],[],[],[],[],[],[],[]]


100%|███████████████████████████████████████| 5709/5709 [47:11<00:00,  2.02it/s]


In [9]:
# Filtered Search Function
def query(query, top_k = 5):
    text, expr= query
    res = collection.search([embed(text)], anns_field='embedding', expr=expr, param=QUERY_PARAM, limit = top_k, output_fields=['id', 'zipcode', 'business_community', 'rating', 'review', 'date_reviewed',"Business_name"])
    for i, hit in enumerate(res):
        print(f'\nDisplaying Top {top_k} Results for query "{text}"')
        for ii, hits in enumerate(hit):
            print('\t' + 'Rank:', ii + 1, '| Score:', hits.score)
            print('\t\t' + '  Review:', hits.entity.get('review'))
            print('\t\t' + '  Business_name:', hits.entity.get('Business_name'))
            print('\t\t' + '  Community Area:', hits.entity.get('business_community'))
            print('\t\t' + '  Rating:', hits.entity.get('rating'))
            print('\t\t' + '  Date of review:', hits.entity.get('date_reviewed'))
            print("\n")
            
            
    
            


In [21]:
# “Chicago Style Hotdogs” reviews for the date between 10/29/2023 and 11/5/2023.

query(('Chicago Style Hotdogs','date_reviewed>="2023-10-29T00:00:00Z" and date_reviewed<="2023-11-05T00:00:00Z"'), top_k=5)


Displaying Top 5 Results for query "Chicago Style Hotdogs"
	Rank: 1 | Score: 0.34424567222595215
		  Review: They make the best Vanilla Cappuccino in town ! I love this joint , also the honey ginger wellness shot is fantastic!
		  Business_name: Kafe De Kasa
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-11-04T05:00:00Z


	Rank: 2 | Score: 0.34424567222595215
		  Review: These sandwiches are excellent!

My favorite is the tuna. They are a bit pricy, but worth it. They are big sandwiches and will fill you up.

It's best to...
		  Business_name: Nonna's Pizza & Sandwiches
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-10-30T05:00:00Z


	Rank: 3 | Score: 0.34424567222595215
		  Review: First: The best Argentine fusion hamburgers in the Chicago suburbs. 

Second: Francia... 

Muchachos...
		  Business_name: Asadoras Argentina Burgers
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-11-01T05:00:00Z


	Rank: 4 | Score: 0.364841

In [23]:
# Top businesses that offer Steak in Community Area The Loop.
query(('Steak', 'business_community like \"The Loop%\"'), top_k=5)


Displaying Top 5 Results for query "Steak"


In [24]:
#Top businesses that offer Hot Dogs in Community Area Lincoln Park
query(('Hot Dogs', 'business_community like \"Lincoln Park%\"'), top_k=5)


Displaying Top 5 Results for query "Hot Dogs"
	Rank: 1 | Score: 0.3671034574508667
		  Review: Good bar with nice drinks. Seating is fairly limited and it's a strange venue with the theater right there. Amazing intermission / pre-theater bar, but a...
		  Business_name: Front Bar
		  Community Area: Lincoln Park
		  Rating: 4.0
		  Date of review: 2023-06-26T05:00:00Z


	Rank: 2 | Score: 0.3672153949737549
		  Review: This was my first time going to Chicago and my friend and I came here for breakfast! The inside of the restaurant is pretty colorful and it feels fun!...
		  Business_name: Dash of Salt and Pepper
		  Community Area: Lincoln Park
		  Rating: 4.0
		  Date of review: 2023-09-11T05:00:00Z




In [25]:
# Top businesses that offer Chicago Hot Dogs in Zip-Code 60605 and have ratings greater than 3 stars.
query(('Chicago HotDogs', 'zipcode=="60605" and rating>3'), top_k=5)


Displaying Top 5 Results for query "Chicago HotDogs"
	Rank: 1 | Score: 0.3360784947872162
		  Review: Food was amazing and fully paired with Macallan whiskey pours.  The service was 5 stars. The team was very professional.  The ambiance is subdued and chic....
		  Business_name: M Lounge
		  Community Area: Near South Side
		  Rating: 5.0
		  Date of review: 2023-06-25T05:00:00Z


	Rank: 2 | Score: 0.36762499809265137
		  Review: Cool thing to look at if having a few minutes to look around. If you ever have been on this part of Wabash you have noticed the murals that are colorful....
		  Business_name: Wabash Arts Corridor
		  Community Area: Chicago
		  Rating: 5.0
		  Date of review: 2023-03-02T06:00:00Z


	Rank: 3 | Score: 0.36766761541366577
		  Review: Quality/Quantity - Museum on the water.
Sadly when we got here, they were already closed for a private event. 
A wedding, should we crash...?

So we decided...
		  Business_name: Museum Campus
		  Community Area: Near South Side
		