In [1]:
import time
import os
from typing import Dict, Any
from loguru import logger
from dotenv import load_dotenv

dotenv_path = "../secrets.env"
load_dotenv(dotenv_path)

import pandas as pd
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
from langchain.vectorstores import Pinecone
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI

  from tqdm.autonotebook import tqdm


In [2]:
# Read the reviews CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/reviews.csv")

In [3]:
# Number of distinct values in the "name" column (missing values, if any, count as one value).
df["name"].nunique(dropna=False)

18

In [4]:
# Show the column labels of the reviews table.
df.columns

Index(['caption', 'name', 'overall_rating', 'category', 'opening_hours'], dtype='object')

In [5]:
# Load the same reviews CSV through LangChain's CSVLoader; `data` holds whatever
# `loader.load()` returns (presumably one document per CSV row — the length is
# inspected in the following cell).
loader = CSVLoader(file_path="../data/reviews.csv")
data = loader.load()

In [6]:
# Sanity check: display how many documents were loaded. This only shows the
# count; it is expected to equal the number of rows in reviews.csv
# (TODO: confirm, e.g. by comparing against the row count of the DataFrame).
len(data)

1220

In [5]:
# Name of the Pinecone index used throughout the notebook.
index = "dating-plan-gpt-index"

# Connect to Pinecone with credentials taken from the environment
# (populated from ../secrets.env by load_dotenv in the first cell).
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_API_ENV"),  # next to api key in console
)

# Embedding model used to query the vector store.
embeddings = OpenAIEmbeddings()

In [10]:
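# One-off index management, kept commented out so that running the notebook end to end
# neither recreates nor deletes the index (presumably run manually when needed).
# pinecone.create_index("dating-plan-gpt-index", dimension=1536)
# pinecone.delete_index('dating-plan-gpt-index')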

In [6]:
# Upload path (disabled): embeds every loaded document and writes it to the index.
# Presumably run once to populate the index; re-enable only to rebuild it.
# docsearch = Pinecone.from_texts(
#     [t.page_content for t in data], embeddings, index_name=index
# )
# Active path: attach to the index that already exists instead of re-uploading.
docsearch = Pinecone.from_existing_index(index_name=index, embedding=embeddings)

In [7]:
# Chat model with temperature 0, wrapped in a "stuff" question-answering chain.
llm = ChatOpenAI(model="gpt-4", temperature=0)
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [8]:
# Accumulator for the generated summaries, keyed by venue name.
dictionary = dict()

In [9]:
def summarize_dating_activities(
    df: pd.DataFrame,
    docsearch,
    chain,
    dictionary: Dict[str, Any],
    max_attempts: int = 15,
    k: int = 30,
    retry_delay: float = 10,
) -> Dict[str, Any]:
    """
    Summarize the dating activities for each unique name in the DataFrame.

    For every distinct value of ``df["name"]`` a prompt is built, the most similar
    documents are fetched with ``docsearch.similarity_search`` and passed to
    ``chain.run``. On failure the call is retried with one document fewer
    (never fewer than one) after waiting ``retry_delay`` seconds.

    Args:
        df (pd.DataFrame): The DataFrame containing a "name" column.
        docsearch: Object exposing ``similarity_search(query, k=...)``.
        chain: Object exposing ``run({"input_documents": ..., "question": ...})``.
        dictionary (Dict[str, Any]): Mapping updated in place with ``name -> summary``.
        max_attempts (int, optional): Maximum number of attempts per name. Defaults to 15.
        k (int, optional): Number of documents requested on the first attempt;
            reduced by one after each failure, with a floor of 1. Defaults to 30.
        retry_delay (float, optional): Seconds to wait between attempts. Defaults to 10.

    Returns:
        Dict[str, Any]: The same ``dictionary`` object. Names for which every
        attempt failed are left out (an error is logged for each of them).

    Raises:
        BaseException: Only non-``Exception`` errors (e.g. ``KeyboardInterrupt``)
        propagate; ordinary exceptions are logged and retried.
    """
    for name in df["name"].unique():
        # The prompt depends only on the name, so build it once per name instead of
        # once per attempt. The whitespace inside the literal is kept exactly as it
        # was so the text sent to the search and the model is unchanged.
        query = f"""
                Analyze the provided Google reviews for {name} and generate a clear and concise summary highlighting the potential 
                activities suitable for a dating scenario. Consider factors such as ambiance, activities, food and drink options, and overall romantic 
                appeal as mentioned in the reviews. The summary should be structured to provide a comprehensive but short, about 600 characters, 
                understanding of what a date at this location might entail.            
                """

        num_docs = k
        for attempt in range(1, max_attempts + 1):
            try:
                # Fetch the most similar documents and let the chain answer the prompt.
                docs = docsearch.similarity_search(query, k=num_docs)
                dictionary[name] = chain.run(
                    {"input_documents": docs, "question": query}
                )
                logger.info(f"Successfully summarized dating activities for {name}.")
                break
            except Exception as exc:
                # `except Exception` (not a bare `except`) so that KeyboardInterrupt and
                # SystemExit still stop the run; the cause is logged instead of hidden.
                if attempt < max_attempts:
                    logger.warning(
                        f"Failed to summarize dating activities for {name} "
                        f"(attempt {attempt}/{max_attempts}): {exc!r}. "
                        f"Retrying in {retry_delay} seconds..."
                    )
                    # Ask for fewer documents next time, but never fewer than one.
                    num_docs = max(1, num_docs - 1)
                    time.sleep(retry_delay)
                else:
                    # Last attempt: no point in sleeping before giving up.
                    logger.warning(
                        f"Failed to summarize dating activities for {name} "
                        f"(attempt {attempt}/{max_attempts}): {exc!r}."
                    )
        else:
            # Reached only when no attempt succeeded (the loop never hit `break`).
            logger.error(
                f"Giving up on {name} after {max_attempts} failed attempts; no summary stored."
            )

    return dictionary

In [10]:
# Generate a summary for every distinct name, filling (and returning) `dictionary`.
dictionary = summarize_dating_activities(
    df=df,
    docsearch=docsearch,
    chain=chain,
    dictionary=dictionary,
)

2023-07-27 11:51:55.045 | INFO     | __main__:summarize_dating_activities:49 - Successfully summarized dating activities for The Projector.
2023-07-27 11:52:06.511 | INFO     | __main__:summarize_dating_activities:49 - Successfully summarized dating activities for Yunomori Onsen & Spa.
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-AtFNVtneM1sGgYAX20J7ccCz on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-AtFNVtneM1sGgYAX20J7ccCz 

In [11]:
# Number of names that received a summary; names whose attempts all failed are absent,
# so compare this with the unique-name count shown earlier.
len(dictionary)

18

In [12]:
# Print each name followed by its summary, separated by blank lines.
for key, item in dictionary.items():
    print(f"{key} {item} \n")

The Projector The Projector is a unique, retro-style cinema that offers a diverse selection of independent and foreign films, making it an ideal spot for a movie date. The cinema's old-school ambiance, complete with vintage decor and comfortable seating, adds to its charm. It also offers a bar with a variety of food and drink options, including alcohol, which can be enjoyed during the movie. The cinema also hosts special events and screenings, adding to its appeal. However, some reviews mention that the seating can be a bit uncomfortable and the venue can be a bit cold. 

Yunomori Onsen & Spa Yunomori Onsen & Spa offers a unique dating experience with a variety of relaxing activities. Couples can enjoy a variety of baths, including traditional Japanese hot springs and modern hydrotherapy facilities. The ambiance is serene and calming, perfect for a romantic escape. There's also a sauna and steam room for added relaxation. After the spa, couples can dine at the on-site cafe, which offer

In [18]:
# Turn the name -> summary mapping into a two-column table ("name", "review").
# NOTE(review): this rebinds `df`, which earlier held the table read from
# ../data/reviews.csv; cells that expect that table must not be re-run after this one.
df = pd.DataFrame(list(dictionary.items()), columns=["name", "review"])

In [19]:
# Write the current `df` to disk without the index column.
df.to_csv(path_or_buf="../data/data.csv", index=False)

In [31]:
# List the names for which a summary was stored.
dictionary.keys()

dict_keys(['The Projector', 'Yunomori Onsen & Spa', 'Yueh Hai Ching Temple', 'The Summerhouse', 'Smoke & Mirrors', 'The Shoppes at Marina Bay Sands', 'Hay Dairies Pte Ltd (Goat Farm)', 'National Gallery Singapore', 'National Museum of Singapore', 'CHIJMES', 'Marina at Keppel Bay', 'KOMA Singapore', 'Chomp Chomp Food Centre', 'The Other Room', 'East Coast Park', 'Pasta Bar', 'Esplanade - Theatres on the Bay, Singapore', 'Science Centre Singapore'])

In [15]:
# Print the character length of every stored summary (the prompt asks for about 600).
for key, summary in dictionary.items():
    print(len(summary))

562
639
569
560
608
510
626
625
560
530
533
655
555
567
581
576
559
709
