In [None]:
import pandas as pd
import weaviate.classes as wvc
from weaviate.classes.config import Configure
from dotenv import load_dotenv
import warnings
import gc
import time
import json
import weaviate.classes as wvc
import boto3

from exploratory_functions import connect_weaviate_client_docker, add_collection_batch, generate_aggregated_review, refine_df_for_specific_game, divide_and_process_generated_summary, check_dynamo_db_key, remove_collection_batch

# Silence every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')
# Load environment variables from the .env file one directory above the notebook.
load_dotenv("../.env")

# Cap how many columns / rows pandas renders when a frame is displayed.
pd.set_option("display.max_columns", 30)
pd.set_option("display.max_rows", 30)

# Model name passed to the collection's OpenAI generative config.
ai_generator = "gpt-4o-mini"
# Name of the Weaviate collection that is (re)created and used by the cells below.
collection_name = "Reviews"
# Forwarded to refine_df_for_specific_game as sample_pct — presumably the fraction of a
# game's reviews to sample; TODO confirm in exploratory_functions.
sample_pct=.05

In [None]:
# Open a Weaviate client through the project helper (the helper's name suggests a
# Docker-hosted instance — confirm in exploratory_functions).
client = connect_weaviate_client_docker()

# Fetch the server metadata and show its 'modules' entry as the cell output.
meta_info = client.get_meta()
meta_info['modules']

In [None]:
# Start from a clean slate: drop the collection if a previous run left it behind.
if client.collections.exists(collection_name):
    client.collections.delete(collection_name)

# Re-create the collection with one named vector, an OpenAI generative module,
# and two text properties.
# NOTE(review): the named vector is built from a "title" property, but only
# "review_text" and "product_id" are declared below — confirm "title" is the
# intended source property.
client.collections.create(
    name=collection_name,
    vectorizer_config=[
        Configure.NamedVectors.text2vec_transformers(
            name="title_vector",
            source_properties=["title"],
        ),
    ],
    generative_config=Configure.Generative.openai(model=ai_generator),
    properties=[
        wvc.config.Property(name="review_text", data_type=wvc.config.DataType.TEXT),
        # product_id is an identifier, so both its value and its name are kept out of the vector.
        wvc.config.Property(
            name="product_id",
            data_type=wvc.config.DataType.TEXT,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
    ],
)

In [None]:
# NOTE: read_pickle executes whatever the pickle contains — only load files this project produced.
user_df = pd.read_pickle("../data/prod/users/user_dfs_clean/complete_user_ratings.pkl")
game_df = pd.read_pickle("../data/prod/games/game_dfs_clean/games_clean.pkl")

# Mean and standard deviation of AvgRating over game_df, each rounded to 2 decimals.
rating_stats = game_df["AvgRating"].describe()
game_mean = rating_stats['mean'].round(2)
game_std = rating_stats['std'].round(2)

# Rating cut-offs at fixed multiples of the (already rounded) standard deviation
# around the (already rounded) mean; these are substituted into the prompt template.
two_under = round(game_mean - 2 * game_std, 2)
one_under = round(game_mean - game_std, 2)
half_over = round(game_mean + .5 * game_std, 2)
one_over = round(game_mean + game_std, 2)

# Left join keeps every row of user_df and attaches the selected game columns by BGGId.
game_columns = ['BGGId','Name','Description','AvgRating', 'BayesAvgRating']
all_games_df = user_df.merge(game_df[game_columns], on="BGGId", how="left")
# Ids are handled as strings by the cells that follow.
all_games_df["BGGId"] = all_games_df["BGGId"].astype("string")

# The source frames are no longer needed; release them before the heavier cells run.
del game_df, user_df
gc.collect()

In [None]:
# all_games_df keeps the rows of user_df (left merge), so a BGGId can appear on many
# rows. Collapse to one row per game BEFORE ranking; otherwise the first 25 rows after
# sorting can all belong to the same highest-rated game.
top_25 = (
    all_games_df[["BGGId", "BayesAvgRating"]]
    .drop_duplicates(subset="BGGId")
    .sort_values("BayesAvgRating", ascending=False)["BGGId"]
    .head(25)
    .astype(str)  # downstream lookups expect plain Python strings
    .to_list()
)

### Produce synopses

In [None]:
# Starts as an empty dict. Nothing in the cells visible here reads or writes it —
# NOTE(review): confirm it is still needed.
overall_summary = {}

In [None]:
# Read the prompt templates. The context manager closes the file handle right away
# instead of leaving it open until garbage collection; JSON text is UTF-8, so the
# encoding is stated explicitly rather than taken from the platform default.
with open('prompt.json', encoding='utf-8') as prompt_file:
    all_prompts = json.load(prompt_file)

# Template whose {PLACEHOLDER} tokens are filled in per game before generation.
generate_prompt = all_prompts['gpt4o_mini_generate_prompt_structured']

In [None]:
def prompt_replacement(generate_prompt, game_name, avg_rating):
    """Fill the placeholder tokens of a prompt template for one game.

    Args:
        generate_prompt: Template text containing tokens such as
            ``{GAME_NAME_HERE}`` and ``{GAME_AVERAGE_HERE}``.
        game_name: Text substituted for ``{GAME_NAME_HERE}`` (inserted as-is).
        avg_rating: Value substituted, via ``str()``, for ``{GAME_AVERAGE_HERE}``.

    Returns:
        The template with every occurrence of each token replaced.

    Note:
        The threshold tokens are filled from the notebook-level variables
        ``two_under``, ``one_under``, ``one_over``, ``half_over`` and
        ``game_mean``, so those must be defined before this is called.
    """
    # Applied in this order, one token at a time.
    substitutions = (
        ("{GAME_NAME_HERE}", game_name),
        ("{GAME_AVERAGE_HERE}", str(avg_rating)),
        ("{TWO_UNDER}", str(two_under)),
        ("{ONE_UNDER}", str(one_under)),
        ("{ONE_OVER}", str(one_over)),
        ("{HALF_OVER}", str(half_over)),
        ("{OVERALL_AVERAGE}", str(game_mean)),
    )

    filled_prompt = generate_prompt
    for token, value in substitutions:
        filled_prompt = filled_prompt.replace(token, value)
    return filled_prompt

In [None]:
# One-off run for a single hand-picked game id.
for game_id in ['318009']:
    # Skip ids for which check_dynamo_db_key returns a truthy value.
    if check_dynamo_db_key(game_id):
        continue

    df, game_name, avg_rating = refine_df_for_specific_game(all_games_df, game_id, sample_pct=sample_pct)
    # Use the id exactly as stored in the frame for every call below.
    game_id = df['BGGId'].iloc[0]
    reviews = df['combined_review'].to_list()

    add_collection_batch(client, collection_name, game_id, reviews)
    try:
        current_prompt = prompt_replacement(generate_prompt, game_name, avg_rating)
        summary = generate_aggregated_review(client, collection_name, game_id, current_prompt)
        divide_and_process_generated_summary(game_id, summary=summary.generated)
        print(f"\n\n{summary.generated}")
    finally:
        # Always take this game's reviews back out of the collection, even when
        # generation or post-processing raises, so a failed run leaves nothing behind.
        remove_collection_batch(client, collection_name, game_id, reviews)

In [None]:
# Same pipeline as the single-game cell, driven by the ranked id list
# (currently limited to its first entry).
for game_id in top_25[:1]:
    # Skip ids for which check_dynamo_db_key returns a truthy value.
    if check_dynamo_db_key(game_id):
        continue

    df, game_name, avg_rating = refine_df_for_specific_game(all_games_df, game_id, sample_pct=sample_pct)
    # Use the id exactly as stored in the frame for every call below.
    game_id = df['BGGId'].iloc[0]
    reviews = df['combined_review'].to_list()

    add_collection_batch(client, collection_name, game_id, reviews)
    try:
        current_prompt = prompt_replacement(generate_prompt, game_name, avg_rating)
        summary = generate_aggregated_review(client, collection_name, game_id, current_prompt)
        divide_and_process_generated_summary(game_id, summary=summary.generated)
        print(f"\n\n{summary.generated}")
    finally:
        # Always take this game's reviews back out of the collection, even when
        # generation or post-processing raises, so a failed run leaves nothing behind.
        remove_collection_batch(client, collection_name, game_id, reviews)

In [None]:
# get item from dynamodb
# NOTE: `game_id` is whatever value the most recently executed loop cell left behind;
# run one of those cells first (or set game_id explicitly) before running this one.
dynamodb_client = boto3.client('dynamodb')
table_name = 'game_generated_descriptions'

# get_item leaves the 'Item' key out of its response when no row matches, so read it
# with .get() and report the miss instead of failing with a bare KeyError.
response = dynamodb_client.get_item(TableName=table_name, Key={'game_id': {'S': game_id}}).get('Item')
if response is None:
    print(f"No item found in table '{table_name}' for game_id={game_id!r}")
response
