In [30]:
import pandas as pd
from config import CONFIGS
import os
import gc
import boto3

from utils.processing_functions import load_file_local_first, save_file_local_first

In [3]:
# Runtime settings for this notebook, read from the environment and the
# project-level CONFIGS mapping.
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")  # falls back to "dev" when unset
S3_SCRAPER_BUCKET = CONFIGS["s3_scraper_bucket"]
GAME_CONFIGS = CONFIGS["games"]
RATINGS_CONFIGS = CONFIGS["ratings"]
# Only the case-insensitive string "true" enables local mode; the comparison
# already yields a bool, so no conditional expression is needed.
IS_LOCAL = os.environ.get("IS_LOCAL", "False").lower() == "true"

In [12]:
def load_reduced_game_df(top_n: int = 1000) -> pd.DataFrame:
    """Load the cleaned games frame and keep only its highest-rated rows.

    The frame is read from ``games_clean.pkl`` under
    ``GAME_CONFIGS["clean_dfs_directory"]`` via ``load_file_local_first``,
    sorted by ``BayesAvgRating`` in descending order, and truncated.

    Args:
        top_n: Number of leading rows to keep after sorting. Defaults to
            1000, the value that was previously hard-coded.

    Returns:
        The first ``top_n`` rows of the sorted frame (fewer if the source
        frame is shorter).
    """
    print(f"\nLoading game data from {GAME_CONFIGS['clean_dfs_directory']}")
    game_df = load_file_local_first(
        path=GAME_CONFIGS["clean_dfs_directory"], file_name="games_clean.pkl"
    )

    # head(n) is the positional equivalent of the old [0:1000] slice.
    game_df_reduced = game_df.sort_values("BayesAvgRating", ascending=False).head(
        top_n
    )

    # Drop the reference to the full frame so its memory can be reclaimed
    # before the notebook continues with the reduced one.
    del game_df
    gc.collect()
    print("Loaded and refined games data")

    return game_df_reduced

In [None]:
# Working frame for the rest of the notebook: the reduced, rating-sorted games
# frame returned by load_reduced_game_df().
df = load_reduced_game_df()

In [None]:
# Sanity check: (rows, columns) of the reduced frame.
df.shape

In [None]:
# Preview the first rows of the reduced frame.
df.head()

In [None]:
# Ids to look up in DynamoDB, one per row of the reduced frame.
# to_numpy() is the accessor pandas recommends over .values when a NumPy
# array is wanted.
game_ids = df["BGGId"].to_numpy()

In [18]:
# Client used by the lookups in this notebook.
dynamodb_client = boto3.client("dynamodb")

# Probe lookup for a single hard-coded game id. get_item leaves the "Item" key
# out of the response when no row matches, so use .get(): `item` becomes None
# instead of the cell raising KeyError and halting a Run All.
item = dynamodb_client.get_item(
    TableName="game_generated_descriptions",
    Key={"game_id": {"S": "224517"}},
).get("Item")

In [None]:
# Attach the generated text stored in DynamoDB to each game's row in `df`.
# The loop is best-effort: a game that cannot be enriched is reported and
# skipped, and the remaining games are still processed.
GENERATED_COLUMNS = ("generated_pros", "generated_cons", "generated_description")

for game in game_ids:
    print(f"Getting data for game {game}")

    dynamodb_id = str(game)  # the table key is sent as a string ("S") attribute
    try:
        response = dynamodb_client.get_item(
            TableName="game_generated_descriptions",
            Key={"game_id": {"S": dynamodb_id}},
        )
    except Exception as e:
        # Request-level failure (network, credentials, throttling, ...): this is
        # not the same as a missing row, so report it as such.
        print(f"Failed to fetch game {dynamodb_id} from dynamodb, Error: {e}")
        continue

    # get_item omits "Item" from the response when no row matches the key.
    item = response.get("Item")
    if item is None:
        print(f"Game {dynamodb_id} not found in dynamodb")
        continue

    # Pull every attribute out before touching `df`, so a row is never left
    # with only some of its generated columns filled in.
    try:
        generated = {column: item[column]["S"] for column in GENERATED_COLUMNS}
    except KeyError as e:
        print(f"Game {dynamodb_id} is missing attribute {e} in dynamodb")
        continue

    # Build the row mask once and reuse it for all three columns.
    game_mask = df["BGGId"] == game
    for column, text in generated.items():
        df.loc[game_mask, column] = text


In [None]:
# Preview the first rows again to inspect the generated_* columns filled in by
# the DynamoDB lookup loop.
df.head()

In [None]:
# Persist the enriched frame next to the cleaned games data.
# NOTE(review): presumably save_file_local_first writes locally before any
# remote copy — confirm in utils.processing_functions.
save_file_local_first(
    path=GAME_CONFIGS["clean_dfs_directory"],
    file_name="top_1000_with_attached_rag.pkl",
    data=df,
)