In [51]:
import pandas as pd
from config import CONFIGS
import os
import re
import os
import json
import gc
from typing import Tuple

from utils.processing_functions import load_file_local_first, save_file_local_first
import weaviate
import weaviate.classes as wvc
from pydantic import BaseModel, ConfigDict
from weaviate.classes.config import Configure
from weaviate.classes.query import Filter
from weaviate.classes.query import MetadataQuery
from weaviate.util import generate_uuid5

from sklearn.preprocessing import MinMaxScaler


ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
S3_SCRAPER_BUCKET = CONFIGS["s3_scraper_bucket"]
GAME_CONFIGS = CONFIGS["games"]
RATINGS_CONFIGS = CONFIGS["ratings"]
IS_LOCAL = True if os.environ.get("IS_LOCAL", "False").lower() == "true" else False

In [152]:


class WeaviateClient(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    weaviate_client: weaviate.client = None
    collection: weaviate.collections.Collection = None

    def model_post_init(self, __context):
        self.weaviate_client = self.connect_weaviate_client_docker()

    def connect_weaviate_client_docker(self) -> weaviate.client:
        if not IS_LOCAL:
            client = weaviate.connect_to_local(
                host="127.0.0.1",
                port=8081,
                grpc_port=50051,
                headers={
                    "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
                },
            )
            return client

        return weaviate.connect_to_local(
            port=8081,
            headers={
                "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
            },
        )
    
    def find_near_objects(self, collection_name, uuid, limit:int =20):
        self.collection = self.weaviate_client.collections.get(collection_name)
        response = self.collection.query.near_object(
            near_object=uuid,
            limit=limit,
            return_metadata=MetadataQuery(distance=True),
        )
        return response.objects
    
    def close_client(self):
        self.weaviate_client.close()
    
    def create_collection(self, collection_name: str, reset=True, use_about=True, attributes:list=[]) -> None:

        if self.weaviate_client.collections.exists(collection_name):
            print("Collection already exists for this block")
            if reset:
                self.weaviate_client.collections.delete(collection_name)
                print("Deleted and recreating collection")
            return
        
        build_properties = [
                wvc.config.Property(
                    name="bggid",
                    data_type=wvc.config.DataType.TEXT,
                    skip_vectorization=True,
                    vectorize_property_name=False,
                ),
                wvc.config.Property(
                    name="name",
                    data_type=wvc.config.DataType.TEXT,
                    skip_vectorization=True,
                    vectorize_property_name=False,
                ),
            ]
        if use_about:
            build_properties.append(wvc.config.Property(name="about", data_type=wvc.config.DataType.TEXT))
        if len(attributes):
            build_properties+=[wvc.config.Property(
                    name=x, data_type=wvc.config.DataType.NUMBER, vectorize_property_name=False, skip_vectorization=True
                ) for x in attributes]


        self.weaviate_client.collections.create(
            name=collection_name,
            vectorizer_config=[
                Configure.NamedVectors.text2vec_transformers(
                    name="title_vector",
                    source_properties=["title"],
                )
            ],
            properties=build_properties,
        )

    def add_collection_item(self, item:pd.Series, collection_name:str, use_about=True, attributes:list=[]) -> None:

        self.collection = self.weaviate_client.collections.get(collection_name)

        game_object = {
            "bggid": str(item["bggid"]),
            "name": str(item["name"]).lower()
            }

        if use_about:
            game_object.update({"about": str(item["about"]).lower()})
        if len(attributes):
            game_object.update({x: float(item[x]) for x in attributes})
        
        uuid = generate_uuid5(game_object)

        # if self.collection.data.exists(uuid):
        #     return uuid
        # else:
        #     self.collection.data.insert(properties=game_object, uuid=uuid)
        
        self.collection.data.insert(properties=game_object, uuid=uuid)

        return uuid

    def close_client(self):
        self.weaviate_client.close()


weaviate_client = WeaviateClient()

  warn(
            Please make sure to close the connection using `client.close()`.


### Content Similarity

In [153]:
name_df = pd.read_pickle("data/prod/games/game_dfs_clean/games_clean.pkl")
name_df = name_df[['BGGId','Name']]
df = pd.read_pickle("top_1000_cleaned_rag_with_ratings_extrap.pkl")
df = name_df.merge(df, on="BGGId", how="inner")
df = df.drop(columns=["Description","Positive_Components","Negative_Components","Positive_Sentences","Negative_Sentences"])
df = df.rename(columns={x:x.lower().replace(" ","_") for x in df.columns.tolist()})
df = df.fillna(0)
small_df = df[['bggid',	'name',	'about']]
del name_df
gc.collect()



128

### Match Keywords Only

In [None]:
collection_name = "attributes_only"

In [None]:
positive_columns = [x for x in df.columns if x.startswith('positive_')]
negative_columns = [x for x in df.columns if x.startswith('negative_')]

all_columns_to_produce = positive_columns + negative_columns

scaler = MinMaxScaler(feature_range=(0,.3))

for col in all_columns_to_produce:
    df[col] = scaler.fit_transform(df[[col]])

all_columns_to_produce[:5]

['positive_strategic_depth',
 'positive_excellent_design',
 'positive_player_interaction',
 'positive_replayability',
 'positive_theme_integration']

In [None]:
weaviate_client.connect_weaviate_client_docker()
weaviate_client.create_collection(collection_name=collection_name, reset=True, use_about=False, attributes=all_columns_to_produce)

df['UUID'] = df.apply(lambda x: weaviate_client.add_collection_item(item=x, collection_name=collection_name, use_about=False, attributes=all_columns_to_produce), axis=1)

            Please make sure to close the connection using `client.close()`.


Collection already exists for this block
Adding data for game 1
Adding data for game 100423
Adding data for game 100901
Adding data for game 101721
Adding data for game 102652
Adding data for game 102680
Adding data for game 102794
Adding data for game 103343
Adding data for game 1035
Adding data for game 103885
Adding data for game 103886
Adding data for game 104006
Adding data for game 104162
Adding data for game 105134
Adding data for game 10547
Adding data for game 105551
Adding data for game 10630
Adding data for game 107529
Adding data for game 108687
Adding data for game 108745
Adding data for game 109276
Adding data for game 11
Adding data for game 110277
Adding data for game 110327
Adding data for game 111341
Adding data for game 11170
Adding data for game 111799
Adding data for game 113294
Adding data for game 113924
Adding data for game 115746
Adding data for game 116998
Adding data for game 117915
Adding data for game 117959
Adding data for game 118
Adding data for game 118

  df['UUID'] = df.apply(lambda x: weaviate_client.add_attribute_item(x, collection_name="sim_with_cols", features=all_columns_to_produce), axis=1)


In [None]:
uuid = small_df.loc[small_df['name']=='Pandemic Legacy: Season 1']['UUID'].values[0]
uuid = small_df.loc[small_df['name']=='Gloomhaven']['UUID'].values[0]
uuid = small_df.loc[small_df['name']=='Great Western Trail']['UUID'].values[0]
similars = weaviate_client.find_near_objects(collection_name=collection_name, uuid=uuid, limit=20)

picks = {}

for item in similars:
    picks[str(item.uuid)]=item.metadata.distance
    
picks = pd.DataFrame.from_dict(picks, columns=['distance'], orient='index').sort_values(by='distance', ascending=True).reset_index().rename(columns={'index':'UUID'})

picks = picks.merge(small_df, on='UUID', how='inner')[1:]
picks.head()

### Without attributes

In [154]:
collection_name = "similarities"

small_df.head()

Unnamed: 0,bggid,name,about
0,1,Die Macher,This game revolves around political strategy a...
1,100423,Elder Sign,This game is a cooperative board game that imm...
2,100901,Flash Point: Fire Rescue,This game is a cooperative board game where pl...
3,101721,Mage Wars Arena,This game is a tactical strategy experience th...
4,102652,Sentinels of the Multiverse,This game is a cooperative card game where pla...


In [None]:
weaviate_client.connect_weaviate_client_docker()
weaviate_client.create_collection(collection_name=collection_name, reset=True, use_about=True)

small_df['UUID'] = small_df.apply(lambda x: weaviate_client.add_collection_item(item=x, collection_name=collection_name, use_about=True), axis=1)

            Please make sure to close the connection using `client.close()`.


Collection already exists for this block
Deleted and recreating collection


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  small_df['UUID'] = small_df.apply(lambda x: weaviate_client.add_collection_item(item=x, collection_name="similarities", use_about=True), axis=1)


In [None]:
uuid = small_df.loc[small_df['name']=='Pandemic Legacy: Season 1']['UUID'].values[0]
uuid = small_df.loc[small_df['name']=='Gloomhaven']['UUID'].values[0]
uuid = small_df.loc[small_df['name']=='Great Western Trail']['UUID'].values[0]
similars = weaviate_client.find_near_objects(collection_name=collection_name, uuid=uuid, limit=20)

picks = {}

for item in similars:
    picks[str(item.uuid)]=item.metadata.distance
    
picks = pd.DataFrame.from_dict(picks, columns=['distance'], orient='index').sort_values(by='distance', ascending=True).reset_index().rename(columns={'index':'UUID'})

picks = picks.merge(small_df, on='UUID', how='inner')[1:]
picks.head()

Unnamed: 0,UUID,distance,bggid,name,about
1,ea29573d-8e39-5502-b05b-dc639e8d0625,0.132097,341169,Great Western Trail: Second Edition,This game is a strategic board game that combi...
2,158b9223-9652-5128-9d4d-f1cb28fa1c50,0.149783,364011,Great Western Trail: Argentina,This game is a strategic board game that revol...
3,32417ccd-d076-5be2-9392-a907dcb7f1c4,0.208928,380607,Great Western Trail: New Zealand,This game is a strategic board game that revol...
4,b2c4cca7-ba95-539d-8fba-f3c6c139dc8a,0.286462,140620,Lewis & Clark: The Expedition,This game is a strategic adventure set during ...
5,938b010e-c5da-5692-beb4-bf07e0cb0c20,0.29101,390092,Ticket to Ride Legacy: Legends of the West,This game is a legacy-style board game that bu...


### With all attributes

In [159]:
collection_name = "all_attributes"
df.head()

Unnamed: 0,bggid,name,about,positive_strategic_depth,positive_excellent_design,positive_player_interaction,positive_replayability,positive_theme_integration,positive_component_quality,negative_steep_learning_curve,...,positive_attractive_artwork,negative_mixed_reception_guilds,negative_catch_mechanics,negative_activity_vs,negative_action_limitations,negative_difficulty_levels,positive_fast_playtime,positive_variety_guilds,positive_simple_ruleset,positive_theme_artwork
0,1,Die Macher,This game revolves around political strategy a...,0.3,0.225984,0.203326,0.262119,0.0,0.204429,0.25235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,100423,Elder Sign,This game is a cooperative board game that imm...,0.124745,0.192873,0.036662,0.250372,0.0,0.274214,0.21399,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,100901,Flash Point: Fire Rescue,This game is a cooperative board game where pl...,0.101115,0.200008,0.136261,0.239957,0.0,0.233217,0.217765,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,101721,Mage Wars Arena,This game is a tactical strategy experience th...,0.3,0.225984,0.126963,0.3,0.0,0.211963,0.261472,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,102652,Sentinels of the Multiverse,This game is a cooperative card game where pla...,0.072572,0.196492,0.180532,0.3,0.0,0.213759,0.224743,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [160]:
positive_columns = [x for x in df.columns if x.startswith('positive_')]
negative_columns = [x for x in df.columns if x.startswith('negative_')]

all_columns_to_produce = positive_columns + negative_columns

scaler = MinMaxScaler(feature_range=(0,.3))

for col in all_columns_to_produce:
    df[col] = scaler.fit_transform(df[[col]])

all_columns_to_produce[:5]

['positive_strategic_depth',
 'positive_excellent_design',
 'positive_player_interaction',
 'positive_replayability',
 'positive_theme_integration']

In [None]:
weaviate_client.connect_weaviate_client_docker()
weaviate_client.create_collection(collection_name=collection_name, reset=True, use_about=True, attributes=all_columns_to_produce)

df['UUID'] = df.apply(lambda x: weaviate_client.add_collection_item(item=x, collection_name=collection_name, use_about=True, attributes=all_columns_to_produce), axis=1)

            Please make sure to close the connection using `client.close()`.


Collection already exists for this block
Deleted and recreating collection


  raise KeyError("{!r} is not registered".format(fileobj)) from None


In [None]:
uuid = df.loc[df['name']=='Pandemic Legacy: Season 1']['UUID'].values[0]
uuid = df.loc[df['name']=='Gloomhaven']['UUID'].values[0]
similars = weaviate_client.find_near_objects(collection_name="sim_with_cols", uuid=uuid, limit=20)

picks = {}

for item in similars:
    picks[str(item.uuid)]=item.metadata.distance
    
picks = pd.DataFrame.from_dict(picks, columns=['distance'], orient='index').sort_values(by='distance', ascending=True).reset_index().rename(columns={'index':'UUID'})

picks = picks.merge(df, on='UUID', how='inner')
picks.head()

Unnamed: 0,UUID,distance,bggid,name,about,positive_strategic_depth,positive_excellent_design,positive_player_interaction,positive_replayability,positive_theme_integration,...,positive_attractive_artwork,negative_mixed_reception_guilds,negative_catch_mechanics,negative_activity_vs,negative_action_limitations,negative_difficulty_levels,positive_fast_playtime,positive_variety_guilds,positive_simple_ruleset,positive_theme_artwork
0,c2ea3335-f770-5cc1-898f-61a2193dc267,-1.192093e-07,174430,Gloomhaven,This game is a cooperative tactical adventure ...,0.190533,0.200112,0.043876,0.3,0.202031,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,ef72821f-720b-5ed9-ad41-c447ce8d5232,0.1613314,295770,Frosthaven,This game is a cooperative dungeon crawler tha...,0.035811,0.203091,0.101444,0.15705,0.184245,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,bf26cc82-5a30-57ad-ac2c-84d1924b5d6e,0.1843603,291457,Gloomhaven: Jaws of the Lion,This game is a cooperative dungeon crawler tha...,0.096801,0.0,0.115204,0.3,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,a0417099-488b-5240-8ac7-1676a24fad7b,0.1920178,150997,Shadows of Brimstone: Swamps of Death,This game is a cooperative dungeon crawler set...,0.101757,0.0,0.126402,0.3,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5c545481-f11b-5fa1-9e57-a5ab38328cb7,0.1999224,286063,The 7th Citadel,This game is a narrative-driven adventure that...,0.074391,0.185954,0.036562,0.201123,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6fe7499a-36a7-5224-b201-be667a83b18d,0.20143,215341,Thunderstone Quest,This game is a blend of dungeon crawling and d...,0.3,0.193409,0.083338,0.092307,0.184493,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2ad762c4-cf07-5b23-9e43-9744f16c6de2,0.203619,322708,Descent: Legends of the Dark,This game is a cooperative dungeon crawler tha...,0.159903,0.0,0.133423,0.098665,0.217382,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0d9ccd0b-d770-5f8b-9ee5-6ce0b8f69b16,0.2103726,181521,Warhammer Quest: The Adventure Card Game,This game is a cooperative dungeon crawler tha...,0.050316,0.0,0.048488,0.3,0.180391,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,6c5cd774-ee3f-5f1d-9f63-5fa34a4b7015,0.2136629,254708,Roll Player Adventures,This game is a cooperative adventure that comb...,0.129097,0.190114,0.147819,0.3,0.230147,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,d09f1c50-76a5-5ec0-b0b4-9e4f95019e8f,0.2153109,181530,Runebound (Third Edition),This game is a fantasy-themed adventure board ...,0.066694,0.196787,0.139457,0.3,0.224003,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
