In [40]:
import pandas as pd
import json
import plotly.express as px
from sklearn.neighbors import NearestNeighbors
from concurrent.futures import ThreadPoolExecutor

In [46]:
diffs = pd.read_parquet('diffs.parquet')
df_vis = pd.read_parquet('df_vis.parquet')
df = pd.read_parquet('full_clash_battles_zstd.parquet').sample(100000, random_state=42).reset_index(drop=True)
winners = df[[col for col in df.columns if 'winner' in col and 'level' not in col]].copy().drop(columns = 'winner').rename(columns=lambda x: x.replace('winner', 'player'))
losers = df[[col for col in df.columns if 'loser' in col and 'level' not in col]].copy().drop(columns = 'loser').rename(columns=lambda x: x.replace('loser', 'player'))

In [47]:
with open('./dicts/card_mappings.json') as f:
    card_mappings = json.load(f)

def get_names(i):
    for col in i.columns:
        if col.endswith('_id'):
            i[col] = i[col].astype(str).map(card_mappings)
    return i

def ohe(i):
    melted = i.reset_index().melt(id_vars='index', value_name='card', var_name='slot')
    i = pd.get_dummies(melted.set_index('index')['card'], dtype = int)
    i = i.groupby(level=0).max()
    return i

winners = get_names(winners)
losers = get_names(losers)

In [21]:
df_vis['WinnerDeck'] = df_vis['WinnerDeck'].astype(str)
df_vis['LoserDeck'] = df_vis['LoserDeck'].astype(str)

In [44]:
def knn_cards(df, indices, common_set_size=3, num_threads=8):
    card_columns = [f'player_card_{i}_id' for i in range(1, 9)]
    
    def get_common_cards(idx):
        neighbor_indices = indices[idx]
        all_neighbor_cards = df.iloc[neighbor_indices][card_columns].values.flatten()
        card_counts = pd.Series(all_neighbor_cards).value_counts()
        return card_counts.head(common_set_size).index.tolist()
    
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        shared_cards_list = list(executor.map(get_common_cards, range(len(df))))
        
    return pd.Series([str(sorted(i)) for i in shared_cards_list])

def train_nn(n_neighbors):
    coordinates = pd.DataFrame(df_vis[['UMAP 1', 'UMAP 2']].values, columns=['x', 'y'])
    nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean')
    nn_model.fit(coordinates)
    _, indices = nn_model.kneighbors(coordinates)
    return indices

def get_only_card_type(df, card, col):
    return df[df_vis[col] == f"['{card}']"].copy() 

In [49]:
card_type = "Golden Knight"
card_df = get_only_card_type(df_vis, card_type, 'Loser1000')

indices = train_nn(100)
winner_common = knn_cards(get_only_card_type(winners, card_type, 'Loser1000'), indices, common_set_size=3)

IndexError: positional indexers are out-of-bounds

In [22]:

color_by = "WinnerDeck"
hover = ["WinCon", "Elixir"]
#hover = ['WinnerDeck', 'LoserDeck']

fig = px.scatter(
    card_df,
    x='UMAP 1',
    y='UMAP 2',
    color=color_by,           
    hover_data=hover,     
    title=f'UMAP Projection of {card_type} Decks',
    width=1200,
    height=800
)
fig.update_traces(marker={'size': 4, 'opacity': 0.6})
fig.show()