In [47]:
import pandas as pd
import numpy as np
import json

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [6]:
# Load the battle log and replace whatever index the parquet file carried
# with a fresh 0..n-1 RangeIndex.
battles_path = 'full_clash_battles_zstd.parquet'
df = pd.read_parquet(battles_path).reset_index(drop=True)

In [31]:
# Build a label-balanced table from winner/loser battle rows.
# Half of the battles are kept as-is (player1 = winner, Win = 1); the other
# half are mirrored (player1 = loser, Win = 0) so both labels are present.
card_slots = [f'card_{i}_id' for i in range(1, 9)] + ['tower_card_id']
winner_cols = [f'winner_{slot}' for slot in card_slots]
loser_cols = [f'loser_{slot}' for slot in card_slots]

keep, swap = train_test_split(df, test_size=0.5, random_state=42)

# Select the deck columns by name and attach the label with .assign(), which
# returns a new frame instead of writing into a slice of `df`.
keep = keep[winner_cols + loser_cols].assign(Win=1)
swap = swap[loser_cols + winner_cols].assign(Win=0)

# Rename by label rather than by position: the result no longer depends on the
# column order of `df`, and pd.concat aligns the two halves on column names.
keep_as_players = keep.rename(
    columns=lambda x: x.replace('winner', 'player1').replace('loser', 'player2')
)
swap_as_players = swap.rename(
    columns=lambda x: x.replace('loser', 'player1').replace('winner', 'player2')
)
stacked = pd.concat([keep_as_players, swap_as_players], ignore_index=True)

In [34]:
# Translate the card-id columns of `stacked` into card names.
with open('./dicts/card_mappings.json') as mapping_file:
    card_mappings = json.load(mapping_file)

# Values are cast to str before the lookup (JSON object keys are always
# strings); anything without an entry in the mapping becomes NaN.
# NOTE(review): presumably card_mappings is a flat {id: name} object - confirm.
id_columns = [name for name in stacked.columns if name.endswith('_id')]
for name in id_columns:
    ids_as_text = stacked[name].astype(str)
    stacked[name] = ids_as_text.map(card_mappings)
stacked.head()

Unnamed: 0,player1_card_1_id,player1_card_2_id,player1_card_3_id,player1_card_4_id,player1_card_5_id,player1_card_6_id,player1_card_7_id,player1_card_8_id,player1_tower_card_id,player2_card_1_id,player2_card_2_id,player2_card_3_id,player2_card_4_id,player2_card_5_id,player2_card_6_id,player2_card_7_id,player2_card_8_id,player2_tower_card_id,Win
0,Mega Knight,Golem,Elite Barbarians,Goblin Barrel,Inferno Tower,Rocket,Poison,Balloon,Tower Princess,Skeleton Army,Musketeer,Baby Dragon,Valkyrie,Arrows,Witch,Mini P.E.K.K.A,Goblin Barrel,Tower Princess,1
1,Mega Knight,Wizard,Furnace,Boss Bandit,Goblin Barrel,Balloon,Rage,Skeleton Army,Royal Chef,Valkyrie,Royal Recruits,Bandit,The Log,Mega Knight,Musketeer,Arrows,Witch,Dagger Duchess,1
2,Skeletons,Bats,Little Prince,Mother Witch,P.E.K.K.A,Ice Wizard,Goblin Demolisher,Zap,Tower Princess,Valkyrie,Elite Barbarians,Prince,Hog Rider,Goblin Barrel,Skeleton Army,The Log,Musketeer,Tower Princess,1
3,Knight,Goblin Barrel,Ice Spirit,Goblin Gang,Inferno Tower,The Log,Princess,Rocket,Tower Princess,Mega Knight,Firecracker,Valkyrie,Goblin Gang,Magic Archer,The Log,Rocket,Witch,Dagger Duchess,1
4,The Log,Valkyrie,Tesla,Goblin Gang,Princess,Goblin Barrel,Ice Spirit,Rocket,Tower Princess,Tesla,Knight,X-Bow,Rocket,Ice Wizard,Skeletons,The Log,Tornado,Tower Princess,1


In [36]:
# Split the table into one frame per side: every column whose name contains
# 'player1' / 'player2' respectively (the 'Win' label is in neither).
p1 = stacked.filter(like='player1')
p2 = stacked.filter(like='player2')

In [37]:
def ohe(i):
    """Multi-hot encode a frame of per-slot values.

    Parameters
    ----------
    i : pd.DataFrame
        One row per deck, one column per slot, each cell holding a card value.
        The frame's index must be unnamed, because the code relies on
        ``reset_index()`` producing a column called ``'index'``.

    Returns
    -------
    pd.DataFrame
        One row per original index label (index named ``'index'``) and one
        integer column per distinct card value; a cell is 1 when that card
        occurs in any slot of the row and 0 otherwise.
    """
    # Long format: one record per (row label, slot, card).
    long_form = pd.melt(
        i.reset_index(),
        id_vars='index',
        var_name='slot',
        value_name='card',
    )
    cards_by_row = long_form.set_index('index')['card']

    # One indicator row per slot entry, then collapse the slot entries of each
    # original row with max() so a repeated card still yields 1.
    indicators = pd.get_dummies(cards_by_row, dtype=int)
    return indicators.groupby(level=0).max()

# Encode each side straight from `stacked` instead of from the previous value
# of p1/p2, so re-running this cell cannot feed an already-encoded matrix back
# into ohe().
p1 = ohe(stacked[[col for col in stacked.columns if 'player1' in col]])
p2 = ohe(stacked[[col for col in stacked.columns if 'player2' in col]])

In [41]:
# Signed deck difference per battle: +1 where only player1 holds the card,
# -1 where only player2 does, 0 when both or neither do.
# .sub(fill_value=0) treats a card column that exists on only one side as
# all-zero on the other side; plain `p1 - p2` would turn such a column into
# NaN, which the KMeans fit further down cannot handle.
combo = p1.sub(p2, fill_value=0)
combo.head()

Unnamed: 0_level_0,Archer Queen,Archers,Arrows,Baby Dragon,Balloon,Bandit,Barbarian Barrel,Barbarian Hut,Barbarians,Bats,...,Tornado,Tower Princess,Valkyrie,Void,Wall Breakers,Witch,Wizard,X-Bow,Zap,Zappies
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,-1,-1,1,0,0,0,0,0,...,0,0,-1,0,0,-1,0,0,0,0
1,0,0,-1,0,1,-1,0,0,0,0,...,0,0,-1,0,0,-1,1,0,0,0
2,0,0,0,0,0,0,0,0,0,1,...,0,0,-1,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,1,-1,0,0,-1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,-1,0,1,0,0,0,0,-1,0,0


In [62]:
from sklearn.cluster import KMeans

# Cluster the signed deck-difference rows into 100 groups; the seed is fixed
# so the run is repeatable. Each centroid is one row of `centers`.
kmeans = KMeans(n_clusters=100, random_state=42)
kmeans.fit(combo)
centers = kmeans.cluster_centers_

In [71]:
def vectorize_deck(cards, tower, combo_df):
    """Encode a deck as a 0/1 Series over the columns of ``combo_df``.

    Parameters
    ----------
    cards : iterable of str
        Regular card names. Any iterable is accepted (list, tuple, Index, ...).
    tower : str
        Tower card name.
    combo_df : pd.DataFrame
        Frame whose column labels are the known card names.

    Returns
    -------
    pd.Series
        Integer Series indexed by ``combo_df.columns`` with 1 at every supplied
        card (tower included) that is a known column and 0 elsewhere. Names
        that are not columns are reported with a printed warning and skipped.
    """
    feature = pd.Series(0, index=combo_df.columns, dtype=int)
    # list(cards) instead of `cards + [tower]`: `+` only concatenates lists,
    # so a tuple would raise and an ndarray/Series would broadcast instead.
    card_list = list(cards) + [tower]
    for card in card_list:
        if card in feature.index:
            feature.at[card] = 1
        else:
            print(f"Warning: '{card}' not found in combo_df.columns")
    return feature

def recommend_true_counters(v_query, centers, combo_df, n_keep=8):
    """
    Recommend a deck intended to counter ``v_query``.

    Steps:
      1) Pick the centroid ``c`` whose entries, summed over the cards present
         in ``v_query``, are smallest (most negative).
      2) Pick ``n_keep`` regular cards + 1 tower card for a binary deck
         ``v_cand`` aiming to minimize ||(v_cand - v_query) - c||^2. Switching
         one card from 0 to 1 changes that objective by
         ``1 - 2*v_query - 2*c``, so the cards with the smallest such weight
         are selected.

    Parameters
    ----------
    v_query : array-like, shape (M,)
        0/1 deck vector in the column order of ``combo_df``. A ``pd.Series``
        (as returned by ``vectorize_deck``), ndarray or list is accepted.
    centers : ndarray, shape (K, M)
        KMeans centroids fit on F = V_win - V_loss.
    combo_df : pd.DataFrame
        DataFrame whose columns are the M card names, same order as v_query.
    n_keep : int
        Number of regular cards to pick (default=8).

    Returns
    -------
    dict with keys:
      'regular_cards' : list of str, ordered from lowest to highest weight
      'tower_card'    : str
      'deck_vector'   : ndarray of int, shape (M,)

    Raises
    ------
    KeyError
        If none of the known tower cards is a column of ``combo_df``.
    """
    # Work on plain ndarrays. Indexing a label-indexed Series with integer
    # positions (weight[tower_idx], weight[regs]) is deprecated in pandas and
    # emitted a FutureWarning in this notebook.
    v_query = np.asarray(v_query)
    centers = np.asarray(centers)
    columns = combo_df.columns

    # Identify tower vs regular positions; tower cards absent from the columns
    # are skipped instead of failing the lookup.
    tower_names = ['Dagger Duchess', 'Tower Princess', 'Royal Chef']
    tower_idx = [columns.get_loc(n) for n in tower_names if n in columns]
    if not tower_idx:
        raise KeyError(f"None of the tower cards {tower_names} are columns of combo_df")
    is_tower = np.zeros(len(columns), dtype=bool)
    is_tower[tower_idx] = True
    regs = np.flatnonzero(~is_tower)

    # Which features are in the query deck?
    input_idx = np.where(v_query == 1)[0]

    # 1) Centroid with the smallest total over the query's cards.
    sums = centers[:, input_idx].sum(axis=1)    # shape (K,)
    best_k = int(np.argmin(sums))
    c = centers[best_k]

    # 2) Per-card weight; lower means "include this card".
    # NOTE(review): wherever |c| < 0.5 the -2*v_query term dominates, so the
    # query's own cards get the lowest weights and the recommendation tends to
    # mirror the query deck.
    weight = 1 - 2 * v_query - 2 * c

    # 3) Tower card with the smallest weight.
    t_idx = tower_idx[int(np.argmin(weight[tower_idx]))]
    tower_choice = columns[t_idx]

    # 4) The n_keep regular cards with the smallest weight.
    top_regs = regs[np.argsort(weight[regs])[:n_keep]]
    regular_choices = columns[top_regs].tolist()

    # 5) Assemble the 0/1 vector of the recommended deck.
    v_cand = np.zeros_like(v_query, dtype=int)
    v_cand[top_regs] = 1
    v_cand[t_idx] = 1

    return {
        'regular_cards': regular_choices,
        'tower_card': tower_choice,
        'deck_vector': v_cand
    }


In [74]:
# Example query: encode one deck and print the recommended counter deck.
my_deck = [
    'Night Witch',
    'Golem',
    'Ice Spirit',
    'Hog Rider',
    'Skeleton Army',
    'Valkyrie',
    'Knight',
    'Electro Wizard',
]
tower = 'Tower Princess'

v = vectorize_deck(my_deck, tower, combo)
rec = recommend_true_counters(v, centers, combo)

for label, key in (
    ("Recommended tower:", 'tower_card'),
    ("Recommended regular cards:", 'regular_cards'),
):
    print(label, rec[key])

Recommended tower: Tower Princess
Recommended regular cards: ['Skeleton Army', 'Knight', 'Electro Wizard', 'Night Witch', 'Golem', 'Valkyrie', 'Mega Knight', 'Ice Spirit']


  t_idx = tower_idx[int(np.argmin(weight[tower_idx]))]
  reg_weights = weight[regs]
