# Synergy - This Time We're Serious

In [None]:
from collections import defaultdict
from itertools import combinations
from pathlib import Path

In [None]:
# Columns of games_df whose names mark a drawn card or an opening-hand card
draw_columns = [
    name
    for name in games_df.columns
    if name.startswith(("drawn_", "opening_hand_"))
]

def extract_drawn_cards(row):
    """Collect the cards seen in a single game row.

    A card counts as seen when any of its columns listed in ``draw_columns``
    holds a value greater than zero. Names found in ``basic_lands`` are left out.

    Args:
        row: A games_df row providing ``"draft_id"``, ``"won"`` and every
            column named in ``draw_columns``.

    Returns:
        A ``(draft_id, seen_cards, won)`` tuple, where ``seen_cards`` is a set of
        card names and ``won`` is ``row["won"]`` converted to ``int``.
    """
    draft_id = row["draft_id"]
    won = int(row["won"])  # int copy; the source row is not modified

    found = set()
    for col in draw_columns:
        # Cheap numeric test first; anything not strictly positive is skipped
        if not (row[col] > 0):
            continue

        # Strip whichever known prefix the column carries to get the card name
        for prefix in ("drawn_", "opening_hand_"):
            if col.startswith(prefix):
                card_name = col[len(prefix):]
                break
        else:
            continue  # column has neither prefix

        if card_name in basic_lands:
            continue
        found.add(card_name)

    return draft_id, found, won

# Build draws_df: load the cached parquet when it exists; otherwise run the
# slow row-by-row extraction over games_df and cache the result for next time.
draws_cache_path = Path("draws_data.parquet")

if draws_cache_path.exists():
    draws_df = pd.read_parquet(draws_cache_path)
else:
    draws_data = [extract_drawn_cards(row) for _, row in games_df.iterrows()]
    draws_df = pd.DataFrame(draws_data, columns=["draft_id", "seen_cards", "won"])
    # Write each card set as a sorted list so the column is stored as a plain
    # list of strings; the in-memory draws_df keeps its sets.
    draws_df.assign(seen_cards=draws_df["seen_cards"].map(sorted)).to_parquet(
        draws_cache_path, index=False
    )


In [None]:
# Unique card names appearing in any game's seen_cards, in sorted order
all_cards = sorted({card for cards in draws_df["seen_cards"] for card in cards})
# Set of the names listed in cards_df
valid_cards = set(cards_df['name'].tolist())

# MultiIndex holding every ordered pair of distinct cards
index = pd.MultiIndex.from_tuples(
    [(card1, card2) for card1 in all_cards for card2 in all_cards if card1 != card2],
    names=["card1", "card2"],
)

# Two integer counters per pair, created already zero-filled. Building an
# empty (object-dtype) frame and calling fillna(0) relies on silent
# downcasting, which pandas has deprecated.
synergy_table = pd.DataFrame(0, index=index, columns=["wins_together", "games_together"])

synergy_table.head()  # all zeros, just to see structure

In [None]:
# Running totals per card pair: { (card1, card2): [games_together, wins_together] }
synergy_dict = defaultdict(lambda: [0, 0])

def update_synergy_dict(row):
    """Add one game's card pairs to the module-level ``synergy_dict``.

    Args:
        row: Mapping-like game record with ``"seen_cards"`` (an iterable of
            card names) and ``"won"`` (1 for a win, 0 for a loss).

    Returns:
        None. ``synergy_dict`` is updated in place: for every pair of distinct
        cards in the game, games_together grows by 1 and wins_together by
        ``row["won"]``.
    """
    won_game = row["won"]  # 1 for win, 0 for loss

    # Sort (and de-duplicate) before pairing so each unordered pair always maps
    # to the same (card1, card2) key. Iterating an unordered collection directly
    # could yield (A, B) for one game and (B, A) for another, splitting a
    # pair's counts across two keys.
    ordered_cards = sorted(set(row["seen_cards"]))

    for pair in combinations(ordered_cards, 2):
        counts = synergy_dict[pair]
        counts[0] += 1  # games_together
        counts[1] += won_game  # wins_together

# Walk every game once, folding its card pairs into synergy_dict
draws_df.apply(update_synergy_dict, axis=1)

# Build the table in one shot from the accumulated counts
pair_index = pd.MultiIndex.from_tuples(list(synergy_dict.keys()), names=["card1", "card2"])
synergy_table = pd.DataFrame(
    list(synergy_dict.values()),
    index=pair_index,
    columns=["games_together", "wins_together"],
)
synergy_table["win_rate"] = synergy_table["wins_together"] / synergy_table["games_together"]
synergy_table.sort_values(by="win_rate", ascending=False).head(10)

In [None]:
# Drop pairs that appeared together in fewer than this many games
MIN_GAMES_TOGETHER = 10

# .copy() makes the filtered table an independent frame, so adding columns to
# it later does not trigger pandas' SettingWithCopyWarning.
synergy_table_filtered = synergy_table[
    synergy_table["games_together"] >= MIN_GAMES_TOGETHER
].copy()
# A bare expression in the middle of a cell is not displayed, so print it
print(synergy_table_filtered.shape)
synergy_table_filtered.to_csv("synergy_table.csv", index=True)
synergy_table_filtered.to_parquet("synergy_table.parquet", index=True)

synergy_table_filtered.sort_values(by="win_rate", ascending=False).head(10)

Some of these are probably just examples of "both of these cards are good/somewhat rare, and if you drew them both it's likely a win" even if they don't particularly synergize. But some of these do make sense. Nocturnal Hunger + Ral allows you to get rid of something that might kill your planeswalker AND it boosts him. Fountainport generates blockers to keep Ral alive.

But then some of these are just bizarre. Thornplate Intimidator and Shore Up? Neither is great individually, and they have no obvious synergy.

I suspect that if we raise the threshold any higher, we will start losing some potential synergies.

In [None]:
# Histogram of games_together across the filtered pairs
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(synergy_table_filtered["games_together"], bins=50, edgecolor="black")
ax.set_xlabel("Games Together")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Games_Together")
ax.set_yscale("log")  # log y-axis so the sparse tail stays visible
plt.show()


In [None]:
# Ten filtered pairs with the highest wins_together
synergy_table_filtered.sort_values("wins_together", ascending=False).head(10)

In [None]:
#TODO we can probably remove this

# Spot-check up to 20 random pairs (fixed seed keeps the sample reproducible).
# Capping at the table length avoids the ValueError that sample() raises when
# asked for more rows than exist.
sample_size = min(20, len(synergy_table_filtered))
sampled_synergies = synergy_table_filtered.sample(sample_size, random_state=42)

# Compute and print results
for (card1, card2), row in sampled_synergies.iterrows():
    card1_wr = gp_wr_map.get(card1, np.nan)  # NaN when the card has no entry
    card2_wr = gp_wr_map.get(card2, np.nan)
    pair_wr = row["win_rate"]

    # Pair win rate minus the mean of the two individual win rates. NaN
    # propagates through the arithmetic, so a missing win rate gives SynStr = nan.
    synergy_strength = pair_wr - (card1_wr + card2_wr) / 2

    # iterrows() yields float rows here, so cast the count back to int for display
    print(f"{card1} + {card2}: Pair WR = {pair_wr:.3f}, C1WR = {card1_wr:.3f}, C2WR = {card2_wr:.3f}, SynStr = {synergy_strength:.3f}, Seen={int(row['games_together'])}")


In [None]:
def synergy_score(row, gp_wr_map, full_weight_games=500, half_weight_games=100):
    """Score how far a card pair's win rate exceeds the mean of its two cards' win rates.

    Args:
        row: One synergy-table row. ``row.name`` must be the ``(card1, card2)``
            index tuple, and the row must provide ``"games_together"`` and
            ``"win_rate"``.
        gp_wr_map: Mapping from card name to that card's individual win rate.
            Cards missing from the map are treated as NaN.
        full_weight_games: Minimum ``games_together`` at which the raw synergy
            strength is returned unchanged. Defaults to 500.
        half_weight_games: Minimum ``games_together`` at which half of the raw
            synergy strength is returned. Defaults to 100.

    Returns:
        The frequency-adjusted synergy score, or NaN when either card has no
        individual win rate.
    """
    card1, card2 = row.name  # card pair from the MultiIndex
    games_together = row["games_together"]
    pair_wr = row["win_rate"]

    card1_wr = gp_wr_map.get(card1, np.nan)
    card2_wr = gp_wr_map.get(card2, np.nan)
    if np.isnan(card1_wr) or np.isnan(card2_wr):
        return np.nan

    # Baseline is the plain mean of the two individual win rates
    expected_wr = (card1_wr + card2_wr) / 2
    synergy_strength = pair_wr - expected_wr

    # Trust the raw strength less as the pair's sample size shrinks
    if games_together >= full_weight_games:
        return synergy_strength
    if games_together >= half_weight_games:
        return synergy_strength / 2

    # Low-sample branch: sqrt(100 * s) / 100 == sqrt(s) / 10, with negative
    # strengths clamped to 0.
    # NOTE(review): for 0 < s < 0.01 this is larger than s itself, and negative
    # synergy is kept in the other two tiers but zeroed here - confirm intended.
    return np.sqrt(max(synergy_strength, 0) * 100) / 100

# Score every filtered pair and store the result as a "synergy_score" column.
# Assigning through .loc["synergy_score"] would target a row label rather than
# a column, leaving later sorts by "synergy_score" without a column to use.
# assign() returns a new frame, so no chained-assignment warning is raised.
synergy_table_filtered = synergy_table_filtered.assign(
    synergy_score=synergy_table_filtered.apply(synergy_score, axis=1, args=(gp_wr_map,))
)
synergy_table_filtered.head()


In [None]:
# Known limitation (circle back): if the base win rate is 53%, drawing two
# 65% win rate cards probably puts you closer to a 75% win rate in a vacuum,
# so the averaged baseline is still slightly off.
synergy_table_filtered.sort_values("synergy_score", ascending=False).head(10)