## Importing of data

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

df = pd.read_csv('data/heroes_updated.csv')
df_hero = pd.read_csv('data/batch1.csv') # needs to be replaced by full draft dataset
df_match_history = pd.read_csv('data/toBe7_process_FulldraftWinLose.csv') # needs to be replaced by full match history dataset
print(df.head())        
print(len(df))          
print(df.columns)  

   id     name         roles  specialities        lane possible_lanes  \
0   1     Miya      Marksman   Reap,Damage  Gold Laner            NaN   
1   2  Balmond       Fighter  Damage,Regen     Jungler      Exp Laner   
2   3    Saber      Assassin   Charge,Reap      Roamer        Jungler   
3   4    Alice     Mage,Tank  Charge,Regen   Exp Laner        Jungler   
4   5     Nana  Mage,Support    Poke,Burst   Mid Laner            NaN   

                                                icon  
0  https://static.wikia.nocookie.net/mobile-legen...  
1  https://static.wikia.nocookie.net/mobile-legen...  
2  https://static.wikia.nocookie.net/mobile-legen...  
3  https://static.wikia.nocookie.net/mobile-legen...  
4  https://static.wikia.nocookie.net/mobile-legen...  
131
Index(['id', 'name', 'roles', 'specialities', 'lane', 'possible_lanes',
       'icon'],
      dtype='object')


## Preprocessing of data
1. df_hero_id to show id and name only for encoding_state
2. df_role to show name and role 
3. df_specialty to show name and specialty
4. df_lane to show name and lane/possible lanes

In [3]:
df_hero_to_id = {row['name']: row['id'] - 1 for _, row in df.iterrows()}
df_id_to_hero = {v: k for k, v in df_hero_to_id.items()}
df_role = {row['name']: row['roles'] for _, row in df.iterrows()}
df_specialities = {row['name']: row['specialities'] for _, row in df.iterrows()}
df_lane = {row['name']: row['lane'] for _, row in df.iterrows()}

# Example usage:
print(df_hero_to_id["Saber"])
print(df_id_to_hero[2])
print(df_role["Saber"])
print(df_specialities["Saber"])
print(df_lane["Saber"])

# Total Dimension / Available Heroes
print(len(df_hero_to_id))  

2
Saber
Assassin
Charge,Reap
Roamer
131


## Banned Hero Function

In [4]:
import random

# Scraped from mobilelegends.com/rank
# 2/17/2026
BAN_RATES = {
    "Gloo": 81.00,        "Sora": 67.09,        "Aamon": 41.63,
    "Helcurt": 37.93,     "Freya": 35.15,       "Yi Sun-shin": 33.03,
    "Alice": 30.55,       "Estes": 28.16,       "Diggie": 24.59,
    "Saber": 24.27,       "Floryn": 22.42,      "Hayabusa": 21.41,
    "Leomord": 20.30,     "Fredrinn": 20.22,    "Angela": 18.43,
    "Hilda": 18.22,       "Lancelot": 13.79,    "Guinevere": 13.37,
    "Ixia": 13.28,        "Sun": 12.69,         "Yu Zhong": 11.67,
    "Gusion": 11.23,      "Franco": 11.05,      "Granger": 10.72,
    "Grock": 10.12,       "Zetian": 8.80,       "X.Borg": 8.02,
    "Hanzo": 7.40,        "Minotaur": 7.40,     "Belerick": 7.05,
    "Tigreal": 6.85,      "Thamuz": 6.58,       "Minsitthar": 6.47,
    "Fanny": 6.45,        "Kadita": 6.15,       "Eudora": 6.02,
    "Lesley": 5.94,       "Lapu-Lapu": 5.45,    "Kalea": 5.01,
    "Cici": 4.97,         "Rafaela": 4.90,      "Chip": 4.61,
    "Hanabi": 4.57,       "Nana": 4.34,         "Yin": 4.29,
    "Zhuxin": 4.18,       "Claude": 4.16,       "Karrie": 4.12,
    "Harley": 3.50,       "Atlas": 3.37,        "Johnson": 3.29,
    "Obsidia": 3.29,      "Julian": 3.17,       "Chou": 2.85,
    "Miya": 2.68,         "Esmeralda": 2.48,    "Cyclops": 2.44,
    "Natalia": 2.38,      "Lolita": 2.25,       "Alucard": 2.21,
    "Akai": 2.20,         "Karina": 2.05,       "Joy": 1.86,
    "Argus": 1.84,        "Lukas": 1.80,        "Uranus": 1.72,
    "Vexana": 1.71,       "Silvanna": 1.67,     "Badang": 1.66,
    "Khufra": 1.64,       "Carmilla": 1.50,     "Phoveus": 1.47,
    "Arlott": 1.45,       "Alpha": 1.43,        "Pharsa": 1.39,
    "Layla": 1.36,        "Selena": 1.33,       "Benedetta": 1.29,
    "Suyou": 1.17,        "Kaja": 1.16,         "Clint": 1.16,
    "Valir": 1.11,        "Hylos": 1.07,        "Kagura": 1.03,
    "Gatotkaca": 1.02,    "Melissa": 0.99,      "Chang'e": 0.95,
    "Mathilda": 0.95,     "Kimmy": 0.91,        "Lylia": 0.90,
    "Ruby": 0.80,         "Zilong": 0.75,       "Faramis": 0.73,
    "Wanwan": 0.71,       "Irithel": 0.66,      "Odette": 0.65,
    "Martis": 0.57,       "Aldous": 0.56,       "Cecilion": 0.54,
    "Dyrroth": 0.54,      "Valentina": 0.53,    "Nolan": 0.52,
    "Lunox": 0.51,        "Khaleed": 0.50,      "Ling": 0.46,
    "Brody": 0.46,        "Xavier": 0.44,       "Natan": 0.42,
    "Jawhead": 0.37,      "Popol and Kupa": 0.37, "Paquito": 0.35,
    "Terizla": 0.35,      "Gord": 0.34,         "Yve": 0.33,
    "Bane": 0.33,         "Masha": 0.32,        "Zhask": 0.32,
    "Balmond": 0.32,      "Baxia": 0.30,        "Aurora": 0.30,
    "Vale": 0.29,         "Beatrix": 0.29,      "Moskov": 0.27,
    "Aulus": 0.23,        "Novaria": 0.22,      "Roger": 0.17,
    "Barats": 0.14,       "Luo Yi": 0.14,       "Edith": 0.14,
    "Bruno": 0.11,        "Harith": 0.11,
}
# Draft progression: {state: (ally_pick_count, enemy_pick_count)}
DRAFT_STATES = {
    'FP1': (0, 0),   # First pick, no picks yet(picking 1)
    'FP2': (1, 2),   # First pick, ally took 1(picking 2), enemy took 2
    'FP3': (3, 4),   # First pick, ally took 3(picking 2), enemy took 4
    'SP1': (0, 1),   # Second pick, enemy took 1(picking 2)
    'SP2': (2, 3),   # Second pick, ally took 2(picking 2), enemy has 3
    'SP3': (4, 5),   # Second pick, ally took 2(picking 1), enemy has 5
}

HERO_POOL = list(df_hero_to_id)   # 131 heroes
print(f'Loaded {HERO_POOL} heroes.')

Loaded ['Miya', 'Balmond', 'Saber', 'Alice', 'Nana', 'Tigreal', 'Alucard', 'Karina', 'Akai', 'Franco', 'Bane', 'Bruno', 'Clint', 'Rafaela', 'Eudora', 'Zilong', 'Fanny', 'Layla', 'Minotaur', 'Lolita', 'Hayabusa', 'Freya', 'Gord', 'Natalia', 'Kagura', 'Chou', 'Sun', 'Alpha', 'Ruby', 'Yi Sun-shin', 'Moskov', 'Johnson', 'Cyclops', 'Estes', 'Hilda', 'Aurora', 'Lapu-Lapu', 'Vexana', 'Roger', 'Karrie', 'Gatotkaca', 'Harley', 'Irithel', 'Grock', 'Argus', 'Odette', 'Lancelot', 'Diggie', 'Hylos', 'Zhask', 'Helcurt', 'Pharsa', 'Lesley', 'Jawhead', 'Angela', 'Gusion', 'Valir', 'Martis', 'Uranus', 'Hanabi', "Chang'e", 'Kaja', 'Selena', 'Aldous', 'Claude', 'Vale', 'Leomord', 'Lunox', 'Hanzo', 'Belerick', 'Kimmy', 'Thamuz', 'Harith', 'Minsitthar', 'Kadita', 'Faramis', 'Badang', 'Khufra', 'Granger', 'Guinevere', 'Esmeralda', 'Terizla', 'X.Borg', 'Ling', 'Dyrroth', 'Lylia', 'Baxia', 'Masha', 'Wanwan', 'Silvanna', 'Carmilla', 'Cecilion', 'Atlas', 'Popol and Kupa', 'Yu Zhong', 'Luo Yi', 'Benedetta', 'Kha

In [5]:
import math

# Pre-compute softmax weights once (temperature controls spread)
# Higher temperature, more uniform  (less bias toward top heroes)
# Lower  temperature, more skewed   (top heroes dominate)
# T = 20 gives a good realistic middle ground
TEMPERATURE = 20.0

def _compute_softmax_weights(ban_rates: dict, temperature: float) -> list:
    heroes = list(ban_rates.keys())
    rates  = [ban_rates[h] for h in heroes]
    scaled = [r / temperature for r in rates]
    max_s  = max(scaled)
    exps   = [math.exp(s - max_s) for s in scaled]
    total  = sum(exps)
    return [e / total for e in exps]

SOFTMAX_WEIGHTS = _compute_softmax_weights(BAN_RATES, TEMPERATURE)


def getBannedHeroes() -> list:
    """
    Returns a list of 10 unique banned heroes sampled without replacement,
    weighted by real ban rates (softmax-scaled to avoid heavy bias).
    """

    keys = [
        (random.random() ** (1.0 / w), hero)
        for hero, w in zip(HERO_POOL, SOFTMAX_WEIGHTS)
    ]
    keys.sort(reverse=True)
    return [hero for weight, hero in keys[:10]]


sample = getBannedHeroes()
print('Sample call, getBannedHeroes():')
print(sample)

Sample call, getBannedHeroes():
['Miya', 'Ling', 'Balmond', 'Yin', 'Arlott', 'Moskov', 'Gord', 'Julian', 'Saber', 'Alice']


## Function getPick

In [6]:
def getPicks(banned_heroes, state):
    ally_needed, enemy_needed = DRAFT_STATES[state]

    banned_set = set(banned_heroes)

    found_valid_game = False
    while not found_valid_game:
        # Pick a random game from the CSV
        random_row = df_hero.sample(n=1).iloc[0]

        # Get all 5 heroes from each team
        ally_team = []
        for i in range(1, 6):  # Left_1 to Left_5
            hero = random_row[f'Left_{i}']
            ally_team.append(hero)

        enemy_team = []
        for i in range(1, 6):  # Right_1 to Right_5
            hero = random_row[f'Right_{i}']
            enemy_team.append(hero)

        # Remove banned heroes from both teams
        ally_available = []
        for hero in ally_team:
            if hero not in banned_set:
                ally_available.append(hero)

        enemy_available = []
        for hero in enemy_team:
            if hero not in banned_set:
                enemy_available.append(hero)

        # Check if we have enough heroes left
        if len(ally_available) >= ally_needed and len(enemy_available) >= enemy_needed:
            found_valid_game = True

    # Take only the number of picks we need
    ally_picks = []
    for i in range(ally_needed):
        ally_picks.append(ally_available[i])

    enemy_picks = []
    for i in range(enemy_needed):
        enemy_picks.append(enemy_available[i])

    return {
        'ally': ally_picks,
        'enemy': enemy_picks
    }

## Training Dataset Preprocess

### Helper Functions

In [7]:
def split_attr(val):
    if pd.isna(val):
        return []
    return [x.strip() for x in str(val).replace('/', ',').split(',')]

def clean_role(r):
    r = r.strip()
    if r == 'Supprot': return 'Support'
    if r == 'Jungle':  return None
    return r

def clean_lane(val):
    lane = str(val).strip()
    if lane == 'EXP Laner': return 'Exp Laner'
    return lane

In [8]:
print(df_match_history.head())
print(f"Total matches: {len(df_match_history)}")
pick_cols = [f'winpick{i}' for i in range(1, 6)] + [f'losepick{i}' for i in range(1, 6)]
for col in pick_cols:
    df_match_history[col] = df_match_history[col].str.strip().replace("Change", "Chang'e")
hero_names = set(df['name'].str.strip())
all_match_heroes = set()
for col in pick_cols:
    all_match_heroes.update(df_match_history[col].unique())
missing = all_match_heroes - hero_names
print(f"Unknown heroes: {missing}")  # Should be empty set()

hero_to_id = {row['name'].strip(): int(row['id']) - 1 for _, row in df.iterrows()}
id_to_hero = {v: k for k, v in hero_to_id.items()}
NUM_HEROES = len(hero_to_id)  # 131
print(f"Total heroes: {NUM_HEROES}")

hero_roles = {}
hero_specs = {}
hero_lanes = {}

for _, row in df.iterrows():
    name = row['name'].strip()
    hero_roles[name] = [clean_role(r) for r in split_attr(row['roles']) if clean_role(r)]
    hero_specs[name] = split_attr(row['specialities'])
    hero_lanes[name] = [clean_lane(row['lane'])]

# ── 6. Encoding categories ─────────────────────────────────────────────────────
ROLES = sorted(['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'])
SPECS = sorted(['Burst', 'Charge', 'Chase', 'Control', 'Crowd Control', 'Damage',
                'Finisher', 'Guard', 'Initiator', 'Magic Damage', 'Mixed Damage',
                'Poke', 'Push', 'Reap', 'Regen', 'Support'])
LANES = sorted(['Exp Laner', 'Gold Laner', 'Jungler', 'Mid Laner', 'Roamer'])

role_to_id = {r: i for i, r in enumerate(ROLES)}
spec_to_id = {s: i for i, s in enumerate(SPECS)}
lane_to_id = {l: i for i, l in enumerate(LANES)}

NUM_ROLES = len(ROLES)   # 6
NUM_SPECS = len(SPECS)   # 16
NUM_LANES = len(LANES)   # 5

print(f"Roles: {NUM_ROLES} | Specs: {NUM_SPECS} | Lanes: {NUM_LANES}")
print(f"Final vector size: {NUM_HEROES*2 + NUM_ROLES*2 + NUM_SPECS*2 + NUM_LANES*2}")  # 316

def encode_team(heroes):
    hero_vec = np.zeros(NUM_HEROES, dtype=np.float32)
    role_vec = np.zeros(NUM_ROLES,  dtype=np.float32)
    spec_vec = np.zeros(NUM_SPECS,  dtype=np.float32)
    lane_vec = np.zeros(NUM_LANES,  dtype=np.float32)
    for h in heroes:
        hero_vec[hero_to_id[h]] = 1.0
        for r in hero_roles.get(h, []):
            if r in role_to_id:
                role_vec[role_to_id[r]] = 1.0
        for s in hero_specs.get(h, []):
            if s in spec_to_id:
                spec_vec[spec_to_id[s]] = 1.0
        for l in hero_lanes.get(h, []):
            if l in lane_to_id:
                lane_vec[lane_to_id[l]] = 1.0
    return hero_vec, role_vec, spec_vec, lane_vec

# Each match → 2 samples:
#   Sample A: ally=win_team,  enemy=lose_team → label 1  (ally wins)
#   Sample B: ally=lose_team, enemy=win_team  → label 0  (ally loses)
X_win   = []
y_win   = []
skipped = 0

for _, row in df_match_history.iterrows():
    win_heroes  = [row[f'winpick{i}'].strip()  for i in range(1, 6)]
    lose_heroes = [row[f'losepick{i}'].strip() for i in range(1, 6)]

    if any(h not in hero_to_id for h in win_heroes + lose_heroes):
        skipped += 1
        continue

    win_h,  win_r,  win_s,  win_l  = encode_team(win_heroes)
    lose_h, lose_r, lose_s, lose_l = encode_team(lose_heroes)

    # Sample A: ally=win → label 1
    X_win.append(np.concatenate([win_h, lose_h, win_r, lose_r, win_s, lose_s, win_l, lose_l]))
    y_win.append(1.0)

    # Sample B: ally=lose → label 0
    X_win.append(np.concatenate([lose_h, win_h, lose_r, win_r, lose_s, win_s, lose_l, win_l]))
    y_win.append(0.0)

X_win = np.stack(X_win).astype(np.float32)  # (15808, 316)
y_win = np.array(y_win,  dtype=np.float32)  # (15808,)

print(f"\nSkipped : {skipped} matches")
print(f"Samples : {len(X_win)}")
print(f"X shape : {X_win.shape}")   # (15808, 316)
print(f"y shape : {y_win.shape}")   # (15808,)
print(f"Wins    : {int(y_win.sum())} | Losses: {int((1 - y_win).sum())}")

np.savez('data/draft_dataset_316.npz', X=X_win, y=y_win)
print("\nSaved: data/draft_dataset_316.npz")

def check_sample(idx):
    s = X_win[idx]
    ally_heroes  = [id_to_hero[i] for i in np.where(s[0:131]   == 1)[0]]
    enemy_heroes = [id_to_hero[i] for i in np.where(s[131:262] == 1)[0]]
    ally_roles   = [ROLES[i]      for i in np.where(s[262:268] == 1)[0]]
    enemy_roles  = [ROLES[i]      for i in np.where(s[268:274] == 1)[0]]
    ally_specs   = [SPECS[i]      for i in np.where(s[274:290] == 1)[0]]
    enemy_specs  = [SPECS[i]      for i in np.where(s[290:306] == 1)[0]]
    ally_lanes   = [LANES[i]      for i in np.where(s[306:311] == 1)[0]]
    enemy_lanes  = [LANES[i]      for i in np.where(s[311:316] == 1)[0]]
    print(f"\n─── Sample {idx} ───")
    print(f"Ally  heroes : {ally_heroes}")
    print(f"Enemy heroes : {enemy_heroes}")
    print(f"Ally  roles  : {ally_roles}")
    print(f"Enemy roles  : {enemy_roles}")
    print(f"Ally  specs  : {ally_specs}")
    print(f"Enemy specs  : {enemy_specs}")
    print(f"Ally  lanes  : {ally_lanes}")
    print(f"Enemy lanes  : {enemy_lanes}")
    print(f"Label        : {int(y_win[idx])}  (1=ally wins, 0=ally loses)")

check_sample(1)   # Match 1 — ally wins
check_sample(2)   # Match 1 — ally loses (flipped)

        id  winpick1   winpick2 winpick3   winpick4  winpick5 losepick1  \
0  Match_1    Claude       Gord   Martis    Paquito  Silvanna    Angela   
1  Match_2     Atlas  Lapu-Lapu   Lesley     Martis    Xavier     Estes   
2  Match_3     Atlas     Gusion   Karina    Melissa  Silvanna   Beatrix   
3  Match_4  Fredrinn     Kadita  Terizla  Valentina    Wanwan     Brody   
4  Match_5    Arlott     Lesley   Lolita     Pharsa  Silvanna    Aldous   

  losepick2 losepick3 losepick4  losepick5  
0     Brody     Edith      Ling     Pharsa  
1  Fredrinn    Moskov    Pharsa   Silvanna  
2     Fanny      Kaja    Lolita  Valentina  
3     Freya    Julian  Lancelot   Silvanna  
4    Franco  Fredrinn   Melissa      Valir  
Total matches: 9122
Unknown heroes: set()
Total heroes: 131
Roles: 6 | Specs: 16 | Lanes: 5
Final vector size: 316

Skipped : 0 matches
Samples : 18244
X shape : (18244, 316)
y shape : (18244,)
Wins    : 9122 | Losses: 9122

Saved: data/draft_dataset_316.npz

─── Sample 1 ───
Al

In [9]:
print(df['roles'].unique())
print(df['specialities'].unique())
print(df['lane'].unique())
print(df['possible_lanes'].unique())

['Marksman' 'Fighter' 'Assassin' 'Mage,Tank' 'Mage,Support' 'Tank,Support'
 'Fighter,Assassin' 'Fighter,Mage' 'Support' 'Mage' 'Assassin,Fighter'
 'Support,Tank' 'Fighter,Tank' 'Assassin,Marksman' 'Tank'
 'Fighter,Marksman' 'Tank,Fighter' 'Mage,Assassin' 'Marksman,Assassin'
 'Fighter,Support' 'Assassin,Mage' 'Marksman,Mage' 'Support,Mage'
 'Support,Assassin' 'Tank,Marksman' 'Support/Tank' 'Assassin/Fighter'
 'Supprot/Fighter' 'Fighter/Assassin']
['Reap,Damage' 'Damage,Regen' 'Charge,Reap' 'Charge,Regen' 'Poke,Burst'
 'Crowd Control' 'Chase,Damage' 'Reap,Magic Damage' 'Guard,Crowd Control'
 'Initiator,Control' 'Push,Burst' 'Reap,Burst' 'Regen,Guard'
 'Control,Burst' 'Chase,Reap' 'Chase,Burst' 'Poke,Reap' 'Chase,Control'
 'Push,Damage' 'Charge,Damage' 'Crowd Control,Regen' 'Reap,Chase'
 'Support,Crowd Control' 'Poke,Control' 'Crowd Control,Poke'
 'Crowd Control,Burst' 'Burst,Poke' 'Crowd Control,Initiator'
 'Charge,Burst' 'Guard,Poke' 'Guard,Initiator' 'Guard,Support'
 'Burst,Magic Damag

In [10]:
encode_team(['Saber'])

(array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
 array([1., 0., 0., 0., 0., 0.], dtype=float32),
 array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32))

## Function for Encoding State

In [11]:
def encode_state(ally_picks, enemy_picks):
    
    ally_h,  ally_r,  ally_s,  ally_l  = encode_team(ally_picks)
    enemy_h, enemy_r, enemy_s, enemy_l = encode_team(enemy_picks)

    state = np.concatenate([
        ally_h, enemy_h,   # heroes  (262)
        ally_r, enemy_r,   # roles    (12)
        ally_s, enemy_s,   # specs    (32)
        ally_l, enemy_l,   # lanes    (10)
    ])                     # total = 316 dimension of vectors

    return torch.tensor(state, dtype=torch.float32)

In [12]:
# Test encode_state with known heroes
ally_picks  = ['Yu Zhong', 'Fredrinn', 'Karrie', 'Akai']       # 3 ally picks
enemy_picks = ['Fanny', 'Gusion', 'Lesley']     # 3 enemy picks

state = encode_state(ally_picks, enemy_picks)
print(f"State shape: {state.shape}")  # Should be torch.Size([316])

# ── Slice and decode each section ─────────────────────────────────────────────
ally_h   = state[0:131]
enemy_h  = state[131:262]
ally_r   = state[262:268]
enemy_r  = state[268:274]
ally_s   = state[274:290]
enemy_s  = state[290:306]
ally_l   = state[306:311]
enemy_l  = state[311:316]

# Print decoded heroes
print("\n── Heroes ──")
print("Ally  :", [id_to_hero[i] for i in torch.where(ally_h  == 1)[0].tolist()])
print("Enemy :", [id_to_hero[i] for i in torch.where(enemy_h == 1)[0].tolist()])

# Print decoded roles
print("\n── Roles ──")
print("Ally  :", [ROLES[i] for i in torch.where(ally_r == 1)[0].tolist()])
print("Enemy :", [ROLES[i] for i in torch.where(enemy_r == 1)[0].tolist()])

# Print decoded specs
print("\n── Specialities ──")
print("Ally  :", [SPECS[i] for i in torch.where(ally_s == 1)[0].tolist()])
print("Enemy :", [SPECS[i] for i in torch.where(enemy_s == 1)[0].tolist()])

# Print decoded lanes
print("\n── Lanes ──")
print("Ally  :", [LANES[i] for i in torch.where(ally_l == 1)[0].tolist()])
print("Enemy :", [LANES[i] for i in torch.where(enemy_l == 1)[0].tolist()])

State shape: torch.Size([316])

── Heroes ──
Ally  : ['Akai', 'Karrie', 'Yu Zhong', 'Fredrinn']
Enemy : ['Fanny', 'Lesley', 'Gusion']

── Roles ──
Ally  : ['Fighter', 'Marksman', 'Support', 'Tank']
Enemy : ['Assassin', 'Marksman']

── Specialities ──
Ally  : ['Chase', 'Crowd Control', 'Damage', 'Guard', 'Reap', 'Regen']
Enemy : ['Burst', 'Chase', 'Magic Damage', 'Reap']

── Lanes ──
Ally  : ['Exp Laner', 'Gold Laner', 'Jungler', 'Roamer']
Enemy : ['Gold Laner', 'Jungler']


## Making of Neural Network (MODEL Deep Q-Learning)

#

In [13]:
# DQN Neural Network Starts here

from collections import deque
import random

# ── LOAD THE NPZ DATASET ───────────────────────────────────────────────────────
# Load the preprocessed 316-dim dataset we saved earlier
data  = np.load('data/draft_dataset_316.npz')
X_all = data['X']  # (15808, 316) — input vectors
y_all = data['y']  # (15808,)     — labels: 1=ally wins, 0=ally loses

print(f"Loaded dataset:")
print(f"  X shape : {X_all.shape}")
print(f"  y shape : {y_all.shape}")

Loaded dataset:
  X shape : (18244, 316)
  y shape : (18244,)


### Replay Buffer

In [14]:
NUM_SAMPLES       = len(X_all)
HEROES_PER_SAMPLE = 5
TOTAL_EXPERIENCES = NUM_SAMPLES * HEROES_PER_SAMPLE

print(f"Dataset samples    : {NUM_SAMPLES}")
print(f"Total experiences  : {TOTAL_EXPERIENCES}")

# ── REPLAY BUFFER ──────────────────────────────────────────────────────────────
class ReplayMemory:
    def __init__(self, capacity=TOTAL_EXPERIENCES):
        self.capacity = capacity   # max experiences to store
        self.memory   = []         # list of experiences
        self.position = 0          # circular pointer

    def insert(self, state, action, reward):
        # transition = one experience tuple
        transition = (
            state.unsqueeze(0),                              # (1, 316)
            torch.tensor([[action]], dtype=torch.long),      # (1, 1)
            torch.tensor([[reward]], dtype=torch.float32)    # (1, 1)
        )
        # If not full yet, expand the list
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        # Overwrite at current position (circular)
        self.memory[self.position] = transition
        # Move pointer forward, wrap around when full
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size=64):
        # Randomly sample batch_size experiences
        assert self.can_sample(batch_size)
        batch = random.sample(self.memory, batch_size)
        batch = zip(*batch)
        # cat joins all tensors in the batch together
        return [torch.cat(items) for items in batch]

    def can_sample(self, batch_size):
        # Only sample when buffer has at least 10x the batch size
        # Ensures enough diversity in each batch
        return len(self.memory) >= batch_size * 10

    def __len__(self):
        return len(self.memory)

# Initialize with auto maxlen
memory = ReplayMemory(capacity=TOTAL_EXPERIENCES)
print(f"Replay buffer capacity : {TOTAL_EXPERIENCES}")

Dataset samples    : 18244
Total experiences  : 91220
Replay buffer capacity : 91220


### Experience Buffer

In [15]:
# ── SHUFFLE FIRST so buffer gets mixed wins and losses ─────────────────────────
indices = list(range(len(X_all)))
random.shuffle(indices)  # ← shuffle so buffer doesn't fill with same label

total_experiences = 0

for i in indices:  # ← use shuffled indices instead of range(len(X_all))
    state  = torch.tensor(X_all[i], dtype=torch.float32)  # (316,)
    label  = float(y_all[i])                               # 1.0=win, 0.0=loss

    # Ally heroes occupy positions 0:131 in the state vector
    ally_indices = np.where(X_all[i][0:131] == 1)[0]

    for hero_idx in ally_indices:
        action = int(hero_idx)
        reward = label
        memory.insert(state, action, reward)
        total_experiences += 1

print(f"Total experiences pushed : {total_experiences}")
print(f"Replay buffer size       : {len(memory)}")

# ── SAMPLE ────────────────────────────────────────────────────────────────────
states, actions, rewards = memory.sample(batch_size=64)

# ── SANITY CHECK ──────────────────────────────────────────────────────────────
print("\nSampled experiences:")
win_count  = 0
lose_count = 0
for j in range(64):
    hero_name   = id_to_hero[actions[j].item()]
    reward      = rewards[j].item()
    ally_heroes = [id_to_hero[k] for k in torch.where(states[j][0:131] == 1)[0].tolist()]
    if reward == 1.0: win_count  += 1
    else:             lose_count += 1
    print(f"  [{j}] Hero: {hero_name:15s} | Reward: {reward} | Team: {ally_heroes}")

print(f"\nReward distribution in batch:")
print(f"  Wins   (1.0) : {win_count}")
print(f"  Losses (0.0) : {lose_count}")

Total experiences pushed : 91197
Replay buffer size       : 91197

Sampled experiences:
  [0] Hero: Lancelot        | Reward: 0.0 | Team: ['Clint', 'Layla', 'Yi Sun-shin', 'Vexana', 'Lancelot']
  [1] Hero: Franco          | Reward: 0.0 | Team: ['Franco', "Chang'e", 'Guinevere', 'Beatrix', 'Sora']
  [2] Hero: Freya           | Reward: 1.0 | Team: ['Freya', 'Esmeralda', 'Cecilion', 'Gloo', 'Novaria']
  [3] Hero: Kaja            | Reward: 0.0 | Team: ['Freya', 'Roger', 'Irithel', 'Kaja', 'Yve']
  [4] Hero: Melissa         | Reward: 0.0 | Team: ['Chou', 'Lancelot', 'Terizla', 'Melissa', 'Xavier']
  [5] Hero: Hilda           | Reward: 1.0 | Team: ['Franco', 'Kagura', 'Hilda', 'Granger', 'Brody']
  [6] Hero: Khufra          | Reward: 0.0 | Team: ['Akai', 'Khufra', 'Esmeralda', 'Beatrix', 'Valentina']
  [7] Hero: Wanwan          | Reward: 0.0 | Team: ['Gatotkaca', 'Lancelot', 'Khufra', 'Wanwan', 'Xavier']
  [8] Hero: Yi Sun-shin     | Reward: 1.0 | Team: ['Yi Sun-shin', 'Khufra', 'Brody', 'Yv

In [16]:
# ── DQN NEURAL NETWORK ─────────────────────────────────────────────────────────
class DQN(nn.Module):
    def __init__(self, input_dim=316, output_dim=131):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),  # 316 → 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),        # 256 → 128
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_dim)  # 128 → 131 Q-values
        )

    def forward(self, x):
        return self.net(x)  # returns Q-value for all 131 heroes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model     = DQN(input_dim=316, output_dim=131).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
print(f"Using device : {device}")
print(f"Model        : DQN(316 → 256 → 128 → 131)")

Using device : cpu
Model        : DQN(316 → 256 → 128 → 131)


In [17]:
NUM_EPOCHS = 100
BATCH_SIZE = 256

# How many batches to cover full buffer each epoch
STEPS_PER_EPOCH = len(memory) // BATCH_SIZE
print(f"Buffer size     : {len(memory)}")
print(f"Steps per epoch : {STEPS_PER_EPOCH}")  # 79040 // 256 = 308 steps

optimizer = optim.Adam(model.parameters(), lr=0.0001)
best_loss = float('inf')

for epoch in range(NUM_EPOCHS):
    model.train()

    epoch_loss    = 0.0
    epoch_correct = 0

    # ── Loop through full buffer each epoch ────────────────────────────────────
    for step in range(STEPS_PER_EPOCH):

        # 1. Sample random batch
        states, actions, rewards = memory.sample(BATCH_SIZE)
        states  = states.to(device)
        actions = actions.to(device)
        rewards = rewards.to(device)

        # 2. Forward pass
        q_values = model(states)               # (256, 131)
        q_action = q_values.gather(1, actions) # (256, 1)

        # 3. Target = reward directly
        target_q = rewards                     # (256, 1)

        # 4. Loss
        loss = criterion(q_action, target_q)

        # 5. Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track loss and accuracy
        epoch_loss    += loss.item()
        predicted      = (q_action >= 0.5).float()
        epoch_correct += (predicted == rewards).sum().item()

    # ── Average metrics for this epoch ────────────────────────────────────────
    avg_loss = epoch_loss    / STEPS_PER_EPOCH
    accuracy = epoch_correct / (STEPS_PER_EPOCH * BATCH_SIZE) * 100

    # ── Validation every 10 epochs ─────────────────────────────────────────────
    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            val_states, val_actions, val_rewards = memory.sample(BATCH_SIZE)
            val_states  = val_states.to(device)
            val_actions = val_actions.to(device)
            val_rewards = val_rewards.to(device)

            val_q       = model(val_states).gather(1, val_actions)
            val_loss    = criterion(val_q, val_rewards)
            val_pred    = (val_q >= 0.5).float()
            val_correct = (val_pred == val_rewards).sum().item()
            val_acc     = val_correct / BATCH_SIZE * 100

        print(f"Epoch {epoch:3d}/{NUM_EPOCHS} | "
              f"Train Loss: {avg_loss:.4f}  Acc: {accuracy:.1f}% | "
              f"Val Loss: {val_loss.item():.4f}  Acc: {val_acc:.1f}%")

        if val_loss.item() < best_loss:
            best_loss = val_loss.item()
            torch.save(model.state_dict(), 'data/draft_model_best.pth')
            print(f"  ✅ Best model saved (val_loss={best_loss:.4f})")

# ── SAVE ───────────────────────────────────────────────────────────────────────
torch.save(model.state_dict(), 'data/draft_model.pth')
print(f"\nSaved : data/draft_model.pth")
print(f"Best  : data/draft_model_best.pth (val_loss={best_loss:.4f})")

Buffer size     : 91197
Steps per epoch : 356
Epoch   0/100 | Train Loss: 0.3334  Acc: 50.7% | Val Loss: 0.2597  Acc: 51.2%
  ✅ Best model saved (val_loss=0.2597)
Epoch  10/100 | Train Loss: 0.2127  Acc: 66.3% | Val Loss: 0.1927  Acc: 75.8%
  ✅ Best model saved (val_loss=0.1927)
Epoch  20/100 | Train Loss: 0.1315  Acc: 83.1% | Val Loss: 0.1089  Acc: 89.1%
  ✅ Best model saved (val_loss=0.1089)
Epoch  30/100 | Train Loss: 0.0849  Acc: 91.1% | Val Loss: 0.0445  Acc: 97.7%
  ✅ Best model saved (val_loss=0.0445)
Epoch  40/100 | Train Loss: 0.0635  Acc: 94.6% | Val Loss: 0.0256  Acc: 100.0%
  ✅ Best model saved (val_loss=0.0256)
Epoch  50/100 | Train Loss: 0.0519  Acc: 96.3% | Val Loss: 0.0150  Acc: 100.0%
  ✅ Best model saved (val_loss=0.0150)
Epoch  60/100 | Train Loss: 0.0447  Acc: 97.2% | Val Loss: 0.0085  Acc: 100.0%
  ✅ Best model saved (val_loss=0.0085)
Epoch  70/100 | Train Loss: 0.0395  Acc: 97.8% | Val Loss: 0.0086  Acc: 100.0%
Epoch  80/100 | Train Loss: 0.0354  Acc: 98.3% | Val 

## Recommendation Function using Trained DQN

### Load Model

In [18]:
# ── 1. LOAD THE TRAINED MODEL ──────────────────────────────────────────────────
model = DQN(input_dim=316, output_dim=131).to(device)
model.load_state_dict(torch.load('data/draft_model_best.pth'))
model.eval()  # ← switch to evaluation mode (disables dropout)
print("Model loaded!")


Model loaded!


In [19]:
def recommend(ally_picks, enemy_picks, banned, player_lanes=None, top_n=10):
    ally_heroes  = [p['hero'] for p in ally_picks]
    enemy_heroes = [p['hero'] for p in enemy_picks]

    # ── Build state ────────────────────────────────────────────────────────────
    state = encode_state(ally_heroes, enemy_heroes)
    with torch.no_grad():
        q_values = model(state.unsqueeze(0)).squeeze(0)  # (131,)

    # ── Unavailable ────────────────────────────────────────────────────────────
    unavailable = set(ally_heroes + enemy_heroes + banned)

    # ── Roles already on ally team ─────────────────────────────────────────────
    ally_role_set = set(r for h in ally_heroes for r in hero_roles.get(h, []))

    # ── Designated lanes — what's actually occupied ────────────────────────────
    ally_designated_set = set(
        p['lane'] for p in ally_picks if p.get('lane')
    )

    # ── Score ALL available heroes once ────────────────────────────────────────
    # Base scores computed once, then filtered per lane
    all_scored = []
    for hero, idx in hero_to_id.items():

        if hero in unavailable:
            continue

        score = q_values[idx].item()

        # Penalty — duplicate role
        for r in hero_roles.get(hero, []):
            if r in ally_role_set:
                score -= 0.3

        # Penalty — lane already designated by teammate
        for l in hero_lanes.get(hero, []):
            if l in ally_designated_set:
                score -= 0.3

        all_scored.append((hero, score))

    # ── Build separate top N per lane ──────────────────────────────────────────
    if player_lanes is None:
        # No restriction — return single top N list
        all_scored.sort(key=lambda x: x[1], reverse=True)
        return {
            'All': [
                {
                    'hero'  : hero,
                    'score' : round(score, 4),
                    'roles' : hero_roles.get(hero, []),
                    'lane'  : hero_lanes.get(hero, [])
                }
                for hero, score in all_scored[:top_n]
            ]
        }

    # ── Separate results per designated lane ───────────────────────────────────
    results = {}

    for lane in player_lanes:
        # Filter heroes that fit THIS specific lane
        lane_scored = [
            (hero, score)
            for hero, score in all_scored
            if lane in hero_lanes.get(hero, [])
        ]

        # Sort by score
        lane_scored.sort(key=lambda x: x[1], reverse=True)

        results[lane] = [
            {
                'hero'  : hero,
                'score' : round(score, 4),
                'roles' : hero_roles.get(hero, []),
                'lane'  : hero_lanes.get(hero, [])   
            }
            for hero, score in lane_scored[:top_n]
        ]
    
    return results


### Example Usage of the model

In [None]:
# ── EXAMPLE 1: Single lane ────────────────────────────────────────────────────
print("─── Single Lane: Gold Laner only ───")
results = recommend(
    ally_picks   = [{'hero': 'Granger', 'lane': 'Jungler'},
                    {'hero': 'Khufra', 'lane': 'Roamer'},
                    {'hero': 'Zetian', 'lane': 'Mid Laner'}],
    enemy_picks  = [{'hero': 'Fanny',   'lane': 'Jungler'},
                    {'hero': 'Atlas',   'lane': 'Roamer'},
                    {'hero': 'Karrie', 'lane': 'Gold Laner'},
                    {'hero': 'Esmeralda', 'lane': 'Exp Laner'}],
    banned       = getBannedHeroes(),
    player_lanes = ['Exp Laner', 'Gold Laner'],
    top_n        = 10
)

for lane, heroes in results.items():
    print(f"\n  {lane} recommendations:")
    for rank, r in enumerate(heroes, 1):
        print(f"    {rank:2}. {r['hero']:<20} {r['score']:.4f}  {r['roles']}")


# ── EXAMPLE 2: 2-pick phase — separate top 10 per lane ───────────────────────
print("\n─── 2-pick phase: Gold Laner + Exp Laner (separate top 10 each) ───")
results = recommend(
    ally_picks   = [{'hero': 'Gusion', 'lane': 'Mid Laner'},
                    {'hero': 'Atlas',   'lane': 'Roamer'}],
    enemy_picks  = [{'hero': 'Fanny',   'lane': 'Jungler'}],
    banned       = getBannedHeroes(),
    player_lanes = ['Jungler', 'Exp Laner'],  # ← 2 separate lists
    top_n        = 10
)
for lane, heroes in results.items():
    print(f"\n  {lane} recommendations (top {len(heroes)}):")
    for rank, r in enumerate(heroes, 1):
        print(f"    {rank:2}. {r['hero']:<20} {r['score']:.4f}  {r['roles']}")


# ── EXAMPLE 3: No lane restriction ────────────────────────────────────────────
print("\n─── No lane restriction ───")
results = recommend(
    ally_picks   = [],
    enemy_picks  = [],
    banned       = getBannedHeroes(),
    player_lanes = ['Gold Laner'],
    top_n        = 10
)
for lane, heroes in results.items():
    print(f"\n  {lane} recommendations:")
    for rank, r in enumerate(heroes, 1):
        print(f"    {rank:2}. {r['hero']:<20} {r['score']:.4f}  {r['lane']}")

─── Single Lane: Gold Laner only ───

  Exp Laner recommendations:
     1. Arlott               0.0580  ['Fighter', 'Assassin']
     2. Guinevere            0.0566  ['Fighter']
     3. Aldous               0.0524  ['Fighter']
     4. Thamuz               0.0516  ['Fighter']
     5. Martis               0.0414  ['Fighter']
     6. Freya                0.0371  ['Fighter']
     7. Sora                 0.0346  ['Fighter', 'Assassin']
     8. Silvanna             0.0334  ['Fighter']
     9. Sun                  0.0309  ['Fighter']
    10. Lapu-Lapu            0.0308  ['Fighter']

  Gold Laner recommendations:
     1. Lesley               0.0468  ['Marksman', 'Assassin']
     2. Ixia                 0.0466  ['Marksman']
     3. Brody                0.0446  ['Marksman']
     4. Layla                0.0444  ['Marksman']
     5. Melissa              0.0427  ['Marksman']
     6. Hanabi               0.0411  ['Marksman']
     7. Beatrix              0.0354  ['Marksman']
     8. Popol and Kupa    