## Importing of data

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

df = pd.read_csv('data/heroes_updated.csv')
df_match_history = pd.read_csv('data/toBe7_process_FulldraftWinLose.csv') # needs to be replaced by full match history dataset
print(df.head())        
print(len(df))          
print(df.columns)  

   id     name         roles  specialities        lane possible_lanes  \
0   1     Miya      Marksman   Reap,Damage  Gold Laner            NaN   
1   2  Balmond       Fighter  Damage,Regen     Jungler      Exp Laner   
2   3    Saber      Assassin   Charge,Reap      Roamer        Jungler   
3   4    Alice     Mage,Tank  Charge,Regen   Exp Laner        Jungler   
4   5     Nana  Mage,Support    Poke,Burst   Mid Laner            NaN   

                                                icon  
0  https://static.wikia.nocookie.net/mobile-legen...  
1  https://static.wikia.nocookie.net/mobile-legen...  
2  https://static.wikia.nocookie.net/mobile-legen...  
3  https://static.wikia.nocookie.net/mobile-legen...  
4  https://static.wikia.nocookie.net/mobile-legen...  
131
Index(['id', 'name', 'roles', 'specialities', 'lane', 'possible_lanes',
       'icon'],
      dtype='object')


## Preprocessing of data
1. df_hero_id to show id and name only for encoding_state
2. df_role to show name and role 
3. df_specialty to show name and specialty
4. df_lane to show name and lane/possible lanes

In [69]:
df_hero_to_id = {row['name']: row['id'] - 1 for _, row in df.iterrows()}
df_id_to_hero = {v: k for k, v in df_hero_to_id.items()}
df_role = {row['name']: row['roles'] for _, row in df.iterrows()}
df_specialities = {row['name']: row['specialities'] for _, row in df.iterrows()}
df_lane = {row['name']: row['lane'] for _, row in df.iterrows()}

# Example usage:
print(df_hero_to_id["Saber"])
print(df_id_to_hero[2])
print(df_role["Saber"])
print(df_specialities["Saber"])
print(df_lane["Saber"])

# Total Dimension / Available Heroes
print(len(df_hero_to_id))  

2
Saber
Assassin
Charge,Reap
Roamer
131


## Banned Hero Function

In [70]:
import random

# Scraped from mobilelegends.com/rank
# 2/17/2026
BAN_RATES = {
    "Gloo": 81.00,        "Sora": 67.09,        "Aamon": 41.63,
    "Helcurt": 37.93,     "Freya": 35.15,       "Yi Sun-shin": 33.03,
    "Alice": 30.55,       "Estes": 28.16,       "Diggie": 24.59,
    "Saber": 24.27,       "Floryn": 22.42,      "Hayabusa": 21.41,
    "Leomord": 20.30,     "Fredrinn": 20.22,    "Angela": 18.43,
    "Hilda": 18.22,       "Lancelot": 13.79,    "Guinevere": 13.37,
    "Ixia": 13.28,        "Sun": 12.69,         "Yu Zhong": 11.67,
    "Gusion": 11.23,      "Franco": 11.05,      "Granger": 10.72,
    "Grock": 10.12,       "Zetian": 8.80,       "X.Borg": 8.02,
    "Hanzo": 7.40,        "Minotaur": 7.40,     "Belerick": 7.05,
    "Tigreal": 6.85,      "Thamuz": 6.58,       "Minsitthar": 6.47,
    "Fanny": 6.45,        "Kadita": 6.15,       "Eudora": 6.02,
    "Lesley": 5.94,       "Lapu-Lapu": 5.45,    "Kalea": 5.01,
    "Cici": 4.97,         "Rafaela": 4.90,      "Chip": 4.61,
    "Hanabi": 4.57,       "Nana": 4.34,         "Yin": 4.29,
    "Zhuxin": 4.18,       "Claude": 4.16,       "Karrie": 4.12,
    "Harley": 3.50,       "Atlas": 3.37,        "Johnson": 3.29,
    "Obsidia": 3.29,      "Julian": 3.17,       "Chou": 2.85,
    "Miya": 2.68,         "Esmeralda": 2.48,    "Cyclops": 2.44,
    "Natalia": 2.38,      "Lolita": 2.25,       "Alucard": 2.21,
    "Akai": 2.20,         "Karina": 2.05,       "Joy": 1.86,
    "Argus": 1.84,        "Lukas": 1.80,        "Uranus": 1.72,
    "Vexana": 1.71,       "Silvanna": 1.67,     "Badang": 1.66,
    "Khufra": 1.64,       "Carmilla": 1.50,     "Phoveus": 1.47,
    "Arlott": 1.45,       "Alpha": 1.43,        "Pharsa": 1.39,
    "Layla": 1.36,        "Selena": 1.33,       "Benedetta": 1.29,
    "Suyou": 1.17,        "Kaja": 1.16,         "Clint": 1.16,
    "Valir": 1.11,        "Hylos": 1.07,        "Kagura": 1.03,
    "Gatotkaca": 1.02,    "Melissa": 0.99,      "Chang'e": 0.95,
    "Mathilda": 0.95,     "Kimmy": 0.91,        "Lylia": 0.90,
    "Ruby": 0.80,         "Zilong": 0.75,       "Faramis": 0.73,
    "Wanwan": 0.71,       "Irithel": 0.66,      "Odette": 0.65,
    "Martis": 0.57,       "Aldous": 0.56,       "Cecilion": 0.54,
    "Dyrroth": 0.54,      "Valentina": 0.53,    "Nolan": 0.52,
    "Lunox": 0.51,        "Khaleed": 0.50,      "Ling": 0.46,
    "Brody": 0.46,        "Xavier": 0.44,       "Natan": 0.42,
    "Jawhead": 0.37,      "Popol and Kupa": 0.37, "Paquito": 0.35,
    "Terizla": 0.35,      "Gord": 0.34,         "Yve": 0.33,
    "Bane": 0.33,         "Masha": 0.32,        "Zhask": 0.32,
    "Balmond": 0.32,      "Baxia": 0.30,        "Aurora": 0.30,
    "Vale": 0.29,         "Beatrix": 0.29,      "Moskov": 0.27,
    "Aulus": 0.23,        "Novaria": 0.22,      "Roger": 0.17,
    "Barats": 0.14,       "Luo Yi": 0.14,       "Edith": 0.14,
    "Bruno": 0.11,        "Harith": 0.11,
}
# Draft progression: {state: (ally_pick_count, enemy_pick_count)}
DRAFT_STATES = {
    'FP1': (0, 0),   # First pick, no picks yet(picking 1)
    'FP2': (1, 2),   # First pick, ally took 1(picking 2), enemy took 2
    'FP3': (3, 4),   # First pick, ally took 3(picking 2), enemy took 4
    'SP1': (0, 1),   # Second pick, enemy took 1(picking 2)
    'SP2': (2, 3),   # Second pick, ally took 2(picking 2), enemy has 3
    'SP3': (4, 5),   # Second pick, ally took 2(picking 1), enemy has 5
}

HERO_POOL = list(df_hero_to_id)   # 131 heroes
print(f'Loaded {HERO_POOL} heroes.')

Loaded ['Miya', 'Balmond', 'Saber', 'Alice', 'Nana', 'Tigreal', 'Alucard', 'Karina', 'Akai', 'Franco', 'Bane', 'Bruno', 'Clint', 'Rafaela', 'Eudora', 'Zilong', 'Fanny', 'Layla', 'Minotaur', 'Lolita', 'Hayabusa', 'Freya', 'Gord', 'Natalia', 'Kagura', 'Chou', 'Sun', 'Alpha', 'Ruby', 'Yi Sun-shin', 'Moskov', 'Johnson', 'Cyclops', 'Estes', 'Hilda', 'Aurora', 'Lapu-Lapu', 'Vexana', 'Roger', 'Karrie', 'Gatotkaca', 'Harley', 'Irithel', 'Grock', 'Argus', 'Odette', 'Lancelot', 'Diggie', 'Hylos', 'Zhask', 'Helcurt', 'Pharsa', 'Lesley', 'Jawhead', 'Angela', 'Gusion', 'Valir', 'Martis', 'Uranus', 'Hanabi', "Chang'e", 'Kaja', 'Selena', 'Aldous', 'Claude', 'Vale', 'Leomord', 'Lunox', 'Hanzo', 'Belerick', 'Kimmy', 'Thamuz', 'Harith', 'Minsitthar', 'Kadita', 'Faramis', 'Badang', 'Khufra', 'Granger', 'Guinevere', 'Esmeralda', 'Terizla', 'X.Borg', 'Ling', 'Dyrroth', 'Lylia', 'Baxia', 'Masha', 'Wanwan', 'Silvanna', 'Carmilla', 'Cecilion', 'Atlas', 'Popol and Kupa', 'Yu Zhong', 'Luo Yi', 'Benedetta', 'Kha

In [71]:
import math

# Pre-compute softmax weights once (temperature controls spread)
# Higher temperature, more uniform  (less bias toward top heroes)
# Lower  temperature, more skewed   (top heroes dominate)
# T = 20 gives a good realistic middle ground
TEMPERATURE = 20.0

def _compute_softmax_weights(ban_rates: dict, temperature: float) -> list:
    heroes = list(ban_rates.keys())
    rates  = [ban_rates[h] for h in heroes]
    scaled = [r / temperature for r in rates]
    max_s  = max(scaled)
    exps   = [math.exp(s - max_s) for s in scaled]
    total  = sum(exps)
    return [e / total for e in exps]

SOFTMAX_WEIGHTS = _compute_softmax_weights(BAN_RATES, TEMPERATURE)


def getBannedHeroes() -> list:
    """
    Returns a list of 10 unique banned heroes sampled without replacement,
    weighted by real ban rates (softmax-scaled to avoid heavy bias).
    """

    keys = [
        (random.random() ** (1.0 / w), hero)
        for hero, w in zip(HERO_POOL, SOFTMAX_WEIGHTS)
    ]
    keys.sort(reverse=True)
    return [hero for weight, hero in keys[:10]]


sample = getBannedHeroes()
print('Sample call, getBannedHeroes():')
print(sample)

Sample call, getBannedHeroes():
['Bane', 'Miya', 'Alice', 'Obsidia', 'Franco', 'Joy', 'Balmond', 'Novaria', 'Karina', 'Yu Zhong']


## Training Dataset Preprocess

### Helper Functions

In [72]:
def split_attr(val):
    if pd.isna(val):
        return []
    return [x.strip() for x in str(val).replace('/', ',').split(',')]

def clean_role(r):
    r = r.strip()
    if r == 'Supprot': return 'Support'
    if r == 'Jungle':  return None
    return r

def clean_lane(val):
    lane = str(val).strip()
    if lane == 'EXP Laner': return 'Exp Laner'
    return lane

In [73]:
print(df_match_history.head())
print(f"Total matches: {len(df_match_history)}")
pick_cols = [f'winpick{i}' for i in range(1, 6)] + [f'losepick{i}' for i in range(1, 6)]
for col in pick_cols:
    df_match_history[col] = df_match_history[col].str.strip().replace("Change", "Chang'e")
hero_names = set(df['name'].str.strip())
all_match_heroes = set()
for col in pick_cols:
    all_match_heroes.update(df_match_history[col].unique())
missing = all_match_heroes - hero_names
print(f"Unknown heroes: {missing}")  # Should be empty set()

hero_to_id = {row['name'].strip(): int(row['id']) - 1 for _, row in df.iterrows()}
id_to_hero = {v: k for k, v in hero_to_id.items()}
NUM_HEROES = len(hero_to_id)  # 131
print(f"Total heroes: {NUM_HEROES}")

hero_roles = {}
hero_specs = {}
hero_lanes = {}

for _, row in df.iterrows():
    name = row['name'].strip()
    hero_roles[name] = [clean_role(r) for r in split_attr(row['roles']) if clean_role(r)]
    hero_specs[name] = split_attr(row['specialities'])
    hero_lanes[name] = [clean_lane(row['lane'])]

# ── 6. Encoding categories ─────────────────────────────────────────────────────
ROLES = sorted(['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'])
SPECS = sorted(['Burst', 'Charge', 'Chase', 'Control', 'Crowd Control', 'Damage',
                'Finisher', 'Guard', 'Initiator', 'Magic Damage', 'Mixed Damage',
                'Poke', 'Push', 'Reap', 'Regen', 'Support'])
LANES = sorted(['Exp Laner', 'Gold Laner', 'Jungler', 'Mid Laner', 'Roamer'])

role_to_id = {r: i for i, r in enumerate(ROLES)}
spec_to_id = {s: i for i, s in enumerate(SPECS)}
lane_to_id = {l: i for i, l in enumerate(LANES)}

NUM_ROLES = len(ROLES)   # 6
NUM_SPECS = len(SPECS)   # 16
NUM_LANES = len(LANES)   # 5

print(f"Roles: {NUM_ROLES} | Specs: {NUM_SPECS} | Lanes: {NUM_LANES}")
print(f"Final vector size: {NUM_HEROES*2 + NUM_ROLES*2 + NUM_SPECS*2 + NUM_LANES*2}")  # 316

def encode_team(heroes):
    hero_vec = np.zeros(NUM_HEROES, dtype=np.float32)
    role_vec = np.zeros(NUM_ROLES,  dtype=np.float32)
    spec_vec = np.zeros(NUM_SPECS,  dtype=np.float32)
    lane_vec = np.zeros(NUM_LANES,  dtype=np.float32)
    for h in heroes:
        hero_vec[hero_to_id[h]] = 1.0
        for r in hero_roles.get(h, []):
            if r in role_to_id:
                role_vec[role_to_id[r]] = 1.0
        for s in hero_specs.get(h, []):
            if s in spec_to_id:
                spec_vec[spec_to_id[s]] = 1.0
        for l in hero_lanes.get(h, []):
            if l in lane_to_id:
                lane_vec[lane_to_id[l]] = 1.0
    return hero_vec, role_vec, spec_vec, lane_vec

## Sample Usage:
# encode_team(['Saber'])

# Each match → 2 samples:
#   Sample A: ally=win_team,  enemy=lose_team → label 1  (ally wins)
#   Sample B: ally=lose_team, enemy=win_team  → label 0  (ally loses)
X_win   = []
y_win   = []
skipped = 0

for _, row in df_match_history.iterrows():
    win_heroes  = [row[f'winpick{i}'].strip()  for i in range(1, 6)]
    lose_heroes = [row[f'losepick{i}'].strip() for i in range(1, 6)]

    if any(h not in hero_to_id for h in win_heroes + lose_heroes):
        skipped += 1
        continue

    win_h,  win_r,  win_s,  win_l  = encode_team(win_heroes)
    lose_h, lose_r, lose_s, lose_l = encode_team(lose_heroes)

    # Sample A: ally=win → label 1
    X_win.append(np.concatenate([win_h, lose_h, win_r, lose_r, win_s, lose_s, win_l, lose_l]))
    y_win.append(1.0)

    # Sample B: ally=lose → label 0
    X_win.append(np.concatenate([lose_h, win_h, lose_r, win_r, lose_s, win_s, lose_l, win_l]))
    y_win.append(0.0)

X_win = np.stack(X_win).astype(np.float32)  
y_win = np.array(y_win,  dtype=np.float32)  

print(f"\nSkipped : {skipped} matches")
print(f"Samples : {len(X_win)}")
print(f"X shape : {X_win.shape}")  
print(f"y shape : {y_win.shape}")   
print(f"Wins    : {int(y_win.sum())} | Losses: {int((1 - y_win).sum())}")

np.savez('data/draft_dataset_316.npz', X=X_win, y=y_win)
print("\nSaved: data/draft_dataset_316.npz")

def check_sample(idx):
    s = X_win[idx]
    ally_heroes  = [id_to_hero[i] for i in np.where(s[0:131]   == 1)[0]]
    enemy_heroes = [id_to_hero[i] for i in np.where(s[131:262] == 1)[0]]
    ally_roles   = [ROLES[i]      for i in np.where(s[262:268] == 1)[0]]
    enemy_roles  = [ROLES[i]      for i in np.where(s[268:274] == 1)[0]]
    ally_specs   = [SPECS[i]      for i in np.where(s[274:290] == 1)[0]]
    enemy_specs  = [SPECS[i]      for i in np.where(s[290:306] == 1)[0]]
    ally_lanes   = [LANES[i]      for i in np.where(s[306:311] == 1)[0]]
    enemy_lanes  = [LANES[i]      for i in np.where(s[311:316] == 1)[0]]
    print(f"\n─── Sample {idx} ───")
    print(f"Ally  heroes : {ally_heroes}")
    print(f"Enemy heroes : {enemy_heroes}")
    print(f"Ally  roles  : {ally_roles}")
    print(f"Enemy roles  : {enemy_roles}")
    print(f"Ally  specs  : {ally_specs}")
    print(f"Enemy specs  : {enemy_specs}")
    print(f"Ally  lanes  : {ally_lanes}")
    print(f"Enemy lanes  : {enemy_lanes}")
    print(f"Label        : {int(y_win[idx])}  (1=ally wins, 0=ally loses)")

check_sample(1)   # Match 1 — ally wins
check_sample(2)   # Match 1 — ally loses (flipped)

        id  winpick1   winpick2 winpick3   winpick4  winpick5 losepick1  \
0  Match_1    Claude       Gord   Martis    Paquito  Silvanna    Angela   
1  Match_2     Atlas  Lapu-Lapu   Lesley     Martis    Xavier     Estes   
2  Match_3     Atlas     Gusion   Karina    Melissa  Silvanna   Beatrix   
3  Match_4  Fredrinn     Kadita  Terizla  Valentina    Wanwan     Brody   
4  Match_5    Arlott     Lesley   Lolita     Pharsa  Silvanna    Aldous   

  losepick2 losepick3 losepick4  losepick5  
0     Brody     Edith      Ling     Pharsa  
1  Fredrinn    Moskov    Pharsa   Silvanna  
2     Fanny      Kaja    Lolita  Valentina  
3     Freya    Julian  Lancelot   Silvanna  
4    Franco  Fredrinn   Melissa      Valir  
Total matches: 9122
Unknown heroes: set()
Total heroes: 131
Roles: 6 | Specs: 16 | Lanes: 5
Final vector size: 316

Skipped : 0 matches
Samples : 18244
X shape : (18244, 316)
y shape : (18244,)
Wins    : 9122 | Losses: 9122

Saved: data/draft_dataset_316.npz

─── Sample 1 ───
Al

In [74]:
print(df['roles'].unique())
print(df['specialities'].unique())
print(df['lane'].unique())
print(df['possible_lanes'].unique())

['Marksman' 'Fighter' 'Assassin' 'Mage,Tank' 'Mage,Support' 'Tank,Support'
 'Fighter,Assassin' 'Fighter,Mage' 'Support' 'Mage' 'Assassin,Fighter'
 'Support,Tank' 'Fighter,Tank' 'Assassin,Marksman' 'Tank'
 'Fighter,Marksman' 'Tank,Fighter' 'Mage,Assassin' 'Marksman,Assassin'
 'Fighter,Support' 'Assassin,Mage' 'Marksman,Mage' 'Support,Mage'
 'Support,Assassin' 'Tank,Marksman' 'Support/Tank' 'Assassin/Fighter'
 'Supprot/Fighter' 'Fighter/Assassin']
['Reap,Damage' 'Damage,Regen' 'Charge,Reap' 'Charge,Regen' 'Poke,Burst'
 'Crowd Control' 'Chase,Damage' 'Reap,Magic Damage' 'Guard,Crowd Control'
 'Initiator,Control' 'Push,Burst' 'Reap,Burst' 'Regen,Guard'
 'Control,Burst' 'Chase,Reap' 'Chase,Burst' 'Poke,Reap' 'Chase,Control'
 'Push,Damage' 'Charge,Damage' 'Crowd Control,Regen' 'Reap,Chase'
 'Support,Crowd Control' 'Poke,Control' 'Crowd Control,Poke'
 'Crowd Control,Burst' 'Burst,Poke' 'Crowd Control,Initiator'
 'Charge,Burst' 'Guard,Poke' 'Guard,Initiator' 'Guard,Support'
 'Burst,Magic Damag

## Function for Encoding State

In [75]:
def encode_state(ally_picks, enemy_picks):
    
    ally_h,  ally_r,  ally_s,  ally_l  = encode_team(ally_picks)
    enemy_h, enemy_r, enemy_s, enemy_l = encode_team(enemy_picks)

    state = np.concatenate([
        ally_h, enemy_h,   # heroes  (262)
        ally_r, enemy_r,   # roles    (12)
        ally_s, enemy_s,   # specs    (32)
        ally_l, enemy_l,   # lanes    (10)
    ])                     # total = 316 dimension of vectors

    return torch.tensor(state, dtype=torch.float32)

In [76]:
# Test encode_state with known heroes
ally_picks  = ['Yu Zhong', 'Fredrinn', 'Karrie', 'Akai']       # 3 ally picks
enemy_picks = ['Fanny', 'Gusion', 'Lesley']     # 3 enemy picks

state = encode_state(ally_picks, enemy_picks)
print(f"State shape: {state.shape}")  # Should be torch.Size([316])

# ── Slice and decode each section ─────────────────────────────────────────────
ally_h   = state[0:131]
enemy_h  = state[131:262]
ally_r   = state[262:268]
enemy_r  = state[268:274]
ally_s   = state[274:290]
enemy_s  = state[290:306]
ally_l   = state[306:311]
enemy_l  = state[311:316]

# Print decoded heroes
print("\n── Heroes ──")
print("Ally  :", [id_to_hero[i] for i in torch.where(ally_h  == 1)[0].tolist()])
print("Enemy :", [id_to_hero[i] for i in torch.where(enemy_h == 1)[0].tolist()])

# Print decoded roles
print("\n── Roles ──")
print("Ally  :", [ROLES[i] for i in torch.where(ally_r == 1)[0].tolist()])
print("Enemy :", [ROLES[i] for i in torch.where(enemy_r == 1)[0].tolist()])

# Print decoded specs
print("\n── Specialities ──")
print("Ally  :", [SPECS[i] for i in torch.where(ally_s == 1)[0].tolist()])
print("Enemy :", [SPECS[i] for i in torch.where(enemy_s == 1)[0].tolist()])

# Print decoded lanes
print("\n── Lanes ──")
print("Ally  :", [LANES[i] for i in torch.where(ally_l == 1)[0].tolist()])
print("Enemy :", [LANES[i] for i in torch.where(enemy_l == 1)[0].tolist()])

State shape: torch.Size([316])

── Heroes ──
Ally  : ['Akai', 'Karrie', 'Yu Zhong', 'Fredrinn']
Enemy : ['Fanny', 'Lesley', 'Gusion']

── Roles ──
Ally  : ['Fighter', 'Marksman', 'Support', 'Tank']
Enemy : ['Assassin', 'Marksman']

── Specialities ──
Ally  : ['Chase', 'Crowd Control', 'Damage', 'Guard', 'Reap', 'Regen']
Enemy : ['Burst', 'Chase', 'Magic Damage', 'Reap']

── Lanes ──
Ally  : ['Exp Laner', 'Gold Laner', 'Jungler', 'Roamer']
Enemy : ['Gold Laner', 'Jungler']


## Environment

In [77]:
PICK_ORDER = [0, 1, 1, 0, 0, 1, 1, 0, 0, 1]

class MLBBDraftEnv:
    def __init__(self, win_heroes, lose_heroes, fp_is_win: bool, perspective: int = 0):
        if fp_is_win:
            self.team     = {0: win_heroes,  1: lose_heroes}
            self.team_won = {0: True,         1: False}
        else:
            self.team     = {0: lose_heroes, 1: win_heroes}
            self.team_won = {0: False,        1: True}
        self.perspective = perspective
        self.reset()

    def reset(self):
        self.step_idx = 0
        self.picks    = {0: [], 1: []}
        return self._get_state()

    def _get_state(self):
        ally  = self.picks[self.perspective]
        enemy = self.picks[1 - self.perspective]
        return encode_state(ally, enemy)

    def step(self, hero):
        assert self.step_idx < 10, "Episode already finished."
        state  = self._get_state()
        picker = PICK_ORDER[self.step_idx]
        action = hero_to_id[hero]

        self.picks[picker].append(hero)
        self.step_idx += 1

        done       = (self.step_idx == 10)
        next_state = self._get_state()

        reward = 0.0
        if done:
            reward = 1.0 if self.team_won[self.perspective] else -1.0

        return state, action, reward, next_state, done

    def run_episode(self):  
        self.reset()
        transitions = []
        pick_counts = {0: 0, 1: 0}

        for step in range(10):
            picker = PICK_ORDER[step]
            hero   = self.team[picker][pick_counts[picker]]
            pick_counts[picker] += 1

            s, a, r, ns, done = self.step(hero)

            if picker == self.perspective:
                transitions.append((s, a, r, ns, done))

        return transitions

In [78]:
def build_transitions_from_dataset(df_match, both_perspectives=True):
    all_transitions = []
    skipped = 0

    for _, row in df_match.iterrows():
        win_heroes  = [row[f'winpick{i}'].strip() for i in range(1, 6)]
        lose_heroes = [row[f'losepick{i}'].strip() for i in range(1, 6)]

        if any(h not in hero_to_id for h in win_heroes + lose_heroes):
            skipped += 1
            continue

        fp_is_win    = random.random() < 0.5
        perspectives = [0, 1] if both_perspectives else [random.randint(0, 1)]

        for pov in perspectives:
            env = MLBBDraftEnv(win_heroes, lose_heroes, fp_is_win, perspective=pov)
            all_transitions.extend(env.run_episode())

    print(f"Matches processed : {len(df_match) - skipped}")
    print(f"Skipped           : {skipped}")
    print(f"Total transitions : {len(all_transitions)}")
    return all_transitions

transitions = build_transitions_from_dataset(df_match_history, both_perspectives=True)

Matches processed : 9122
Skipped           : 0
Total transitions : 91220


## Making of Neural Network

#

In [79]:
# from collections import deque
import random

# Load the preprocessed 316-dim dataset we saved earlier
data  = np.load('data/draft_dataset_316.npz')
X_all = data['X']  # (15808, 316) — input vectors
y_all = data['y']  # (15808,)     — labels: 1=ally wins, 0=ally loses

print(f"Loaded dataset:")
print(f"  X shape : {X_all.shape}")
print(f"  y shape : {y_all.shape}")

Loaded dataset:
  X shape : (18244, 316)
  y shape : (18244,)


### Replay Buffer

In [None]:
NUM_SAMPLES       = len(X_all)
HEROES_PER_SAMPLE = 5
TOTAL_EXPERIENCES = NUM_SAMPLES * HEROES_PER_SAMPLE

print(f"Dataset samples    : {NUM_SAMPLES}")
print(f"Total experiences  : {TOTAL_EXPERIENCES}")

STATE_DIM  = NUM_HEROES * 2 + NUM_ROLES * 2 + NUM_SPECS * 2 + NUM_LANES * 2  # 316
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"State dim: {STATE_DIM} | Device: {device}")

class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory   = []
        self.position = 0

    def insert(self, state, action, reward, next_state, done):
        transition = (
            state.unsqueeze(0),
            torch.tensor([[action]],  dtype=torch.long),
            torch.tensor([[reward]],  dtype=torch.float32),
            next_state.unsqueeze(0),
            torch.tensor([[done]],    dtype=torch.bool),
        )
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        assert self.can_sample(batch_size)
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return (torch.cat(states), torch.cat(actions), torch.cat(rewards),
                torch.cat(next_states), torch.cat(dones))

    def can_sample(self, batch_size):
        return len(self.memory) >= batch_size * 10

    def __len__(self):
        return len(self.memory)

# Initialize with auto maxlen
memory = ReplayMemory(capacity=TOTAL_EXPERIENCES)
print(f"Replay buffer capacity : {TOTAL_EXPERIENCES}")

Dataset samples    : 18244
Total experiences  : 91220
State dim: 316 | Device: cpu
Replay buffer capacity : 91220


In [81]:
class DQN(nn.Module):
    def __init__(self, input_dim=316, output_dim=131):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 512),  # 316 → 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),        # 512 → 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),        # 256 → 128
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_dim)  # 128 → 131 Q-values
        )

    def forward(self, x):
        return self.net(x)  # returns Q-value for all 131 heroes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model     = DQN(input_dim=316, output_dim=131).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

print(f"Using device : {device}")

Using device : cpu


In [82]:
GAMMA             = 0.99
BATCH_SIZE        = 256
NUM_EPOCHS        = 100
TARGET_SYNC_EVERY = 10
LR                = 0.0001

def train(transitions, num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE):
    memory = ReplayMemory(capacity=len(transitions))
    for s, a, r, ns, done in transitions:
        memory.insert(s, a, r, ns, done)
    print(f"Replay buffer filled: {len(memory)} transitions")

    if not memory.can_sample(batch_size):
        print(f"ERROR: Need at least {batch_size * 10} transitions, got {len(memory)}")
        return None, None

    policy_net = DQN(STATE_DIM, NUM_HEROES).to(device)
    target_net = DQN(STATE_DIM, NUM_HEROES).to(device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()

    optimizer       = optim.Adam(policy_net.parameters(), lr=LR)
    criterion       = nn.MSELoss()
    steps_per_epoch = max(len(memory) // batch_size, 1)
    best_loss       = float('inf')
    print(f"Steps per epoch: {steps_per_epoch}\n")

    for epoch in range(num_epochs):
        policy_net.train()
        epoch_loss, epoch_correct = 0.0, 0

        for _ in range(steps_per_epoch):
            states, actions, rewards, next_states, dones = memory.sample(batch_size)
            states, actions, rewards, next_states, dones = (
                states.to(device), actions.to(device), rewards.to(device),
                next_states.to(device), dones.to(device)
            )

            q_action = policy_net(states).gather(1, actions)

            with torch.no_grad():
                max_next_q = target_net(next_states).max(dim=1, keepdim=True).values
                target_q   = rewards + GAMMA * max_next_q * (~dones).float()

            loss = criterion(q_action, target_q)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss    += loss.item()
            epoch_correct += ((q_action.detach() > 0) == (rewards > 0)).sum().item()

        avg_loss = epoch_loss    / steps_per_epoch
        accuracy = epoch_correct / (steps_per_epoch * batch_size) * 100

        if epoch % TARGET_SYNC_EVERY == 0:
            target_net.load_state_dict(policy_net.state_dict())

        if epoch % 10 == 0:
            policy_net.eval()
            with torch.no_grad():
                vs, va, vr, vns, vd = memory.sample(batch_size)
                vs, va, vr, vns, vd = (vs.to(device), va.to(device), vr.to(device),
                                        vns.to(device), vd.to(device))
                vq       = policy_net(vs).gather(1, va)
                vt       = vr + GAMMA * target_net(vns).max(dim=1, keepdim=True).values * (~vd).float()
                val_loss = criterion(vq, vt).item()
                val_acc  = ((vq.detach() > 0) == (vr > 0)).float().mean().item() * 100

            print(f"Epoch {epoch:3d}/{num_epochs} | "
                  f"Train Loss: {avg_loss:.4f}  Acc: {accuracy:.1f}% | "
                  f"Val Loss: {val_loss:.4f}  Acc: {val_acc:.1f}%")

            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(policy_net.state_dict(), 'data/draft_model_bellman_best.pth')
                print(f"Best model saved (val_loss={best_loss:.4f})")

            policy_net.train()

    torch.save(policy_net.state_dict(), 'data/draft_model_bellman.pth')
    print(f"\nFinal  : data/draft_model_bellman.pth")
    print(f"Best   : data/draft_model_bellman_best.pth (val_loss={best_loss:.4f})")
    return policy_net, target_net

# ── Run ───────────────────────────────────────────────────────────────────────
policy_net, target_net = train(transitions)

Replay buffer filled: 91220 transitions
Steps per epoch: 356

Epoch   0/100 | Train Loss: 0.1058  Acc: 27.7% | Val Loss: 0.1009  Acc: 7.4%
Best model saved (val_loss=0.1009)
Epoch  10/100 | Train Loss: 0.0972  Acc: 7.7% | Val Loss: 0.0971  Acc: 8.6%
Best model saved (val_loss=0.0971)
Epoch  20/100 | Train Loss: 0.0682  Acc: 9.5% | Val Loss: 0.0650  Acc: 10.9%
Best model saved (val_loss=0.0650)
Epoch  30/100 | Train Loss: 0.0442  Acc: 9.4% | Val Loss: 0.0374  Acc: 7.8%
Best model saved (val_loss=0.0374)
Epoch  40/100 | Train Loss: 0.0298  Acc: 9.6% | Val Loss: 0.0268  Acc: 6.6%
Best model saved (val_loss=0.0268)
Epoch  50/100 | Train Loss: 0.0237  Acc: 9.7% | Val Loss: 0.0208  Acc: 7.0%
Best model saved (val_loss=0.0208)
Epoch  60/100 | Train Loss: 0.0194  Acc: 9.7% | Val Loss: 0.0202  Acc: 9.8%
Best model saved (val_loss=0.0202)
Epoch  70/100 | Train Loss: 0.0174  Acc: 9.9% | Val Loss: 0.0214  Acc: 12.5%
Epoch  80/100 | Train Loss: 0.0161  Acc: 9.9% | Val Loss: 0.0281  Acc: 9.8%
Epoch 

## Recommendation Function using Trained DQN

### Load Model

In [83]:
# ── 1. LOAD THE TRAINED MODEL ──────────────────────────────────────────────────
model = DQN(input_dim=316, output_dim=131).to(device)
model.load_state_dict(torch.load('data/draft_model_bellman_best.pth'))
model.eval()  # ← switch to evaluation mode (disables dropout)
print("Model loaded!")


Model loaded!


In [84]:
def recommend(model, ally_picks, enemy_picks, banned, player_lanes=None, top_n=10):
    ally_heroes  = [p['hero'] for p in ally_picks]
    enemy_heroes = [p['hero'] for p in enemy_picks]

    state = encode_state(ally_heroes, enemy_heroes).to(device)
    model.eval()
    with torch.no_grad():
        q_values = model(state.unsqueeze(0)).squeeze(0)

    unavailable = set(ally_heroes + enemy_heroes + banned)

    all_scored = [
        (hero, q_values[idx].item())
        for hero, idx in hero_to_id.items()
        if hero not in unavailable
    ]

    if player_lanes is None:
        all_scored.sort(key=lambda x: x[1], reverse=True)
        return {'All': [{'hero': h, 'score': round(s, 4),
                         'roles': hero_roles.get(h, []),
                         'lane':  hero_lanes.get(h, [])} for h, s in all_scored[:top_n]]}

    results = {}
    for lane in player_lanes:
        lane_scored = [(h, s) for h, s in all_scored if lane in hero_lanes.get(h, [])]
        lane_scored.sort(key=lambda x: x[1], reverse=True)
        results[lane] = [{'hero': h, 'score': round(s, 4),
                          'roles': hero_roles.get(h, []),
                          'specs': list(hero_specs.get(h, [])),
                          'lane':  hero_lanes.get(h, [])} for h, s in lane_scored[:top_n]]
    return results

In [85]:
# from collections import Counter

# def recommend(model, ally_picks, enemy_picks, banned, player_lanes=None, top_n=10):
#     ally_heroes  = [p['hero'] for p in ally_picks]
#     enemy_heroes = [p['hero'] for p in enemy_picks]

#     state = encode_state(ally_heroes, enemy_heroes).to(device)
#     model.eval()
#     with torch.no_grad():
#         q_values = model(state.unsqueeze(0)).squeeze(0)

#     unavailable    = set(ally_heroes + enemy_heroes + banned)
#     ally_role_set  = set(r for h in ally_heroes for r in hero_roles.get(h, []))
#     ally_lane_set  = set(p['lane'] for p in ally_picks if p.get('lane'))

#     # ── Dynamic spec context ───────────────────────────────────────────────
#     ally_spec_set  = set(s for h in ally_heroes  for s in hero_specs.get(h, []))
#     enemy_spec_set = set(s for h in enemy_heroes for s in hero_specs.get(h, []))

#     # Specs your ally team is stacking too much of
#     spec_counts   = Counter(s for h in ally_heroes for s in hero_specs.get(h, []))
#     stacked_specs = {s for s, count in spec_counts.items() if count >= 2}

#     # ── Dynamic desired specs — react to enemy composition ─────────────────
#     desired_specs = set()
#     if enemy_heroes:  # only react if enemy has picks
#         if 'Burst'        in enemy_spec_set: desired_specs |= {'Guard', 'Regen'}
#         if 'Chase'        in enemy_spec_set: desired_specs |= {'Crowd Control', 'Control'}
#         if 'Reap'         in enemy_spec_set: desired_specs |= {'Guard', 'Initiator'}
#         if 'Damage'       in enemy_spec_set: desired_specs |= {'Regen'}
#         if 'Magic Damage' in enemy_spec_set: desired_specs |= {'Regen', 'Guard'}
#         if 'Charge'       in enemy_spec_set: desired_specs |= {'Crowd Control', 'Control'}
#         if 'Poke'         in enemy_spec_set: desired_specs |= {'Initiator', 'Chase'}

#     # Only bonus specs that are actually missing from ally team
#     missing_specs = desired_specs - ally_spec_set

#     all_scored = []
#     for hero, idx in hero_to_id.items():
#         if hero in unavailable:
#             continue

#         score     = q_values[idx].item()
#         hero_spec = set(hero_specs.get(hero, []))

#         # Penalty — duplicate role
#         for r in hero_roles.get(hero, []):
#             if r in ally_role_set:
#                 score -= 0.3

#         # Penalty — lane already taken
#         for l in hero_lanes.get(hero, []):
#             if l in ally_lane_set:
#                 score -= 0.3

#         # Bonus — fills a gap against enemy composition
#         score += 0.15 * len(hero_spec & missing_specs)

#         # Penalty — stacks what ally already has too much of
#         score -= 0.10 * len(hero_spec & stacked_specs)

#         all_scored.append((hero, score))

#     if player_lanes is None:
#         all_scored.sort(key=lambda x: x[1], reverse=True)
#         return {'All': [{'hero': h, 'score': round(s, 4),
#                          'roles': hero_roles.get(h, []),
#                          'lane':  hero_lanes.get(h, [])} for h, s in all_scored[:top_n]]}

#     results = {}
#     for lane in player_lanes:
#         lane_scored = [(h, s) for h, s in all_scored if lane in hero_lanes.get(h, [])]
#         lane_scored.sort(key=lambda x: x[1], reverse=True)
#         results[lane] = [{'hero': h, 'score': round(s, 4),
#                           'roles': hero_roles.get(h, []),
#                           'specs': list(hero_specs.get(h, [])),
#                           'lane':  hero_lanes.get(h, [])} for h, s in lane_scored[:top_n]]
#     return results

### Example Usage of the model

In [None]:
import os

print("=" * 60)
print("Step 1: Load or Train Model")
print("=" * 60)

if os.path.exists('data/draft_model_bellman_best.pth'):
    print("✅ Saved model found — skipping training.")
    policy_net = DQN(STATE_DIM, NUM_HEROES).to(device)
    policy_net.load_state_dict(torch.load('data/draft_model_bellman_best.pth', map_location=device))
    policy_net.eval()
else:
    print("No saved model found — training from scratch...")
    transitions = build_transitions_from_dataset(df_match_history, both_perspectives=True)
    policy_net, target_net = train(transitions)
    policy_net.load_state_dict(torch.load('data/draft_model_bellman_best.pth', map_location=device))
    policy_net.eval()
    print("Training complete, best model loaded.")

print("Model ready for recommendations.")

print("\n" + "=" * 60)
print("Step 2: Sample Recommendations")
print("=" * 60)

print("Example 1: Exp Laner + Gold Laner")
exampleOneBanned = getBannedHeroes()
results = recommend(
    model        = policy_net,
    ally_picks   = [{'hero': 'Lancelot', 'lane': 'Jungler'},
                    {'hero': 'Khufra',  'lane': 'Roamer'},
                    {'hero': 'Zetian',  'lane': 'Mid Laner'}],

    enemy_picks  = [{'hero': 'Fredrinn',     'lane': 'Jungler'},
                    {'hero': 'Silvanna',     'lane': 'Exp Laner'},
                    {'hero': 'Karrie',    'lane': 'Gold Laner'},
                    {'hero': 'Grock', 'lane': 'Roamer'}],
    banned       = exampleOneBanned,
    player_lanes = ['Exp Laner', 'Gold Laner'],
    top_n        = 20
)
print(f"Banned heroes: {exampleOneBanned}")
for lane, heroes in results.items():
    print(f"\n  {lane} recommendations:")
    for rank, r in enumerate(heroes, 1):
        print(f"    {rank:2}. {r['hero']:<20} {r['score']:.4f}  {r['roles']}")

print("\nExample 2: Jungler + Exp Laner")
results = recommend(
    model        = policy_net,
    ally_picks   = [{'hero': 'Gusion', 'lane': 'Mid Laner'},
                    {'hero': 'Atlas',  'lane': 'Roamer'}],

    enemy_picks  = [{'hero': 'Fanny',  'lane': 'Jungler'},
                    {'hero': 'Karrie', 'lane': 'Gold Laner'},
                    {'hero': 'Esmeralda', 'lane': 'Exp Laner'}],
    banned       = getBannedHeroes(),
    player_lanes = ['Jungler', 'Exp Laner'],
    top_n        = 10
)
for lane, heroes in results.items():
    print(f"\n  {lane} recommendations (top {len(heroes)}):")
    for rank, r in enumerate(heroes, 1):
        print(f"    {rank:2}. {r['hero']:<20} {r['score']:.4f}  {r['roles']}")

print("\nExample 3: Gold Laner, no picks yet")
results = recommend(
    model        = policy_net,
    ally_picks   = [],
    enemy_picks  = [],
    banned       = getBannedHeroes(),
    player_lanes = ['Gold Laner'],
    top_n        = 20
)
for lane, heroes in results.items():
    print(f"\n  {lane} recommendations:")
    for rank, r in enumerate(heroes, 1):
        print(f"    {rank:2}. {r['hero']:<20} {r['score']:.4f}  {r['lane']}")

Step 1: Load or Train Model
✅ Saved model found — skipping training.
Model ready for recommendations.

Step 2: Sample Recommendations
Example 1: Exp Laner + Gold Laner
Banned heroes: ['Kalea', 'Balmond', 'Bane', 'Miya', 'Chou', 'Joy', 'Paquito', 'Saber', 'Novaria', 'Minotaur']

  Exp Laner recommendations:
     1. Badang               1.0445  ['Fighter']
     2. Masha                0.9884  ['Fighter', 'Tank']
     3. Lukas                0.9589  ['Fighter']
     4. Uranus               0.9466  ['Tank']
     5. Cici                 0.9433  ['Fighter']
     6. Sun                  0.9419  ['Fighter']
     7. X.Borg               0.9413  ['Fighter']
     8. Guinevere            0.9208  ['Fighter']
     9. Yu Zhong             0.9056  ['Fighter']
    10. Arlott               0.9039  ['Fighter', 'Assassin']
    11. Thamuz               0.8983  ['Fighter']
    12. Gloo                 0.8894  ['Tank']
    13. Terizla              0.8863  ['Fighter']
    14. Alice                0.8745  ['Ma