In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Seed NumPy's legacy global RNG so the np.random.* draws in the cells below
# are repeatable. The generated values depend on the order in which the cells
# consume the stream, so run them top to bottom from a fresh kernel.
np.random.seed(42)


In [5]:
# Define Valorant agents and their roles (updated through 2026).
# Maps each agent name to exactly one role; insertion order is the order in
# which agents are later sampled from, so do not reorder casually.
agents_roles = {
    # Duelists
    "Jett": "Duelist",
    "Reyna": "Duelist",
    "Phoenix": "Duelist",
    "Raze": "Duelist",
    "Yoru": "Duelist",
    "Neon": "Duelist",
    "Iso": "Duelist",
    "Waylay": "Duelist",

    # Sentinels
    "Sage": "Sentinel",
    "Cypher": "Sentinel",
    "Killjoy": "Sentinel",
    "Chamber": "Sentinel",
    "Deadlock": "Sentinel",
    "Veto": "Sentinel",
    "Vyse": "Sentinel",

    # Initiators
    "Sova": "Initiator",
    "Breach": "Initiator",
    "Skye": "Initiator",
    "KAY/O": "Initiator",
    "Fade": "Initiator",
    "Gekko": "Initiator",
    "Tejo": "Initiator",

    # Controllers
    "Brimstone": "Controller",
    "Omen": "Controller",
    "Viper": "Controller",
    "Astra": "Controller",
    "Harbor": "Controller",
    "Clove": "Controller"
}

# Agent names in dictionary (insertion) order.
agents = list(agents_roles.keys())

# Distinct roles in first-seen order. dict.fromkeys de-duplicates while keeping
# order; list(set(...)) would yield a different order from one interpreter run
# to the next because string hashing is randomized, which would undermine the
# reproducibility this notebook relies on.
roles = list(dict.fromkeys(agents_roles.values()))

# Define ranks, ordered from lowest to highest tier.
ranks = [
    "Iron", "Bronze", "Silver", "Gold",
    "Platinum", "Diamond", "Ascendant",
    "Immortal", "Radiant"
]

In [6]:
# Size of the synthetic player population
num_players = 2000

# Player IDs: "P" followed by the 1-based index, zero-padded to four digits
player_ids = [f"P{idx:04d}" for idx in range(1, num_players + 1)]

# Probability of each rank, listed in the same order as `ranks`
# (Iron, Bronze, Silver, Gold, Platinum, Diamond, Ascendant, Immortal, Radiant).
# The weights sum to 1.0, as np.random.choice requires.
rank_probs = [0.08, 0.12, 0.18, 0.20, 0.16, 0.12, 0.08, 0.05, 0.01]

# Draw one rank per player according to those weights
player_ranks = np.random.choice(ranks, p=rank_probs, size=num_players)


In [7]:
# Pick a main agent for every player, each agent being equally likely
player_agents = np.random.choice(agents, size=num_players)

# Look up the role that corresponds to each player's agent
player_roles = list(map(agents_roles.__getitem__, player_agents))


In [8]:
# Matches played: uniform integer draw in [50, 400) for every player.
# Note that this draw does not depend on the player's rank.
matches_played = np.random.randint(50, 400, size=num_players)

# Mean win rate for each rank
rank_winrate_base = dict(
    Iron=0.42,
    Bronze=0.45,
    Silver=0.48,
    Gold=0.50,
    Platinum=0.52,
    Diamond=0.54,
    Ascendant=0.56,
    Immortal=0.58,
    Radiant=0.60,
)

# One win rate per player: normal noise (sd 0.05) around the rank mean,
# clipped to the range [0.30, 0.75]
win_rates = []
for player_rank in player_ranks:
    noisy_rate = np.random.normal(rank_winrate_base[player_rank], 0.05)
    win_rates.append(np.clip(noisy_rate, 0.30, 0.75))


In [9]:
# Mean per-match combat stats for each rank, written as one row per rank
combat_stat_fields = ("kills", "deaths", "assists", "acs")
rank_combat_rows = {
    "Iron": (12, 16, 4, 160),
    "Bronze": (14, 15, 4.5, 170),
    "Silver": (16, 14, 5, 185),
    "Gold": (18, 13, 5.5, 200),
    "Platinum": (19, 12.5, 6, 215),
    "Diamond": (20, 12, 6.5, 230),
    "Ascendant": (21, 11.5, 7, 245),
    "Immortal": (22, 11, 7.5, 260),
    "Radiant": (23, 10.5, 8, 280),
}
rank_combat_base = {
    tier: dict(zip(combat_stat_fields, row))
    for tier, row in rank_combat_rows.items()
}

# Ranks that are eligible for the "very poor performance" adjustment
struggling_ranks = ("Iron", "Bronze", "Silver")

# Output columns, one entry per player
kills_per_match, deaths_per_match = [], []
assists_per_match, acs_values = [], []

# The order of the random draws below determines the generated values,
# so keep it as is: kills, deaths, assists, acs, roll, then adjustments.
for player_rank in player_ranks:
    means = rank_combat_base[player_rank]

    # Typical performance: normal noise around the rank means
    kill_count = np.random.normal(means["kills"], 2)
    death_count = np.random.normal(means["deaths"], 1.5)
    assist_count = np.random.normal(means["assists"], 1.5)
    combat_score = np.random.normal(means["acs"], 20)

    # A single uniform roll decides whether this player is an outlier
    outlier_roll = np.random.rand()

    if outlier_roll < 0.03:
        # 3% of players at any rank: pop-off, +10 to +20 kills and +80 to +140 ACS
        kill_count = kill_count + np.random.uniform(10, 20)
        combat_score = combat_score + np.random.uniform(80, 140)
    elif outlier_roll < 0.07 and player_rank in struggling_ranks:
        # Next 4% of rolls, Iron/Bronze/Silver only: -5 to -10 kills, +4 to +8 deaths
        kill_count = kill_count - np.random.uniform(5, 10)
        death_count = death_count + np.random.uniform(4, 8)

    # Clamp every stat to its allowed range before storing it
    kills_per_match.append(np.clip(kill_count, 3, 45))
    deaths_per_match.append(np.clip(death_count, 5, 25))
    assists_per_match.append(np.clip(assist_count, 1, 15))
    acs_values.append(np.clip(combat_score, 100, 420))


In [10]:
# Mean playstyle tendencies for each role
role_playstyle_base = {
    "Duelist": dict(hs=0.24, utility=0.30, clutch=0.22),
    "Initiator": dict(hs=0.22, utility=0.55, clutch=0.25),
    "Controller": dict(hs=0.21, utility=0.65, clutch=0.28),
    "Sentinel": dict(hs=0.23, utility=0.50, clutch=0.35),
}

headshot_pct, utility_usage, clutch_success_rate = [], [], []

# One row per metric: (key in the base table, noise sd, lower clip, upper clip,
# destination list). Row order fixes the order of the random draws per player.
playstyle_metrics = (
    ("hs", 0.05, 0.10, 0.45, headshot_pct),
    ("utility", 0.10, 0.10, 0.90, utility_usage),
    ("clutch", 0.08, 0.05, 0.70, clutch_success_rate),
)

for player_role in player_roles:
    tendencies = role_playstyle_base[player_role]
    for metric_key, spread, lower, upper, destination in playstyle_metrics:
        # Normal noise around the role mean, clipped to the metric's range
        drawn_value = np.random.normal(tendencies[metric_key], spread)
        destination.append(np.clip(drawn_value, lower, upper))


In [11]:
# Collect the generated columns; dictionary order becomes the column order
player_columns = dict(
    player_id=player_ids,
    rank=player_ranks,
    agent=player_agents,
    role=player_roles,
    matches_played=matches_played,
    win_rate=win_rates,
    kills_per_match=kills_per_match,
    deaths_per_match=deaths_per_match,
    assists_per_match=assists_per_match,
    acs=acs_values,
    headshot_pct=headshot_pct,
    utility_usage=utility_usage,
    clutch_success_rate=clutch_success_rate,
)

# Build the final table, one row per synthetic player
player_stats_df = pd.DataFrame(player_columns)


In [12]:
# Preview the first five rows of the generated table
player_stats_df.head(5)

Unnamed: 0,player_id,rank,agent,role,matches_played,win_rate,kills_per_match,deaths_per_match,assists_per_match,acs,headshot_pct,utility_usage,clutch_success_rate
0,P0001,Silver,Clove,Controller,367,0.529512,16.702813,13.294893,5.307003,182.779731,0.232486,0.526087,0.356563
1,P0002,Immortal,Iso,Duelist,363,0.550652,20.434803,12.060549,10.627069,239.841766,0.230088,0.355264,0.336361
2,P0003,Platinum,Phoenix,Duelist,210,0.501133,18.36102,13.31431,6.112292,241.372359,0.276981,0.290517,0.375524
3,P0004,Platinum,Brimstone,Controller,380,0.555864,20.521012,9.885579,5.088647,246.363649,0.207767,0.659313,0.26691
4,P0005,Bronze,Clove,Controller,65,0.421751,16.144393,16.518337,1.977407,164.089954,0.14872,0.516421,0.235664


In [13]:
# Save dataset to the raw data folder.
# Create the target folder first: to_csv does not create missing directories
# and would otherwise raise when ../data/raw is absent (e.g. a fresh checkout).
output_path = Path("../data/raw/player_stats.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame's positional index out of the file
player_stats_df.to_csv(output_path, index=False)
