In this notebook, I will create dataframes: one with kills and economy information per rounds and the other is player rating information.\
Goal is to define our own economy rating with kills and economy information per rounds and then use this economy rating together with player rating

In [1]:
import pandas as pd

In [2]:
og_rounds_kills = pd.read_csv("../data/vct_2023/matches/rounds_kills.csv")
og_eco_rounds = pd.read_csv("../data/vct_2023/matches/eco_rounds.csv")
og_rounds_kills["Round Number"] = og_rounds_kills["Round Number"].apply(lambda x: int(x.split(" ")[-1]))
og_eco_rounds["Outcome"] = og_eco_rounds["Outcome"].apply(lambda x: 1 if x == "Win" else -1)
og_eco_rounds["Loadout Value"] = og_eco_rounds["Loadout Value"].apply(lambda x: int(x.replace(".","").replace("k","00")))
og_eco_rounds["Remaining Credits"] = og_eco_rounds["Remaining Credits"].apply(lambda x: int(x.replace(".","").replace("k","00")))
og_eco_rounds["Type"] = og_eco_rounds["Type"].apply(lambda x: x.split(":")[0])

og_maps_scores = pd.read_csv("../data/vct_2023/matches/maps_scores.csv")
og_overview = pd.read_csv("../data/vct_2023/matches/overview.csv")
og_players_stats = pd.read_csv("../data/vct_2023/players_stats/players_stats.csv")

In [3]:
roles = {"duelist": {"jett", "phoenix", "reyna", "raze", "yoru", "neon", "iso"},  # dictionary of agents grouped by each role.
             "initiator": {"sova", "breach", "skye", "kayo", "fade", "gekko"},
             "controller": {"brimstone", "omen", "viper", "astra", "harbor"},
             "sentinel": {"cypher", "sage", "killjoy", "chamber", "deadlock"}}
allagents = set()  # initiate an empty set to make the set of all agents.
for role, l in roles.items():
    allagents = allagents.union(set(l))

duelists = roles["duelist"]  # set of all duelists
initiators = roles["initiator"]  # set of all initiators
controllers = roles["controller"]  # set of all controllers
sentinels = roles["sentinel"]  # set of all sentinels

def get_role(s:str) -> str:
    """
    input: agent names
    return: agent roles
    """
    lst = s.split(", ")
    num_of_agents = len(lst)
    output = ""
    if len(lst) == 1:
        agent = lst[0]
        if agent in duelists:
            output += "duel"
        elif agent in initiators:
            output += "init"
        elif agent in controllers:
            output += "cont"
        elif agent in sentinels:
            output += "sent"
        elif agent == "all":
            output += "agent"
        else:
            raise ValueError(f"{s} is not a valid agent.")
        return output
    else:
        for agent in lst:
            if agent in duelists:
                output += "duel,"
            elif agent in initiators:
                output += "init,"
            elif agent in controllers:
                output += "cont,"
            elif agent in sentinels:
                output += "sent,"
            elif agent == "all":
                output += "agent,"
            else:
                raise ValueError(f"{s} is not a valid agent.")
        return output[:-1]

In [4]:
# The next two print values show that "rounds_kills" and "eco_rounds" files don't have NaN.
print(og_rounds_kills.isna().sum().sum())
print(og_eco_rounds.isna().sum().sum())

0
0


In [5]:
og_players_stats.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10516 entries, 0 to 10515
Data columns (total 25 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Tournament                      10516 non-null  object 
 1   Stage                           10516 non-null  object 
 2   Match Type                      10516 non-null  object 
 3   Player                          10516 non-null  object 
 4   Team                            10516 non-null  object 
 5   Agents                          10516 non-null  object 
 6   Rounds Played                   10516 non-null  int64  
 7   Rating                          9909 non-null   float64
 8   Average Combat Score            10504 non-null  float64
 9   Kills:Deaths                    10516 non-null  float64
 10  Kill, Assist, Trade, Survive %  9909 non-null   object 
 11  Average Damage Per Round        9909 non-null   float64
 12  Kills Per Round                 

In [6]:
og_players_stats.isna().sum()

Tournament                           0
Stage                                0
Match Type                           0
Player                               0
Team                                 0
Agents                               0
Rounds Played                        0
Rating                             607
Average Combat Score                12
Kills:Deaths                         0
Kill, Assist, Trade, Survive %     607
Average Damage Per Round           607
Kills Per Round                      0
Assists Per Round                    0
First Kills Per Round              584
First Deaths Per Round             607
Headshot %                         608
Clutch Success %                  5733
Clutches (won/played)             1170
Maximum Kills in a Single Map        0
Kills                                0
Deaths                               0
Assists                              0
First Kills                          0
First Deaths                         0
dtype: int64

In [7]:
match_id_columns = ["Tournament", "Stage", "Match Type", "Team"]
player_agent = ["Player", "Agents"]
features = ["Rating",
            "Average Combat Score",
            "Kill, Assist, Trade, Survive %",
            "Average Damage Per Round",
            "Kills Per Round",
            "Assists Per Round",
            "First Kills Per Round",
            "First Deaths Per Round",
            "Headshot %",
            "Clutch Success %"]
            #"Kills:Deaths", I will not use "Kills:Deaths" which is "First Kills Per Round" - "First Deaths Per Round".
            # Instead we kept "First Kills Per Round" and "First Deaths Per Round"
            #"Clutches (won/played)", this info is included in "Clutch Success %"
            #"Maximum Kills in a Single Map", other kill related features should reflect this information
            #"Kills", 
            #"Deaths",
            #"Assists",
            #"First Kills",
            #"First Deaths"
            # In many cases, one player played the same agent in two or three different maps in a match,
            # where "Kills", "Deaths", "Assists", "First Kills", "First Deaths" values were accumulated.
            # That's why I want to drop these columns.                 

In [8]:
players_stats = og_players_stats[match_id_columns + player_agent + features]
players_stats.head()

Unnamed: 0,Tournament,Stage,Match Type,Team,Player,Agents,Rating,Average Combat Score,"Kill, Assist, Trade, Survive %",Average Damage Per Round,Kills Per Round,Assists Per Round,First Kills Per Round,First Deaths Per Round,Headshot %,Clutch Success %
0,Champions Tour 2023: Americas Last Chance Qual...,Main Event,Upper Round 1,KRÜ Esports,Melser,brimstone,1.14,256.0,75%,168.9,0.85,0.45,0.1,0.15,32%,
1,Champions Tour 2023: Americas Last Chance Qual...,Main Event,Upper Round 1,KRÜ Esports,Melser,omen,1.12,216.0,81%,131.8,0.86,0.05,0.19,0.05,25%,33%
2,Champions Tour 2023: Americas Last Chance Qual...,Main Event,Upper Round 1,KRÜ Esports,Melser,"brimstone, omen",1.13,236.0,78%,149.9,0.85,0.24,0.15,0.1,28%,25%
3,Champions Tour 2023: Americas Last Chance Qual...,Main Event,Upper Round 1,KRÜ Esports,DaveeyS,killjoy,1.29,253.0,86%,170.1,0.9,0.33,0.1,0.0,27%,100%
4,Champions Tour 2023: Americas Last Chance Qual...,Main Event,Upper Round 1,KRÜ Esports,keznit,raze,1.24,262.5,80%,172.5,0.85,0.39,0.15,0.1,15%,


In [9]:
players_stats.keys()

Index(['Tournament', 'Stage', 'Match Type', 'Team', 'Player', 'Agents',
       'Rating', 'Average Combat Score', 'Kill, Assist, Trade, Survive %',
       'Average Damage Per Round', 'Kills Per Round', 'Assists Per Round',
       'First Kills Per Round', 'First Deaths Per Round', 'Headshot %',
       'Clutch Success %'],
      dtype='object')

In [10]:
# make numeric features to be actually numeric

numeric_columns = ["Kill, Assist, Trade, Survive %", "Headshot %", "Clutch Success %"]
for feature in numeric_columns:
    players_stats[feature] = players_stats[feature].apply(lambda s: int(s.split("%")[0])/100 if type(s) == str else None)

players_stats.info()  # "Numeric" features are now really numeric.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10516 entries, 0 to 10515
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Tournament                      10516 non-null  object 
 1   Stage                           10516 non-null  object 
 2   Match Type                      10516 non-null  object 
 3   Team                            10516 non-null  object 
 4   Player                          10516 non-null  object 
 5   Agents                          10516 non-null  object 
 6   Rating                          9909 non-null   float64
 7   Average Combat Score            10504 non-null  float64
 8   Kill, Assist, Trade, Survive %  9909 non-null   float64
 9   Average Damage Per Round        9909 non-null   float64
 10  Kills Per Round                 10516 non-null  float64
 11  Assists Per Round               10516 non-null  float64
 12  First Kills Per Round           

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_stats[feature] = players_stats[feature].apply(lambda s: int(s.split("%")[0])/100 if type(s) == str else None)


In [11]:
players_stats.isna().sum()

Tournament                           0
Stage                                0
Match Type                           0
Team                                 0
Player                               0
Agents                               0
Rating                             607
Average Combat Score                12
Kill, Assist, Trade, Survive %     607
Average Damage Per Round           607
Kills Per Round                      0
Assists Per Round                    0
First Kills Per Round              584
First Deaths Per Round             607
Headshot %                         608
Clutch Success %                  5733
dtype: int64

In [12]:
columns_with_missing_values = ["Rating",
                                "Average Combat Score",
                                "Kill, Assist, Trade, Survive %",
                                "Average Damage Per Round",
                                "First Kills Per Round",
                                "First Deaths Per Round",
                                "Headshot %",
                                "Clutch Success %"]

In [13]:
# I will fill in missing values with average of each player's performance of each tournament and then each team's performance and so on.
# See the following cells to check how I fillin missing values.
for column in columns_with_missing_values:
    players_stats.loc[:,column] = players_stats\
        .groupby(["Tournament", "Player"])[column].transform(lambda x: x.fillna(x.mean()))

In [14]:
for column in columns_with_missing_values:
    players_stats.loc[:,column] = players_stats\
    .groupby(["Tournament", "Team"])[column].transform(lambda x: x.fillna(x.mean()))

In [15]:
for column in columns_with_missing_values:
    players_stats.loc[:,column] = players_stats\
        .groupby(["Player", "Agents"])[column].transform(lambda x: x.fillna(x.mean()))

In [16]:
for column in columns_with_missing_values:
    players_stats.loc[:,column] = players_stats\
        .groupby(["Player"])[column].transform(lambda x: x.fillna(x.mean()))

In [17]:
for column in columns_with_missing_values:
    players_stats.loc[:,column] = players_stats\
        .groupby(["Team"])[column].transform(lambda x: x.fillna(x.mean()))

In [18]:
players_stats.isna().sum().sum()

0

In [19]:
ind_side_is_both = og_overview["Side"] == "both"
ind_map = og_overview["Map"] != "All Maps"

overview_agent_pick_keys = ["Tournament", "Stage", "Match Type", "Match Name", "Map", "Player", 'Team', 'Agents']
agent_pick_from_overview = og_overview[ind_side_is_both & ind_map][overview_agent_pick_keys]

agent_pick_from_overview

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Player,Team,Agents
0,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,nAts,Team Liquid,viper
3,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,Sayf,Team Liquid,breach
6,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,soulcas,Team Liquid,astra
9,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,Jamppi,Team Liquid,neon
12,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,Redgar,Team Liquid,sova
...,...,...,...,...,...,...,...,...
34929,Champions Tour 2023: Lock-In Sao Paulo,Playoffs,Grand Final,LOUD vs FNATIC,Icebox,Derke,FNATIC,jett
34932,Champions Tour 2023: Lock-In Sao Paulo,Playoffs,Grand Final,LOUD vs FNATIC,Icebox,Boaster,FNATIC,viper
34935,Champions Tour 2023: Lock-In Sao Paulo,Playoffs,Grand Final,LOUD vs FNATIC,Icebox,Alfajer,FNATIC,killjoy
34938,Champions Tour 2023: Lock-In Sao Paulo,Playoffs,Grand Final,LOUD vs FNATIC,Icebox,Leo,FNATIC,sova


In [20]:
merge_keys = ["Tournament", "Stage", "Match Type", "Player", "Agents", "Team"]
players_stats_agent_pick = agent_pick_from_overview\
                                .merge(players_stats, on=merge_keys, how="left")\
                                

# Run the line below if you want to see where we have missing players stats.                                
#           |
#           V
# players_stats_agent_pick[players_stats_agent_pick.isna().any(axis=1)].groupby(["Tournament", "Stage", "Match Type", "Team"]).agg("count")

In [21]:
players_stats_agent_pick_columns_with_null\
      = ["Rating",
        "Average Combat Score",
        "Kill, Assist, Trade, Survive %",
        "Average Damage Per Round",
        "Kills Per Round",
        "Assists Per Round",
        "First Kills Per Round",
        "First Deaths Per Round",
        "Headshot %",
        "Clutch Success %"]

In [22]:
# There are 586 missing values from players_stats file.
# I checked the vlr.gg if the website has those missing values.
# At least I confirmed that it has some missing values.
# Because I don't have enough time to scrape the data,
# I will just take average of player's exisiting stats.
# There can be many different ways to deal with null values, but I chose to do in the following way.

for column in players_stats_agent_pick_columns_with_null:
    players_stats_agent_pick.loc[:,column] = players_stats_agent_pick\
        .groupby(["Tournament", "Stage", "Match Type", "Team"])[column]\
            .transform(lambda x: x.fillna(x.mean()))\
            

In [23]:
for column in players_stats_agent_pick_columns_with_null:
    players_stats_agent_pick.loc[:,column] = players_stats_agent_pick\
        .groupby(["Tournament", "Stage", "Team"])[column].transform(lambda x: x.fillna(x.mean()))

In [24]:
players_stats_agent_pick.isna().sum().sum()

0

In [25]:
players_stats_agent_pick = players_stats_agent_pick.round(2)  # Decimal digits were too big.

Next step is to combine "players_stats_by_agents_and_team_composition" with map results.

In [26]:
og_maps_scores.head(3)

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team A,Team A Score,Team A Attacker Score,Team A Defender Score,Team A Overtime Score,Team B,Team B Score,Team B Attacker Score,Team B Defender Score,Team B Overtime Score,Duration
0,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,Team Liquid,11,6,5,,Natus Vincere,13,7,6,,1:18:55
1,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Bind,Team Liquid,15,7,5,3.0,Natus Vincere,17,7,5,5.0,1:22:57
2,Valorant Champions 2023,Group Stage,Opening (D),DRX vs LOUD,Lotus,DRX,13,7,5,1.0,LOUD,15,7,5,3.0,1:17:19


In [27]:
# Extracting map results and score difference.

Team_A_win = (og_maps_scores["Team A Score"] - og_maps_scores["Team B Score"]) > 0
Team_B_win = (og_maps_scores["Team B Score"] - og_maps_scores["Team A Score"]) > 0
Team_A_score_diff = og_maps_scores["Team A Score"] - og_maps_scores["Team B Score"]
Team_B_score_diff = og_maps_scores["Team B Score"] - og_maps_scores["Team A Score"]

map_results_with_score_diff = pd.DataFrame({
"Team_A_win": Team_A_win,
"Team_B_win": Team_B_win,
"Team_A_score_diff": Team_A_score_diff,
"Team_B_score_diff": Team_B_score_diff
})

In [28]:
maps_scores_results = pd.concat([og_maps_scores, map_results_with_score_diff], axis=1)

In [29]:
maps_scores_results.keys()

Index(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team A',
       'Team A Score', 'Team A Attacker Score', 'Team A Defender Score',
       'Team A Overtime Score', 'Team B', 'Team B Score',
       'Team B Attacker Score', 'Team B Defender Score',
       'Team B Overtime Score', 'Duration', 'Team_A_win', 'Team_B_win',
       'Team_A_score_diff', 'Team_B_score_diff'],
      dtype='object')

In [30]:
# For now, I don't have a plan to separately analyze the first half and the second half.
# I also will not pay attention to overtime, so my main features will be the following.
maps_scores_results_main_features = [\
    "Team_A_win", "Team_A_score_diff", "Team_B_win", "Team_B_score_diff"\
      ]\

map_outcomes_AvsB\
    = maps_scores_results[\
        ["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team A", "Team B"]\
        + maps_scores_results_main_features
        ]\
        


In [31]:
print(f"The size of map_outcome {len(map_outcomes_AvsB)} times 10 (5 players each from two teams) is")
print(f"the size of players_stats_agent_pick {len(players_stats_agent_pick)}, so it's all good!")

The size of map_outcome 830 times 10 (5 players each from two teams) is
the size of players_stats_agent_pick 8300, so it's all good!


In [32]:
# Let's add "Role" column to "players_stats_agent_pick".  This column indicates a role of an agent in each row.
# Remark: get_role function is more complicated than what we need here because I want to use it in different situation as well.
players_stats_agent_pick["Role"] = players_stats_agent_pick["Agents"].apply(get_role)

In [33]:
composition_dummies = pd.get_dummies(players_stats_agent_pick["Role"], dtype=int)
players_stats_team_comp = pd.concat([players_stats_agent_pick, composition_dummies], axis=1).round(2)

In [34]:
print(f"The size of players_stats_team_comp is {len(players_stats_team_comp)}.")
players_stats_team_comp.head(2)

The size of players_stats_team_comp is 8300.


Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Player,Team,Agents,Rating,Average Combat Score,...,Assists Per Round,First Kills Per Round,First Deaths Per Round,Headshot %,Clutch Success %,Role,cont,duel,init,sent
0,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,nAts,Team Liquid,viper,1.26,263.0,...,0.18,0.14,0.09,0.31,0.27,cont,1,0,0,0
1,Valorant Champions 2023,Group Stage,Opening (D),Team Liquid vs Natus Vincere,Fracture,Sayf,Team Liquid,breach,0.96,170.0,...,0.42,0.0,0.04,0.23,0.33,init,0,0,1,0


In [35]:
players_stats_team_comp.keys()

Index(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Player',
       'Team', 'Agents', 'Rating', 'Average Combat Score',
       'Kill, Assist, Trade, Survive %', 'Average Damage Per Round',
       'Kills Per Round', 'Assists Per Round', 'First Kills Per Round',
       'First Deaths Per Round', 'Headshot %', 'Clutch Success %', 'Role',
       'cont', 'duel', 'init', 'sent'],
      dtype='object')

In [36]:
# If we use deep learning models, we probably don't need to do what I did in this cell.

stats_list = ['Rating', 'Average Combat Score',
       'Kill, Assist, Trade, Survive %', 'Average Damage Per Round',
       'Kills Per Round', 'Assists Per Round', 'First Kills Per Round',
       'First Deaths Per Round', 'Headshot %', 'Clutch Success %']
role_list = ['cont', 'duel', 'init', 'sent']

role_stats_dict = {}  # initiate a dictionary.  By multiplying "duel", "init",... with "Rating", "Average Combat Score", ...
                        # we have each role's stats of each team and each game/map

for stat in stats_list:
    for role in role_list:
        role_stats_dict[role+"_"+stat] = players_stats_team_comp[stat] * players_stats_team_comp[role]


In [37]:
# After adding role_stats_dict, we can drop the following columns.
columns_to_drop = ['Rating', 'Average Combat Score',
       'Kill, Assist, Trade, Survive %', 'Average Damage Per Round',
       'Kills Per Round', 'Assists Per Round', 'First Kills Per Round',
       'First Deaths Per Round', 'Headshot %', 'Clutch Success %', 'Role']

players_stats_team_comp_before_agg = pd.concat([players_stats_team_comp, pd.DataFrame(role_stats_dict)], axis=1).drop(columns_to_drop, axis=1)

In [38]:
# I will use this function to get team composition.
def sort_join(x):
    """
    input: array of strings
    output: sorted strings joined by ","
    """
    return ",".join(sorted(list(x)))


In [39]:
left =\
    players_stats_team_comp_before_agg.groupby(["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team"]).agg({\
        "Player": sort_join,
        "Agents": sort_join
        })\
        .reset_index()\
        .rename(columns={"Agents":"Composition"})

right =\
    players_stats_team_comp_before_agg.drop(["Player", "Agents"], axis=1)\
    .groupby(["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team"]).agg("sum")\
    .reset_index()


In [40]:
merge_keys_2 = ["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team"]
stats_team_comp = pd.merge(left, right, on=merge_keys_2)  # This has both players stats and team composition data.

In [41]:
map_outcomes_AvsB.keys()

Index(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team A',
       'Team B', 'Team_A_win', 'Team_A_score_diff', 'Team_B_win',
       'Team_B_score_diff'],
      dtype='object')

In [42]:
map_outcomes_A = map_outcomes_AvsB[['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team A','Team_A_win', 'Team_A_score_diff']]\
        .rename(columns={"Team A": "Team", "Team_A_win": "Win", "Team_A_score_diff":"Score_diff"})
map_outcomes_B = map_outcomes_AvsB[['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team B','Team_B_win', 'Team_B_score_diff']]\
        .rename(columns={"Team B": "Team", "Team_B_win": "Win", "Team_B_score_diff":"Score_diff"})

In [43]:
map_outcomes = pd.concat([map_outcomes_A, map_outcomes_B])
# In this dataframe, we don't compare Team A and Team B.  Only look at one team of each map/game.
#      |
#      v
stats_team_comp_outcomes = pd.merge(stats_team_comp, map_outcomes, on=merge_keys_2)

In [44]:
#stats_team_comp_outcomes

In [45]:
# Now I want to combine stats_team_comp and map_outcome_AvsB
# We can use the same merge keys, merge_keys_2 = ["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team"]
# In this dataframe, we do "Team A" vs "Team B".
#       |
#       v
stats_team_comp_outcomes_AvsB = map_outcomes_AvsB\
                                    .rename(columns={"Team A":"Team"})\
                                    .merge(stats_team_comp, on=merge_keys_2)\
                                    .rename(columns={"Team": "Team_A"})\
                                    .rename(columns={"Team B":"Team"})\
                                    .merge(stats_team_comp, on=merge_keys_2, suffixes=("_sum_A", "_sum_B"))\
                                    .rename(columns={"Team": "Team_B"})\



In [46]:
#stats_team_comp_outcomes_AvsB

### To prevent data leakage.
- I will make "test_data" and "test_data2" containing players stats from earlier tournaments.\
Also, similarly "AvsB_test_data" and "AvsB_test_data2" which have "Team A" vs "Team B" structure.

- Players' stats in "test_data" and "AvsB_test_data" will be computed from smaller sets of tournaments than "test_data2" and "AvsB_test_data2".\
Their stats are based on the latest_tournaments that you can find in the following cell.

- Players' stats in "test_data2" and "AvsB_test_data2" will be computed from all tournaments but the last tournament which is "Valorant Champions 2023".

In [47]:
latest_tournaments = set(['Champions Tour 2023: Americas Last Chance Qualifier',
       'Champions Tour 2023: EMEA Last Chance Qualifier',
       'Champions Tour 2023: Pacific Last Chance Qualifier',
       'Champions Tour 2023: Champions China Qualifier',
       'Champions Tour 2023: Masters Tokyo'])
last_tournament = "Valorant Champions 2023"


In [48]:
train_data_stats_team_comp_outcomes = stats_team_comp_outcomes[stats_team_comp_outcomes.Tournament.isin(latest_tournaments)]
train_data_stats_team_comp_outcomes2 = stats_team_comp_outcomes[stats_team_comp_outcomes.Tournament != last_tournament]

test_tournament = stats_team_comp_outcomes[stats_team_comp_outcomes.Tournament == last_tournament]
AvsB_test_tournament = map_outcomes_AvsB[map_outcomes_AvsB.Tournament == last_tournament]

In [49]:
train_data_stats_team_comp_outcomes.keys()

Index(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team',
       'Player', 'Composition', 'cont', 'duel', 'init', 'sent', 'cont_Rating',
       'duel_Rating', 'init_Rating', 'sent_Rating',
       'cont_Average Combat Score', 'duel_Average Combat Score',
       'init_Average Combat Score', 'sent_Average Combat Score',
       'cont_Kill, Assist, Trade, Survive %',
       'duel_Kill, Assist, Trade, Survive %',
       'init_Kill, Assist, Trade, Survive %',
       'sent_Kill, Assist, Trade, Survive %', 'cont_Average Damage Per Round',
       'duel_Average Damage Per Round', 'init_Average Damage Per Round',
       'sent_Average Damage Per Round', 'cont_Kills Per Round',
       'duel_Kills Per Round', 'init_Kills Per Round', 'sent_Kills Per Round',
       'cont_Assists Per Round', 'duel_Assists Per Round',
       'init_Assists Per Round', 'sent_Assists Per Round',
       'cont_First Kills Per Round', 'duel_First Kills Per Round',
       'init_First Kills Per Round', 'sent_First

In [50]:
train_stats_data =\
    train_data_stats_team_comp_outcomes\
        .drop(['Tournament', 'Stage', 'Match Type', 'Match Name',"Player", "Composition", 'Win', 'Score_diff'], axis=1)\
            .groupby(["Map", "Team"])\
                .agg("mean")\
                .reset_index()\
            
train_stats_data.head()

Unnamed: 0,Map,Team,cont,duel,init,sent,cont_Rating,duel_Rating,init_Rating,sent_Rating,...,init_First Deaths Per Round,sent_First Deaths Per Round,cont_Headshot %,duel_Headshot %,init_Headshot %,sent_Headshot %,cont_Clutch Success %,duel_Clutch Success %,init_Clutch Success %,sent_Clutch Success %
0,Ascent,Attacking Soul Esports,1.0,1.0,2.0,1.0,0.91,0.97,1.77,1.015,...,0.23,0.055,0.3,0.24,0.43,0.21,0.26,0.26,0.4,0.34
1,Ascent,Bilibili Gaming,1.0,1.0,2.0,1.0,0.88,0.92,1.9,1.07,...,0.2,0.09,0.23,0.21,0.44,0.27,0.19,0.11,0.42,0.25
2,Ascent,Cloud9,2.0,1.0,1.0,1.0,2.09,1.1,0.72,0.92,...,0.11,0.06,0.65,0.29,0.2,0.16,0.57,0.4,0.17,0.25
3,Ascent,DRX,1.0,1.0,2.0,1.0,1.28,1.0,1.49,0.8,...,0.1,0.13,0.42,0.22,0.59,0.2,0.09,1.0,0.63,0.22
4,Ascent,DetonatioN FocusMe,1.0,1.0,2.0,1.0,0.92,0.68,1.74,0.97,...,0.19,0.07,0.61,0.22,0.49,0.19,1.0,0.5,0.75,0.5


In [51]:
train_stats_data2 =\
    train_data_stats_team_comp_outcomes2\
        .drop(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Player', 'Composition', 'Win', 'Score_diff'], axis=1)\
            .groupby(["Map", "Team"])\
                .agg("mean")\
                .reset_index()\
                
train_stats_data2.head()

Unnamed: 0,Map,Team,cont,duel,init,sent,cont_Rating,duel_Rating,init_Rating,sent_Rating,...,init_First Deaths Per Round,sent_First Deaths Per Round,cont_Headshot %,duel_Headshot %,init_Headshot %,sent_Headshot %,cont_Clutch Success %,duel_Clutch Success %,init_Clutch Success %,sent_Clutch Success %
0,Ascent,100 Thieves,1.75,1.0,2.0,0.25,1.4725,0.93,1.865,0.185,...,0.1575,0.0275,0.475,0.21,0.4925,0.0575,0.5075,0.26,0.67,0.025
1,Ascent,Attacking Soul Esports,1.0,1.0,2.0,1.0,0.91,0.97,1.77,1.015,...,0.23,0.055,0.3,0.24,0.43,0.21,0.26,0.26,0.4,0.34
2,Ascent,BBL Esports,1.0,1.0,2.0,1.0,1.04,1.156667,2.046667,0.993333,...,0.146667,0.09,0.351667,0.283333,0.525,0.266667,0.566667,0.241667,0.793333,0.323333
3,Ascent,Bilibili Gaming,1.0,1.0,2.0,1.0,0.88,0.92,1.9,1.07,...,0.2,0.09,0.23,0.21,0.44,0.27,0.19,0.11,0.42,0.25
4,Ascent,Cloud9,1.571429,1.0,1.428571,1.0,1.71,1.174286,1.338571,0.994286,...,0.104286,0.062857,0.437143,0.228571,0.357143,0.192857,0.538571,0.567143,0.421429,0.222857


In [52]:
# 'Win' and 'Score diff' are true values from the game, and we will add these teams stats from latest tournaments and the previous tournaments.
# Also, don't use "Player" in test_tournament for convenience.
# Consequently, test_data has 1 less feature ("Player") than stats_team_comp_outcomes (this contains actual values of maps/games in 2023).

test_data = test_tournament[['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team', 'Composition', 'Win','Score_diff']]\
                .merge(train_stats_data, on=["Map", "Team"], how="left")  # Didn't grap "Player" from the features of "test_tournament"

test_data2 = test_tournament[['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team', 'Composition', 'Win','Score_diff']]\
                .merge(train_stats_data2, on=["Map", "Team"], how="left")  # Didn't grap "Player" from the features of "test_tournament"


In [53]:
columns = test_data.keys()
columns_ignore = set(['Tournament', 'Stage', 'Match Type', 'Match Name','Composition', 'Win', 'Score_diff', 'Map', 'Team'])

In [54]:
# Run this cell to fill null values with average of team stats.
for column in columns:
    if column not in columns_ignore:
        test_data[column] = test_data\
                            .drop(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Composition'],axis=1)\
                                .groupby(["Team"])[column].transform(lambda x: x.fillna(x.mean()))
        
for column in columns:
    if column not in columns_ignore:
        test_data2[column] = test_data2\
                            .drop(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Composition'],axis=1)\
                                .groupby(["Team"])[column].transform(lambda x: x.fillna(x.mean()))

In [55]:
merge_keys3 = ['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team A', "Team_A_score_diff", "Team_A_win"]
merge_keys4 = ['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team B', "Team_B_score_diff", "Team_B_win"]

AvsB_test_data =\
    AvsB_test_tournament.merge(
        test_data.rename(columns={"Team":"Team A", "Score_diff":"Team_A_score_diff", "Win":"Team_A_win"})\
            , on=merge_keys3\
                , how="left"\
        )\
        .merge(
        test_data.rename(columns={"Team":"Team B", "Score_diff":"Team_B_score_diff", "Win":"Team_B_win"})\
            , on=merge_keys4\
                , how="left", suffixes=("_A","_B")\
        )\

AvsB_test_data2 =\
    AvsB_test_tournament.merge(
        test_data2.rename(columns={"Team":"Team A", "Score_diff":"Team_A_score_diff", "Win":"Team_A_win"})\
            , on=merge_keys3\
                , how="left"\
        )\
        .merge(
        test_data2.rename(columns={"Team":"Team B", "Score_diff":"Team_B_score_diff", "Win":"Team_B_win"})\
            , on=merge_keys4\
                , how="left", suffixes=("_A","_B")\
        )\

