In [1]:
import xgbStack
import pandas as pd

from itertools import combinations, permutations, product
from collections import Counter

# Location of the raw match export that this notebook reads.
match_data_path = "D:\\Github Repos\\csci-567\\lol_data\\matchinfo.csv"

# Columns to load: both team tags, the blue-side result, and for every
# side/role the player column immediately followed by its champion column.
# "rResult" is deliberately not loaded (it was only used for experiments).
INPUT_COLS_MATCHDATA = ["blueTeamTag", "redTeamTag", "bResult"] + [
    f"{team}{role}{suffix}"
    for team in ("blue", "red")
    for role in ("Top", "Jungle", "Middle", "ADC", "Support")
    for suffix in ("", "Champ")
]

# Read only those columns, then discard rows that lack a blue team tag or a
# blue top-lane player; ignore_index renumbers the surviving rows from 0.
match_data = (
    pd.read_csv(match_data_path, usecols=INPUT_COLS_MATCHDATA)
    .dropna(subset=['blueTeamTag', 'blueTop'], ignore_index=True)
)

match_data.head()

Unnamed: 0,blueTeamTag,bResult,redTeamTag,blueTop,blueTopChamp,blueJungle,blueJungleChamp,blueMiddle,blueMiddleChamp,blueADC,...,redTop,redTopChamp,redJungle,redJungleChamp,redMiddle,redMiddleChamp,redADC,redADCChamp,redSupport,redSupportChamp
0,TSM,1,C9,Dyrus,Irelia,Santorin,RekSai,Bjergsen,Ahri,WildTurtle,...,Balls,Gnar,Meteos,Elise,Hai,Fizz,Sneaky,Sivir,LemonNation,Thresh
1,CST,0,DIG,Cris,Gnar,Impaler,Rengar,Jesiz,Ahri,Mash,...,Gamsu,Irelia,Crumbzz,JarvanIV,Shiphtur,Azir,CoreJJ,Corki,KiWiKiD,Annie
2,WFX,1,GV,Flaresz,Renekton,ShorterACE,Rengar,Pobelter,Fizz,Altec,...,Hauntzer,Sion,Saintvicious,LeeSin,Keane,Azir,Cop,Corki,BunnyFuFuu,Janna
3,TIP,0,TL,Rhux,Irelia,Rush,JarvanIV,XiaoWeiXiao,Leblanc,Apollo,...,Quas,Gnar,IWDominate,Nunu,Fenix,Lulu,KEITH,KogMaw,Xpecial,Janna
4,CLG,1,T8,Benny,Gnar,Xmithie,JarvanIV,Link,Lissandra,Doublelift,...,CaliTrlolz8,Sion,Porpoise8,RekSai,Slooshi8,Lulu,Maplestreet8,Corki,Dodo8,Annie


In [None]:
# Player-name columns of the match table: five roles on each side.
lt_playerCols = [
    f"{team}{role}"
    for team in ("blue", "red")
    for role in ("Top", "Jungle", "Middle", "ADC", "Support")
]

# Flatten every player cell into one 1-D array and keep the distinct values
# in order of first appearance.
all_players = pd.unique(match_data[lt_playerCols].to_numpy().ravel())

# Display all unique players
print(
    "\nAll Unique Players:",
    all_players,
    f"Number players: {len(all_players)}",
    sep="\n",
)

In [None]:
# Create the initial dataframe with the name field (one row per distinct player).
players_df = pd.DataFrame(all_players, columns=['name'])

# Only the player-name columns are searched; slice them once instead of on
# every loop iteration.
player_cells = match_data[lt_playerCols]

# Add one 0/1 column per player: 1 where that player appears in any role of the match.
# NOTE(review): the Series assigned here is indexed by match row, while
# players_df is indexed by position in all_players. pandas aligns on index
# labels, so only match rows whose label also exists in players_df are kept.
# If a true match-by-player matrix is intended, it needs its own frame -
# confirm the intended shape before relying on this output.
for player in all_players:
    players_df[player] = player_cells.eq(player).any(axis=1).astype(int)

# Display the participation matrix.
# The header previously used "\P", which is not a valid escape sequence and
# printed a literal backslash; "\n" matches the other section headers.
print("\nPlayer Matrix:")
print(players_df)



In [None]:
# Long format: one (match_id, player_position, name) row per player slot.
modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))
melted_df = pd.melt(
    modified_match_data,
    id_vars=['match_id'],
    value_vars=lt_playerCols,
    var_name='player_position',
    value_name='name',
)

# Distinct player names, renumbered from zero.
unique_players_df = melted_df[['name']].drop_duplicates().reset_index(drop=True)

# Number of distinct matches each player took part in.
player_stats = (
    melted_df.groupby('name')['match_id']
    .nunique()
    .reset_index(name='matches_played')
)

# Display player statistics
print("\nPlayer Statistics:")
print(player_stats)

In [3]:
# Feature 1: matches played, matches won and win ratio per (player, role).
modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

# Long-format roster of each side: one row per (match, role slot), tagged with the side.
blue_roles = pd.melt(
    modified_match_data[['match_id', 'blueJungle', 'blueTop', 'blueADC', 'blueSupport', 'blueMiddle']],
    id_vars=['match_id'],
    var_name='role',
    value_name='player',
).assign(team='blue')

red_roles = pd.melt(
    modified_match_data[['match_id', 'redJungle', 'redTop',  'redADC', 'redSupport', 'redMiddle']],
    id_vars=['match_id'],
    var_name='role',
    value_name='player',
).assign(team='red')

# Stack both sides into a single frame.
all_roles = pd.concat([blue_roles, red_roles], ignore_index=True)

# Strip the side prefix so that e.g. blueJungle and redJungle both read Jungle.
all_roles['role'] = all_roles['role'].str.replace('blue|red', '', regex=True)

# Attach the blue-side result of each match via its match_id.
all_roles = all_roles.merge(modified_match_data[['match_id', 'bResult']], on='match_id')

# A row is a win when it is a blue row of a blue win, or a red row of a blue loss.
all_roles['Won'] = (
    (all_roles['team'].eq('blue') & all_roles['bResult'].eq(1))
    | (all_roles['team'].eq('red') & all_roles['bResult'].eq(0))
)

# Appearances and wins per (player, role).
matches_played = (
    all_roles.groupby(['player', 'role']).size().reset_index(name='Matches_Played')
)
matches_won = (
    all_roles.loc[all_roles['Won']]
    .groupby(['player', 'role'])
    .size()
    .reset_index(name='Matches_Won')
)

# Left-join so that pairs without a single win stay in the table with 0 wins.
result_df = matches_played.merge(matches_won, on=['player', 'role'], how='left').fillna(0)
result_df['Matches_Won'] = result_df['Matches_Won'].astype(int)
result_df['winRatio'] = result_df['Matches_Won'] / result_df['Matches_Played']

print(result_df)

      player    role  Matches_Played  Matches_Won  winRatio
0       007x  Jungle               6            0  0.000000
1        0hq     ADC               3            0  0.000000
2       2020     ADC              25            2  0.080000
3         3z     Top              76           40  0.526316
4       4LaN  Jungle              42           20  0.476190
...      ...     ...             ...          ...       ...
1725    xiye  Middle              43           24  0.558140
1726    xpng     ADC              49           28  0.571429
1727      y4     ADC               4            3  0.750000
1728     zig     Top             162           75  0.462963
1729  zoiren  Middle              14            6  0.428571

[1730 rows x 5 columns]


In [8]:
#Diagnostic check for feature 1. Uses result_df and modified_match_data.
#
# For every player, the number of match rows that mention the player anywhere
# must equal that player's Matches_Played summed over all roles.

# Every player in result_df is checked. The list used to be taken from
# matches_won, which silently skipped players who never won a match.
# Matches_Played is summed per player once here instead of re-filtering
# result_df inside the loop.
player_df = result_df.groupby('player')['Matches_Played'].sum().reset_index()
player_list = list(player_df['player'])

bAllMatched = True

for item, i_matches_played in zip(player_df['player'], player_df['Matches_Played']):
    # Rows of the match table in which this name occurs in any column.
    matching_rows = modified_match_data[modified_match_data.isin([item]).any(axis=1)]
    iCountEntries = len(matching_rows)

    if iCountEntries != i_matches_played:
        bAllMatched = False
        print(f"Values did not match for player: {item}.")

if bAllMatched:
    print("All entries matched.")

All entries matched.


In [None]:
#Generate Feature 2: playerChampion
# Usage count, win count and win ratio for every (player, champion) pair.

# Rebuild the working frame here so this cell does not depend on whichever
# earlier cell last assigned modified_match_data.
modified_match_data = match_data.copy()
modified_match_data['match_id'] = range(1, len(modified_match_data) + 1)

# The player melt and the champion melt are joined row-by-row, so both MUST
# list the roles in the same order. Deriving both column lists from this one
# sequence guarantees that. Previously the player list started Jungle, Top
# while the champion list started Top, Jungle, which paired every Jungle
# player with the Top champion and vice versa.
role_order = ['Jungle', 'Top', 'ADC', 'Support', 'Middle']

team_frames = {}
for team in ('blue', 'red'):
    players_long = modified_match_data.melt(
        id_vars=['match_id', 'bResult'],
        value_vars=[f'{team}{role}' for role in role_order],
        var_name='position',
        value_name='player'
    )
    champions_long = modified_match_data.melt(
        id_vars=['match_id'],
        value_vars=[f'{team}{role}Champ' for role in role_order],
        value_name='champion'
    )
    # Same frame, same role order, same default index: rows line up one-to-one.
    players_long['champion'] = champions_long['champion']
    players_long['team'] = team
    team_frames[team] = players_long

blue_team = team_frames['blue']
red_team = team_frames['red']

# Combine blue and red team data
combined_df = pd.concat([blue_team, red_team])

# Number of times a player has used a certain champion
usage_count = combined_df.groupby(['player', 'champion']).size().reset_index(name='usage_count')

# A row is a win when it is a blue row of a blue win, or a red row of a blue loss.
combined_df['win'] = (
    ((combined_df['team'] == 'blue') & (combined_df['bResult'] == 1))
    | ((combined_df['team'] == 'red') & (combined_df['bResult'] == 0))
)

# Number of times a player has won with a certain champion
win_count = combined_df[combined_df['win']].groupby(['player', 'champion']).size().reset_index(name='win_count')

# Merge usage and win counts; pairs without any win keep a win_count of 0.
df_champion_wr = pd.merge(usage_count, win_count, how='left', on=['player', 'champion'])
df_champion_wr['win_count'] = df_champion_wr['win_count'].fillna(0)
df_champion_wr['win_ratio'] = df_champion_wr['win_count'] / df_champion_wr['usage_count']

print(df_champion_wr)

In [None]:
#Implement feature 3: coopPlayer
# First step: count how often each pair of teammates has played together.

modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

# One five-name list per match and side.
blue_teams = modified_match_data[['blueTop', 'blueMiddle', 'blueJungle', 'blueADC', 'blueSupport']].values.tolist()
red_teams = modified_match_data[['redTop', 'redMiddle', 'redJungle', 'redADC', 'redSupport']].values.tolist()

# Every unordered teammate pair of every roster; names are sorted first so a
# pair always appears as the same tuple regardless of role order.
all_combinations = [
    pair
    for team in blue_teams + red_teams
    for pair in combinations(sorted(team), 2)
]

# Tally the occurrences of each pair.
pair_counts = Counter(all_combinations)

# Two-column table: the pair tuple and how many times it occurred.
pair_df = pd.DataFrame(list(pair_counts.items()), columns=['Pair', 'Count'])

# Display the resulting DataFrame
print(pair_df)


In [None]:
# Extends the pair count above: also tallies how often each teammate pair won
# and derives the pair's win ratio. The resulting lookup (pair_dt) is the
# starting point for the feature 3 cells that follow.

modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

# Appearances and wins per unordered teammate pair (name-sorted tuple keys).
pair_counts = Counter()
pair_wins = Counter()

blue_rosters = modified_match_data[['blueTop', 'blueMiddle', 'blueJungle', 'blueADC', 'blueSupport']].values.tolist()
red_rosters = modified_match_data[['redTop', 'redMiddle', 'redJungle', 'redADC', 'redSupport']].values.tolist()

for blue_team, red_team, b_result in zip(blue_rosters, red_rosters, modified_match_data['bResult']):
    blue_win = b_result == 1

    # Blue pairs are tallied before red pairs within each match; the red side
    # wins exactly when the blue side does not.
    for roster, roster_won in ((blue_team, blue_win), (red_team, not blue_win)):
        # set() collapses repeated names, so a pair is counted once per roster.
        for pair in combinations(sorted(set(roster)), 2):
            pair_counts[pair] += 1
            if roster_won:
                pair_wins[pair] += 1

# One row per pair, in first-seen order. Reading a missing key from a Counter
# yields 0, so pairs that never won get num_wins == 0.
pair_df = pd.DataFrame(
    {
        'pair': list(pair_counts),
        'games_played': list(pair_counts.values()),
        'num_wins': [pair_wins[seen_pair] for seen_pair in pair_counts],
    }
)

pair_df['win_ratio'] = pair_df['num_wins'] / pair_df['games_played']

# Dictionary keyed by the pair tuple for fast win-ratio lookup.
pair_dt = pair_df.set_index('pair')['win_ratio'].to_dict()

print(pair_df)

In [None]:
#Timing note carried over from the original cell: filling the pair columns one
#.loc assignment at a time takes about 2 minutes for a dataset with about 7k entries.

def get_pair_win_ratio(player1, player2, pair_dt):
    """Look up the win ratio of an unordered pair of players.

    The two names are put in sorted order to form the lookup key, so the
    argument order does not matter.

    Args:
        player1: First player name.
        player2: Second player name.
        pair_dt: Mapping from a sorted ``(name, name)`` tuple to a win ratio.

    Returns:
        The stored win ratio, or ``0`` when the pair is not in ``pair_dt``.
    """
    first, second = sorted((player1, player2))
    return pair_dt.get((first, second), 0)

# Add one win-ratio column per teammate pair: 5 players per side -> 10 pairs per side.
#
# Each side's five role columns are listed exactly once. The Middle column used
# to be listed twice, which produced 15 columns per side, including a
# (Middle, Middle) self-pair and repeated pairs.
# Columns are built as whole lists and attached in one assignment each instead
# of writing every cell through .loc inside a row loop.
pair_role_columns = {
    'blue': ['blueTop', 'blueMiddle', 'blueJungle', 'blueADC', 'blueSupport'],
    'red': ['redTop', 'redMiddle', 'redJungle', 'redADC', 'redSupport'],
}

for side, role_columns in pair_role_columns.items():
    # One five-name list per match for this side.
    rosters = modified_match_data[role_columns].values.tolist()

    # Pair k is always the same two role slots, so a given column always
    # describes the same role pairing.
    for idx, (slot_a, slot_b) in enumerate(combinations(range(len(role_columns)), 2)):
        modified_match_data[f'{side}Pair_{idx+1}_win_ratio'] = [
            # float() keeps the column numeric even when every lookup misses
            # and the helper returns its integer default.
            float(get_pair_win_ratio(roster[slot_a], roster[slot_b], pair_dt))
            for roster in rosters
        ]


In [None]:
#Alternate approach to above
modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

def compute_pair_win_ratios(players):
    """Return the win ratio of every teammate pair among ``players``.

    Repeated names are collapsed and the remaining names sorted before pairing.
    Ratios are read from the notebook-level ``pair_dt`` dictionary; a pair that
    is not in it contributes ``0``.

    Args:
        players: Iterable of player names from one side of one match.

    Returns:
        A list of win ratios, one per pair, in ``combinations`` order.
    """
    ratios = []
    for pair in combinations(sorted(set(players)), 2):
        ratios.append(pair_dt.get(tuple(sorted(set(pair))), 0))
    return ratios

# Role columns per side. Each is named once: the helper collapses repeats
# anyway, so listing Middle twice would not change the result.
blue_role_columns = ['blueTop', 'blueMiddle', 'blueJungle', 'blueADC', 'blueSupport']
red_role_columns = ['redTop', 'redMiddle', 'redJungle', 'redADC', 'redSupport']

# One list of pair win ratios per match and side.
modified_match_data['blue_pairs'] = modified_match_data.apply(
    lambda row: compute_pair_win_ratios(row[blue_role_columns].tolist()), axis=1
)
modified_match_data['red_pairs'] = modified_match_data.apply(
    lambda row: compute_pair_win_ratios(row[red_role_columns].tolist()), axis=1
)

# Expand each list into one column per pair and give the columns readable names.
blue_pairs_df = (
    modified_match_data['blue_pairs']
    .apply(pd.Series)
    .rename(columns=lambda i: f'bluePair_{i+1}_Win_Ratio')
)
red_pairs_df = (
    modified_match_data['red_pairs']
    .apply(pd.Series)
    .rename(columns=lambda i: f'redPair_{i+1}_Win_Ratio')
)

# Original columns plus the expanded pair columns, without the intermediate
# list-valued columns.
df = pd.concat([modified_match_data, blue_pairs_df, red_pairs_df], axis=1).drop(
    columns=['blue_pairs', 'red_pairs']
)

# Display the resulting DataFrame
print(df)


In [None]:
#Feature implementation iteration 3: sum all pair win ratios of a side into one value.
modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

def compute_sum_pair_win_ratios(players):
    """Sum the win ratios of all teammate pairs among ``players``.

    Repeated names are collapsed and the remaining names sorted before pairing.
    Ratios are read from the notebook-level ``pair_dt`` dictionary; unknown
    pairs add ``0``.

    NOTE: the feature 7 cell defines a two-argument function with this same
    name; whichever cell ran last is the definition in effect.

    Args:
        players: Iterable of player names from one side of one match.

    Returns:
        The sum of the pair win ratios (``0`` when there are no pairs).
    """
    total = 0
    for pair in combinations(sorted(set(players)), 2):
        total += pair_dt.get(tuple(sorted(set(pair))), 0)
    return total

# Role columns per side, each named once (the helper collapses repeats anyway).
blue_role_columns = ['blueTop', 'blueMiddle', 'blueJungle', 'blueADC', 'blueSupport']
red_role_columns = ['redTop', 'redMiddle', 'redJungle', 'redADC', 'redSupport']

# One summed cooperation score per match and side.
modified_match_data['sumCoopPlayerBlue'] = modified_match_data.apply(
    lambda row: compute_sum_pair_win_ratios(row[blue_role_columns].tolist()), axis=1
)
modified_match_data['sumCoopPlayerRed'] = modified_match_data.apply(
    lambda row: compute_sum_pair_win_ratios(row[red_role_columns].tolist()), axis=1
)

print(modified_match_data.head())

In [None]:
#Generating the playerVS df in preparation for implementing feature 4.
from itertools import product

modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

roles = ['Jungle', 'Top', 'ADC', 'Support', 'Middle']

# Player-name columns of each side.
blue_players_df = modified_match_data.loc[:, ['blueTop', 'blueJungle', 'blueMiddle', 'blueADC', 'blueSupport']]
red_players_df = modified_match_data.loc[:, ['redTop', 'redJungle', 'redMiddle', 'redADC', 'redSupport']]

# One five-name list per match. Despite the variable names these hold player
# names, not champions.
blue_champs = blue_players_df.to_numpy().tolist()
red_champs = red_players_df.to_numpy().tolist()

# Every (blue player, red player) meeting, plus the same meeting tagged with
# 1 when the blue side won that match and 0 otherwise.
matchups = []
num_wins = []

for blue, red, blue_win in zip(blue_champs, red_champs, modified_match_data['bResult']):
    blue_won_flag = 1 if blue_win == 1 else 0
    for blue_champ, red_champ in product(blue, red):
        matchups.append((blue_champ, red_champ))
        num_wins.append((blue_champ, red_champ, blue_won_flag))

# Meetings per (player, opponent).
matchups_df = pd.DataFrame(matchups, columns=['player', 'opponent']).assign(numPlayed=1)
matchups_summary = matchups_df.groupby(['player', 'opponent'], as_index=False)['numPlayed'].sum()

# Blue-side wins per (player, opponent).
num_wins_df = pd.DataFrame(num_wins, columns=['player', 'opponent', 'numWins'])
num_wins_summary = num_wins_df.groupby(['player', 'opponent'], as_index=False)['numWins'].sum()

# Join counts and wins; a pairing without a win record gets 0 wins.
final_df = matchups_summary.merge(num_wins_summary, on=['player', 'opponent'], how='left')
final_df['numWins'] = final_df['numWins'].fillna(0).astype(int)
final_df['win_ratio'] = final_df['numWins'] / final_df['numPlayed']

print(final_df.sort_values(by='numPlayed', ascending=False))

# (player, opponent) -> win ratio. The name says "champion", but the keys are
# player names.
dt_champion_vs = final_df.set_index(['player', 'opponent'])['win_ratio'].to_dict()

In [None]:
#Implementation of feature 4.
modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))
# First ten matches, kept as a small sample frame.
subset_data = modified_match_data.iloc[:10]

result_rows = list()
# (player, opponent) -> win ratio lookup built from final_df.
dt_player_vs_mappings = (
    final_df.set_index(['player', 'opponent']).loc[:, 'win_ratio'].to_dict()
)

def map_win_ratios(row, win_dict, default=0.0):
    """Sum the win ratios of every blue-player-vs-red-player matchup in one match.

    All 25 combinations of the five blue role columns with the five red role
    columns are looked up in ``win_dict``.

    Args:
        row: Mapping-like match record (e.g. a DataFrame row) containing the
            ten player role columns ``blueTop`` ... ``redSupport``.
        win_dict: Mapping from ``(blue_player, red_player)`` to a win ratio.
        default: Value used for a matchup that is missing from ``win_dict``.
            Without a default the lookup returned ``None`` and the addition
            raised ``TypeError`` for any unseen matchup.

    Returns:
        The float sum of the 25 looked-up win ratios.
    """
    blue_roles = ['blueTop', 'blueJungle', 'blueMiddle', 'blueADC', 'blueSupport']
    red_roles = ['redTop', 'redJungle', 'redMiddle', 'redADC', 'redSupport']

    # Blue roles vary slowest and red roles fastest, matching the nested-loop
    # order this replaces; the 0.0 start keeps the result a float.
    return sum(
        (
            win_dict.get((row[blue_role], row[red_role]), default)
            for blue_role in blue_roles
            for red_role in red_roles
        ),
        0.0,
    )


# Sum the blue-vs-red player win ratios for every match.
# The lookup passed in is dt_player_vs_mappings, which this cell builds from
# final_df. The global dt_champion_vs was used before, but the feature 7 cell
# rebinds that name to a champion-keyed dictionary, so the result depended on
# which cell had run last.
win_ratios_df = modified_match_data.apply(map_win_ratios, axis=1, win_dict=dt_player_vs_mappings)
modified_match_data['vsPlayer'] = win_ratios_df

print(modified_match_data)



In [None]:
#Implementation of feature 6: coopChampion-blue/red
# This cell builds the win ratio of every (champion, position) combination.
modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))
modified_match_data['bResult'] = match_data['bResult']

# Champion columns per side, in the same role order as the player columns
# melted below.
blue_roles = ['blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp']
red_roles = ['redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp', 'redSupportChamp']

# Blue side in long format: one row per (match, role slot) carrying the player,
# the champion from the parallel champion melt, and the side label.
blue_team = pd.melt(
    modified_match_data,
    id_vars=['match_id', 'bResult'],
    value_vars=['blueTop', 'blueJungle', 'blueMiddle', 'blueADC', 'blueSupport'],
    var_name='position',
    value_name='player',
)
blue_team['champion'] = pd.melt(
    modified_match_data,
    id_vars=['match_id'],
    value_vars=blue_roles,
    value_name='champion',
)['champion']
blue_team['team'] = 'blue'

# Red side, built the same way.
red_team = pd.melt(
    modified_match_data,
    id_vars=['match_id', 'bResult'],
    value_vars=['redTop', 'redJungle', 'redMiddle', 'redADC', 'redSupport'],
    var_name='position',
    value_name='player',
)
red_team['champion'] = pd.melt(
    modified_match_data,
    id_vars=['match_id'],
    value_vars=red_roles,
    value_name='champion',
)['champion']
red_team['team'] = 'red'

# Combine blue and red team data
combined_df = pd.concat([blue_team, red_team])

# A row is a win when it is a blue row of a blue win, or a red row of a blue loss.
combined_df['win'] = (
    (combined_df['team'].eq('blue') & combined_df['bResult'].eq(1))
    | (combined_df['team'].eq('red') & combined_df['bResult'].eq(0))
)

# Strip the side prefix so the position reads Top, Jungle, Middle, ADC or Support.
combined_df['position'] = combined_df['position'].str.replace('red|blue', '', regex=True)

# Wins and appearances per (champion, position).
win_count = (
    combined_df.loc[combined_df['win']]
    .groupby(['champion', 'position'])
    .size()
    .reset_index(name='win_count')
)
usage_count = combined_df.groupby(['champion', 'position']).size().reset_index(name='usage_count')

# Merge usage and win counts; combinations without any win keep a win_count of 0.
df_champion_wr = usage_count.merge(win_count, how='left', on=['champion', 'position'])
df_champion_wr['win_count'] = df_champion_wr['win_count'].fillna(0)
df_champion_wr['win_ratio'] = df_champion_wr['win_count'] / df_champion_wr['usage_count']

print(df_champion_wr)

In [None]:
#Implementation of feature 6
#Uses df_champion_wr from the cell above.

modified_match_data = match_data.copy()

TEAM_COLOR = ['blue', 'red']
TEAM_ROLE = ["Top", "Jungle", "Middle", "ADC", "Support"]

# Long format: one row per (match index, champion column).
champ_columns = [f'{color}{role}Champ' for color in TEAM_COLOR for role in TEAM_ROLE]
df_long = modified_match_data.reset_index().melt(
    id_vars=['index'],
    value_vars=champ_columns,
    var_name='team_role',
    value_name='champion',
)

# Reduce e.g. "blueTopChamp" to "Top" so it can be joined on the position column.
df_long['position'] = (
    df_long['team_role']
    .str.replace('red|blue', '', regex=True)
    .replace('Champ', '', regex=True)
)

# Attach each champion's per-position win ratio, then go back to one row per
# match with one win-ratio column per champion slot.
df_merged = pd.merge(df_long, df_champion_wr, how='left', on=['champion', 'position'])
df_result = df_merged.pivot(index='index', columns='team_role', values='win_ratio')

# All unordered role pairs within a team.
player_combos = list(combinations(TEAM_ROLE, 2))
print(player_combos)

testDf = modified_match_data.copy()

totalWinRate = 0.0
# For each side, accumulate over every role pair the sum of the two champions'
# win ratios. The red column is created before the blue column.
for teamColor in ['red', 'blue']:
    rate_column = f'{teamColor}WinRate'
    testDf[rate_column] = 0.0

    for first_role, second_role in player_combos:
        pair_total = (
            df_result[f"{teamColor}{first_role}Champ"]
            + df_result[f"{teamColor}{second_role}Champ"]
        )
        testDf[rate_column] = testDf[rate_column] + pair_total

print(testDf)

In [10]:
#Implementation of feature 7 - based on "generate_player_vs_df" implementation from load_data.py
# NOTE(review): only the first 10 matches are used here, so the champion-vs
# table below is built from that sample only - confirm whether that is intended.
modified_match_data = match_data.head(10).assign(
    match_id=lambda frame: range(1, len(frame) + 1)
)

# Champion columns of each side.
blue_champs_df = modified_match_data.loc[:, ['blueTopChamp', 'blueMiddleChamp', 'blueJungleChamp', 'blueADCChamp', 'blueSupportChamp']]
red_champs_df = modified_match_data.loc[:, ['redTopChamp', 'redMiddleChamp', 'redJungleChamp', 'redADCChamp', 'redSupportChamp']]

# One five-champion list per match.
blue_champs = blue_champs_df.to_numpy().tolist()
red_champs = red_champs_df.to_numpy().tolist()

# Every (blue champion, red champion) meeting, plus the same meeting tagged
# with (1, 0) when the blue side won the match and (0, 1) otherwise.
matchups = []
num_wins = []

for blue, red, blue_win in zip(blue_champs, red_champs, modified_match_data['bResult']):
    outcome = (1, 0) if blue_win == 1 else (0, 1)
    for blue_player, red_player in product(blue, red):
        matchups.append((blue_player, red_player))
        num_wins.append((blue_player, red_player) + outcome)

# Meetings per (champion, opponent).
matchups_df = pd.DataFrame(matchups, columns=['champion', 'opponent'])
matchups_count_df = (
    matchups_df.groupby(['champion', 'opponent']).size().reset_index(name='numPlayed')
)

# Wins per (champion, opponent); numWins is the blue-side (champion) win count.
num_wins_df = pd.DataFrame(num_wins, columns=['champion', 'opponent', 'championWins', 'opponentWins'])
num_wins_summary = (
    num_wins_df.groupby(['champion', 'opponent'])
    .sum()
    .reset_index()
    .assign(numWins=lambda summary: summary['championWins'])
)

# Join counts and wins; a matchup without a win record gets 0 wins.
champion_vs_df = matchups_count_df.merge(
    num_wins_summary[['champion', 'opponent', 'numWins']],
    on=['champion', 'opponent'],
    how='left',
)
champion_vs_df['numWins'] = champion_vs_df['numWins'].fillna(0).astype(int)
champion_vs_df['win_ratio'] = champion_vs_df['numWins'] / champion_vs_df['numPlayed']

# (champion, opponent) -> win ratio lookup.
dt_champion_vs = champion_vs_df.set_index(['champion', 'opponent'])['win_ratio'].to_dict()

def process_feature7(x_dataset, dt_pair_data):
    """Add the ``vsChampion`` feature: summed blue-vs-red champion win ratios.

    For every match, each of the five blue champions is paired with each of
    the five red champions and the pair's win ratio is looked up in
    ``dt_pair_data``. Previously only pairs of *blue* champions, in sorted
    order, were looked up, which does not match a dictionary keyed by
    (blue champion, red champion).

    Args:
        x_dataset: DataFrame with the ten champion columns
            ``blueTopChamp`` ... ``redSupportChamp``. It is not modified.
        dt_pair_data: Mapping from ``(blue_champion, red_champion)`` to a win
            ratio. Matchups missing from it contribute 0.

    Returns:
        A copy of ``x_dataset`` with a 1-based ``match_id`` column and the
        float ``vsChampion`` column added.
    """
    blue_columns = ['blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp',
                    'blueADCChamp', 'blueSupportChamp']
    red_columns = ['redTopChamp', 'redJungleChamp', 'redMiddleChamp',
                   'redADCChamp', 'redSupportChamp']

    modified_match_data = x_dataset.copy()
    modified_match_data['match_id'] = range(1, len(modified_match_data) + 1)

    blue_rosters = modified_match_data[blue_columns].values.tolist()
    red_rosters = modified_match_data[red_columns].values.tolist()

    # The 0.0 start value keeps the column float even when every lookup misses.
    modified_match_data['vsChampion'] = [
        sum((dt_pair_data.get(matchup, 0) for matchup in product(blue, red)), 0.0)
        for blue, red in zip(blue_rosters, red_rosters)
    ]

    return modified_match_data

def compute_sum_pair_win_ratios(players, pair_dt):
    """Sum the win ratios of all unordered pairs among ``players``.

    Repeated names are collapsed and the remaining names sorted, so each lookup
    key is a name-sorted 2-tuple.

    Args:
        players: Iterable of names from the same team.
        pair_dt: Mapping from a sorted ``(name, name)`` tuple to a win ratio.

    Returns:
        The sum of the looked-up ratios; unknown pairs add 0.
    """
    # combinations() over a sorted, de-duplicated list already yields sorted
    # 2-tuples of distinct names, so each pair can be used as the key directly.
    return sum(pair_dt.get(pair, 0) for pair in combinations(sorted(set(players)), 2))

# Run feature 7 on the sample frame with the champion-vs lookup and show the result.
test = process_feature7(x_dataset=modified_match_data, dt_pair_data=dt_champion_vs)

print(test)

  blueTeamTag  bResult redTeamTag      blueTop blueTopChamp  blueJungle  \
0         TSM        1         C9        Dyrus       Irelia    Santorin   
1         CST        0        DIG         Cris         Gnar     Impaler   
2         WFX        1         GV      Flaresz     Renekton  ShorterACE   
3         TIP        0         TL         Rhux       Irelia        Rush   
4         CLG        1         T8        Benny         Gnar     Xmithie   
5         DIG        0        TIP        Gamsu     Kassadin     Crumbzz   
6         CST        1        WFX         Cris       Irelia     Impaler   
7          TL        1        CLG         Quas     Renekton  IWDominate   
8          C9        0         GV        Balls         Sion      Meteos   
9          T8        1        TSM  CaliTrlolz8       Irelia   Porpoise8   

  blueJungleChamp   blueMiddle blueMiddleChamp       blueADC  ...  \
0          RekSai     Bjergsen            Ahri    WildTurtle  ...   
1          Rengar        Jesiz      

In [24]:
#Implementation of feature 8
# Win ratio of every team tag, computed separately for the games it played on
# the blue side and the games it played on the red side.

modified_match_data = match_data.assign(match_id=range(1, len(match_data) + 1))

# 1/0 win flags per match: blue wins when bResult is 1, red wins when it is 0.
modified_match_data['bNumWins'] = match_data['bResult'].eq(1).astype('int64')
modified_match_data['rNumWins'] = match_data['bResult'].eq(0).astype('int64')

# For each side: matches played (group size) and matches won (sum of that
# side's win flag) per team tag, followed by the win ratio.
side_tables = {}
for tag_column, wins_column in (('blueTeamTag', 'bNumWins'), ('redTeamTag', 'rNumWins')):
    side_table = (
        modified_match_data.groupby(tag_column)
        .agg({'bResult': 'size', wins_column: 'sum'})
        .reset_index()
    )
    side_table.columns = [tag_column, 'matchesPlayed', 'numWins']
    side_table['win_ratio'] = side_table['numWins'] / side_table['matchesPlayed']
    side_tables[tag_column] = side_table

blue_team_df = side_tables['blueTeamTag']
red_team_df = side_tables['redTeamTag']

# Team tag -> win ratio lookups, one per side.
blue_team_dt = blue_team_df.set_index('blueTeamTag')['win_ratio'].to_dict()
red_team_dt = red_team_df.set_index('redTeamTag')['win_ratio'].to_dict()

print(red_team_df.sort_values(by=['matchesPlayed', 'numWins'], ascending=False))

    redTeamTag  matchesPlayed  numWins  win_ratio
193        SKT            208      141   0.677885
212        TSM            172      103   0.598837
197        SSG            155       82   0.529032
25          C9            152       88   0.578947
108        JAG            150       64   0.426667
..         ...            ...      ...        ...
194        SPA              1        0   0.000000
209        TRC              1        0   0.000000
211         TS              1        0   0.000000
234         YC              1        0   0.000000
235        ZEN              1        0   0.000000

[243 rows x 4 columns]


In [28]:
# Attach the per-side team win ratios to every match: the blue team's
# blue-side ratio and the red team's red-side ratio. Tags absent from a lookup
# map to NaN.
modified_match_data = match_data.assign(
    bTeamColor=match_data['blueTeamTag'].map(blue_team_dt),
    rTeamColor=match_data['redTeamTag'].map(red_team_dt),
)

print(modified_match_data.head())

  blueTeamTag  bResult redTeamTag  blueTop blueTopChamp  blueJungle  \
0         TSM        1         C9    Dyrus       Irelia    Santorin   
1         CST        0        DIG     Cris         Gnar     Impaler   
2         WFX        1         GV  Flaresz     Renekton  ShorterACE   
3         TIP        0         TL     Rhux       Irelia        Rush   
4         CLG        1         T8    Benny         Gnar     Xmithie   

  blueJungleChamp   blueMiddle blueMiddleChamp     blueADC  ...     redJungle  \
0          RekSai     Bjergsen            Ahri  WildTurtle  ...        Meteos   
1          Rengar        Jesiz            Ahri        Mash  ...       Crumbzz   
2          Rengar     Pobelter            Fizz       Altec  ...  Saintvicious   
3        JarvanIV  XiaoWeiXiao         Leblanc      Apollo  ...    IWDominate   
4        JarvanIV         Link       Lissandra  Doublelift  ...     Porpoise8   

  redJungleChamp redMiddle redMiddleChamp        redADC redADCChamp  \
0          Elis