In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load the match records and the character lookup table.
# Forward slashes keep the relative paths portable: the original
# backslash-separated literals only resolved on Windows and depended on
# '\m' and '\c' not being recognised string escape sequences.
match_df = pd.read_csv('compiled_data/match_data')
character_names = pd.read_csv('compiled_data/character_names')

# Merge the data using the character ids. This is pandas' default inner
# join, so match rows whose id has no entry in character_names are dropped.
match_df = match_df.merge(character_names, left_on='character', right_on='characterID')

# Replace the 'character' id column with the matching 'name' column
match_df['character'] = match_df['name']

# Drop the columns that are no longer needed after the merge
# ('Unnamed: 0' is presumably an index column saved into the CSV -- TODO confirm)
match_df = match_df.drop(columns=['Unnamed: 0', 'characterID', 'name'])

match_df.head()

Unnamed: 0,character,date,game_mode,league,match_id,ranked,win
0,Lucie,2018-01-01 00:00:03,3V3,1,18E42344815440E3BC148E93658B0E2C,False,False
1,Lucie,2018-01-01 00:00:05,2V2,3,7AF171370D804737BEC036A41A369BBF,False,False
2,Lucie,2018-01-01 00:00:11,2V2,0,8CB6BA4D019E4FAC9D713BAA1D0C20B2,False,False
3,Lucie,2018-01-01 00:00:11,2V2,3,E7F34F0CF2694C7597B973FAF2D31E03,False,True
4,Lucie,2018-01-01 00:00:21,3V3,1,210B90DBDD6A4A23AEC467BDEC6C6980,True,False


In [5]:
# Game-mode splits over every match, ranked and casual alike
all_2v2 = match_df[match_df['game_mode'] == '2V2']
all_3v3 = match_df[match_df['game_mode'] == '3V3']

# Casual matches: rows whose 'ranked' flag equals False,
# followed by the same game-mode split within them
casual = match_df[match_df['ranked'].eq(False)]
casual2v2 = casual[casual['game_mode'] == '2V2']
casual3v3 = casual[casual['game_mode'] == '3V3']

# Ranked matches: rows whose 'ranked' flag equals True,
# followed by the same game-mode split within them
ranked = match_df[match_df['ranked'].eq(True)]
ranked2v2 = ranked[ranked['game_mode'] == '2V2']
ranked3v3 = ranked[ranked['game_mode'] == '3V3']

In [78]:
def win_rates(df, format_str, type_str):
    '''
    Compute per-character win rates for the matches in ``df``.

    A win rate is the sum of the 'win' column divided by its count within
    each group.

    df: DataFrame with 'character' and 'win' columns (plus 'league' when
        format_str is 'league').
    format_str: 'league' selects the per-league breakdown, in which case the
        league name is written to the 'format' column; any other value is
        written to the 'format' column as-is.
    type_str: value written to the 'type' column of every returned row.

    Returns a DataFrame with 'character', 'win_rate', 'format' and 'type'
    columns.
    '''
    if format_str == 'league':
        # Win rate for every (character, league) pair, pivoted so that each
        # league becomes a column
        by_char_league = df.groupby(['character', 'league']).win
        rate_table = (by_char_league.sum() / by_char_league.count()).unstack()

        # Map the league codes 0-6 onto their names
        league_names = ['bronze', 'silver', 'gold', 'platinum', 'diamond', 'champion', 'grand_champ']
        rate_table = rate_table.rename(columns=dict(enumerate(league_names)))

        # Character/league pairs with no matches get a rate of zero
        rate_table = rate_table.fillna(0)

        # Back to long form: one row per (character, league)
        result = rate_table.stack().reset_index()

        # The league name goes into 'format'; the unnamed value column
        # becomes 'win_rate'
        result = result.rename(columns={'league': 'format', 0: 'win_rate'})
        result['type'] = type_str
        return result

    # Single win rate per character
    by_char = df.groupby('character').win
    ratio = by_char.sum() / by_char.count()

    # The ratio series is still called 'win'; expose it as 'win_rate'
    result = ratio.to_frame().reset_index().rename(columns={'win': 'win_rate'})

    # Label the rows
    result['format'] = format_str
    result['type'] = type_str
    return result

In [79]:
# --- Overall win rates (ranked and casual matches together) ---
# Find the overall win rates
overall_win_rates = win_rates(match_df, 'overall', 'overall')

# Find the overall 2v2 win rates
overall_win_rates_2v2 = win_rates(all_2v2, 'overall', '2v2')

# Find the overall 3v3 win rates
overall_win_rates_3v3 = win_rates(all_3v3, 'overall', '3v3')

# Concatenate into one DataFrame
overall_win_rates = pd.concat([overall_win_rates, overall_win_rates_2v2, overall_win_rates_3v3], ignore_index=True)

# --- Casual win rates ---
# Find the casual win rates
casual_win_rates = win_rates(casual, 'casual', 'overall')

# Find the 2v2 casual win rates
casual_win_rates_2v2 = win_rates(casual2v2, 'casual', '2v2')

# Find the 3v3 casual win rates
# (these rows were previously mislabelled with type '2v2')
casual_win_rates_3v3 = win_rates(casual3v3, 'casual', '3v3')

# Concatenate into one DataFrame
casual_win_rates = pd.concat([casual_win_rates, casual_win_rates_2v2, casual_win_rates_3v3], ignore_index=True)

# --- Ranked win rates ---
# Find the ranked win rates
ranked_win_rates = win_rates(ranked, 'ranked', 'overall')

# Find the 2v2 ranked win rates
ranked_win_rates_2v2 = win_rates(ranked2v2, 'ranked', '2v2')

# Find the 3v3 ranked win rates
ranked_win_rates_3v3 = win_rates(ranked3v3, 'ranked', '3v3')

# Concatenate into one DataFrame
ranked_win_rates = pd.concat([ranked_win_rates, ranked_win_rates_2v2, ranked_win_rates_3v3], ignore_index=True)

# --- Ranked win rates broken down by league ---
# All ranked matches
league_win_rates = win_rates(ranked, 'league', 'overall')

# Ranked 2v2 matches
league_win_rates_2v2 = win_rates(ranked2v2, 'league', '2v2')

# Ranked 3v3 matches
league_win_rates_3v3 = win_rates(ranked3v3, 'league', '3v3')

# Concatenate into one DataFrame
league_win_rates = pd.concat([league_win_rates, league_win_rates_2v2, league_win_rates_3v3], ignore_index=True)


# --- Combine all win rates and save them ---
combined_win_rates = pd.concat([overall_win_rates, casual_win_rates, ranked_win_rates, league_win_rates], ignore_index=True)
# Forward slash keeps the relative path portable (a backslash separator only
# works on Windows and is an invalid escape sequence inside a normal string)
combined_win_rates.to_csv('compiled_data/combined_win_rates')

In [74]:
def pick_rates(df, format_str, type_str):
    '''
    Returns the pick rates of the input DataFrame

    A pick rate is a character's share of the rows in ``df``; rows are
    counted through the non-null entries of the 'win' column. All figures are
    derived from ``df`` itself, so passing a subset (e.g. only 2v2 matches)
    yields pick rates within that subset.

    df: DataFrame with 'character' and 'win' columns (plus 'league' when
        format_str is 'league').
    format_str: 'league' selects the per-league breakdown, where each rate is
        the character's share of the rows in that league and the league name
        is written to the 'format' column; any other value gives one rate per
        character and is written to the 'format' column as-is.
    type_str: value written to the 'type' column of every returned row.

    Returns a DataFrame with 'character', 'pick_rate', 'format' and 'type'
    columns.
    '''
    if format_str != 'league':
        # Share of all rows in df that belong to each character
        shares = df.groupby('character').win.count() / df.win.count()

        # Name the value column 'pick_rate' and turn the index into a column
        result = shares.rename('pick_rate').reset_index()

        # Add 'format' and 'type' column
        result['format'] = format_str
        result['type'] = type_str
        return result

    # Row counts per (character, league), laid out with one column per league
    counts = df.groupby(['character', 'league']).win.count().unstack('league')

    # Row counts per league: the denominator for the matching column
    league_totals = df.groupby('league').win.count()
    shares = counts.div(league_totals, axis='columns')

    # Map the league codes 0-6 onto their names
    leagues = ['bronze', 'silver', 'gold', 'platinum', 'diamond', 'champion', 'grand_champ']
    shares = shares.rename(columns=dict(enumerate(leagues)))

    # A character that never appears in a league has a pick rate of zero there
    shares = shares.fillna(0)

    # Back to long form: one row per (character, league), with the league
    # name stored in 'format' and the value in 'pick_rate'
    result = (
        shares.stack()
        .rename('pick_rate')
        .rename_axis(['character', 'format'])
        .reset_index()
    )
    result['type'] = type_str
    return result

In [75]:
# --- Overall pick rates (ranked and casual matches together) ---
# Find the overall pick rates
overall_pick_rates = pick_rates(match_df, 'overall', 'overall')

# Find the overall 2v2 pick rates
overall_pick_rates_2v2 = pick_rates(all_2v2, 'overall', '2v2')

# Find the overall 3v3 pick rates
overall_pick_rates_3v3 = pick_rates(all_3v3, 'overall', '3v3')

# Concatenate into one DataFrame
overall_pick_rates = pd.concat([overall_pick_rates, overall_pick_rates_2v2, overall_pick_rates_3v3], ignore_index=True)

# --- Casual pick rates ---
# Find the casual pick rates
casual_pick_rates = pick_rates(casual, 'casual', 'overall')

# Find the 2v2 casual pick rates
casual_pick_rates_2v2 = pick_rates(casual2v2, 'casual', '2v2')

# Find the 3v3 casual pick rates
# (these rows were previously mislabelled with type '2v2')
casual_pick_rates_3v3 = pick_rates(casual3v3, 'casual', '3v3')

# Concatenate into one DataFrame
casual_pick_rates = pd.concat([casual_pick_rates, casual_pick_rates_2v2, casual_pick_rates_3v3], ignore_index=True)

# --- Ranked pick rates ---
# Find the ranked pick rates
ranked_pick_rates = pick_rates(ranked, 'ranked', 'overall')

# Find the 2v2 ranked pick rates
ranked_pick_rates_2v2 = pick_rates(ranked2v2, 'ranked', '2v2')

# Find the 3v3 ranked pick rates
ranked_pick_rates_3v3 = pick_rates(ranked3v3, 'ranked', '3v3')

# Concatenate into one DataFrame.
# The result must be stored in ranked_pick_rates: assigning it to
# ranked_win_rates overwrote the win-rate frame and left the 2v2/3v3 rows out
# of the combined pick rates below.
ranked_pick_rates = pd.concat([ranked_pick_rates, ranked_pick_rates_2v2, ranked_pick_rates_3v3], ignore_index=True)

# --- Ranked pick rates broken down by league ---
# All ranked matches
league_pick_rates = pick_rates(ranked, 'league', 'overall')

# Ranked 2v2 matches
league_pick_rates_2v2 = pick_rates(ranked2v2, 'league', '2v2')

# Ranked 3v3 matches
league_pick_rates_3v3 = pick_rates(ranked3v3, 'league', '3v3')

# Concatenate into one DataFrame
league_pick_rates = pd.concat([league_pick_rates, league_pick_rates_2v2, league_pick_rates_3v3], ignore_index=True)


# --- Combine all pick rates and save them ---
combined_pick_rates = pd.concat([overall_pick_rates, casual_pick_rates, ranked_pick_rates, league_pick_rates], ignore_index=True)
# Forward slash keeps the relative path portable (a backslash separator only
# works on Windows and is an invalid escape sequence inside a normal string)
combined_pick_rates.to_csv('compiled_data/combined_pick_rates')