In [1]:
import pandas as pd
# Per-map match statistics downloaded from https://overwatchleague.com/en-us/statslab
raw_df = pd.read_csv(filepath_or_buffer="data/match_map_stats.csv")

# Turn off pandas' chained-assignment (SettingWithCopy) warnings for this session.
pd.set_option("mode.chained_assignment", None)
                                                
    

In [2]:
# Distinct match ids, in the order they first appear in raw_df.
match_ids = raw_df["match_id"].unique()

In [3]:
#Start building the master dataframe
#
#raw_df holds several rows per match, but the master frame needs exactly one row per
#match_id. For every match we keep the values found on the FIRST row of that match.
#drop_duplicates(subset='match_id') keeps that first row (keep='first' is the default)
#and preserves the order of first appearance, so a single pass over raw_df replaces
#re-filtering the whole frame once per match.
source_columns = ['match_id', 'team_one_name', 'team_two_name', 'round_start_time', 'stage', 'match_winner']
columns = ['id', 'team_one', 'team_two', 'old_date', 'stage', 'winner']

df = raw_df.drop_duplicates(subset='match_id')[source_columns].reset_index(drop=True)
df.columns = columns  #rename positionally: source_columns[i] -> columns[i]

#Placeholder; the date column is filled in once the time of day is stripped from old_date
df['date'] = ""

#Make sure the date column is in the proper format
df['old_date'] = pd.to_datetime(df['old_date'])



In [4]:
#Let's remove the time from datetime.
#
#old_date is a datetime column, so .dt.date yields the calendar date (a datetime.date
#object) for every row in one vectorised step.
#A direct column assignment is used on purpose: writing through chained indexing
#(df['date'][mask] = value) may modify a temporary object instead of df, and under
#pandas Copy-on-Write it never updates df at all.
df['date'] = df['old_date'].dt.date

In [5]:
#The raw timestamp column is no longer needed; remove it from df in place
df.drop(columns=['old_date'], inplace=True)

In [6]:
#Convert the date column to pandas datetime values
df["date"] = pd.to_datetime(df["date"])

In [7]:
#Save the frame built so far as the template that upcoming matches are loaded into.
#The row index is written as well (pandas' to_csv default).
df.to_csv(path_or_buf='data/load_upcoming_here.csv')

In [8]:
#corona_virus_isolation is True for games played while in isolation because of the
#pandemic. The column starts out as False for every match.
df["corona_virus_isolation"] = False

#TODO
#Boolean mask: True where the match date is later than 03/20/2020
mask = df["date"] > "03/20/2020"
df["corona_virus_isolation"] = mask

In [9]:
#Initiate some features:
#Every feature column is created up front holding an empty string. The column names
#are generated from their shared parts; the resulting order is the column order of df.
record_stats = ['t1_wins', 't1_losses', 't2_wins', 't2_losses']
match_counts = ['t1_matches', 't2_matches']
win_rates = ['t1_win_percent', 't2_win_percent']

feature_columns = []

#Season and all-time blocks: wins/losses, match counts, then win percentages
for span in ('season', 'alltime'):
    feature_columns += [stat + '_' + span for stat in record_stats + match_counts + win_rates]

#"Last n matches" blocks: wins/losses and win percentages only (no match counts)
for n_matches in (3, 5, 10, 20):
    feature_columns += [stat + '_last_' + str(n_matches) for stat in record_stats + win_rates]

#Columns that do not follow the patterns above
feature_columns += [
    't1_place_last_season', 't2_place_last_season',
    't1_wins_vs_t2', 't1_losses_vs_t2', 't1_matches_vs_t2', 't1_win_percent_vs_t2',
    'winner_label',
]

for feature in feature_columns:
    df[feature] = ""

### The following are functions that I will use to fill in the features above

In [10]:
from functions import set_winners, find_last_season_results, find_team_match_results, find_last_n_results 
from functions import find_head_to_head_results

In [11]:
#Feature pipeline: each step receives the frame returned by the previous step (the
#first one receives df) followed by its extra positional arguments. Steps run in the
#listed order.
#NOTE(review): 'current_season' is passed for the 2020, 2019 and 2018 date ranges -
#confirm against functions.py that this is the intended label for past seasons.
feature_steps = [
    (set_winners, ()),
    (find_last_season_results, ()),
    (find_team_match_results, ('01/01/2020', '12/31/2020', 'current_season')),
    (find_team_match_results, ('11/25/2010', '01/01/2080', 'all_time')),
    (find_team_match_results, ('01/01/2019', '12/31/2019', 'current_season')),
    (find_team_match_results, ('01/01/2018', '12/31/2018', 'current_season')),
    (find_last_n_results, (5,)),
    (find_last_n_results, (10,)),
    (find_last_n_results, (3,)),
    (find_last_n_results, (20,)),
    (find_head_to_head_results, ()),
]

return_df = df
for step, extra_args in feature_steps:
    return_df = step(return_df, *extra_args)

In [12]:
#Show the frame returned by the feature pipeline
display(return_df)

Unnamed: 0,id,team_one,team_two,stage,winner,date,corona_virus_isolation,t1_wins_season,t1_losses_season,t2_wins_season,...,t2_losses_last_20,t1_win_percent_last_20,t2_win_percent_last_20,t1_place_last_season,t2_place_last_season,t1_wins_vs_t2,t1_losses_vs_t2,t1_matches_vs_t2,t1_win_percent_vs_t2,winner_label
0,10223,Los Angeles Valiant,San Francisco Shock,Overwatch League - Stage 1,Los Angeles Valiant,2018-01-11,False,0,0,0,...,,,,,,0,0,0,,0
1,10224,Los Angeles Gladiators,Shanghai Dragons,Overwatch League - Stage 1,Los Angeles Gladiators,2018-01-11,False,0,0,0,...,,,,,,0,0,0,,0
2,10225,Seoul Dynasty,Dallas Fuel,Overwatch League - Stage 1,Seoul Dynasty,2018-01-11,False,0,0,0,...,,,,,,0,0,0,,0
3,10226,Florida Mayhem,London Spitfire,Overwatch League - Stage 1,London Spitfire,2018-01-11,False,0,0,0,...,,,,,,0,0,0,,1
4,10227,Houston Outlaws,Philadelphia Fusion,Overwatch League - Stage 1,Philadelphia Fusion,2018-01-11,False,0,0,0,...,,,,,,0,0,0,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666,34229,Seoul Dynasty,Hangzhou Spark,OWL 2020 Regular Season,Hangzhou Spark,2020-05-03,True,3,2,4,...,10,0.45,0.5,8,4,1,3,4,0.25,1
667,34228,Chengdu Hunters,New York Excelsior,OWL 2020 Regular Season,New York Excelsior,2020-05-03,True,3,7,7,...,8,0.4,0.6,12,2,1,1,2,0.5,1
668,34227,Philadelphia Fusion,Paris Eternal,OWL 2020 Regular Season,Philadelphia Fusion,2020-05-03,True,9,1,5,...,11,0.65,0.45,10,14,2,2,4,0.5,0
669,34226,Florida Mayhem,Boston Uprising,OWL 2020 Regular Season,Florida Mayhem,2020-05-03,True,4,4,1,...,18,0.45,0.1,20,19,2,5,7,0.285714,0


In [13]:
#Write the finished feature table to disk (row index included, pandas' to_csv default)
return_df.to_csv(path_or_buf='data/overwatch-league.csv')