In [1]:
import pandas as pd
import matplotlib.pyplot as plt
# Widen the notebook display limits. The fully qualified ``display.*`` keys are
# required: pandas resolves option names by pattern, and the short forms
# 'max_columns' / 'max_rows' also match ``styler.render.*`` on pandas >= 1.4,
# which makes set_option raise OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 200)
import seaborn as sns

# Key Results of This Notebook
## Data Cleaning and Validation
- Reduce file size
- Validate that the data is good
- Create data dictionary

In [2]:
# Load the feature-selected CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='feature_selected.csv')

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,start_time,esports_match_id,map_type,map_name,player_name,team_name,stat_name,hero_name,stat_amount
0,2021-04-16 19:08:52,37234,CONTROL,Busan,Doha,Dallas Fuel,All Damage Done,Echo,314.0
1,2021-04-16 19:08:52,37234,CONTROL,Busan,Doha,Dallas Fuel,Average Time Alive,Echo,16.881001
2,2021-04-16 19:08:52,37234,CONTROL,Busan,Doha,Dallas Fuel,Damage Taken,Echo,330.778026
3,2021-04-16 19:08:52,37234,CONTROL,Busan,Doha,Dallas Fuel,Deaths,Echo,1.0
4,2021-04-16 19:08:52,37234,CONTROL,Busan,Doha,Dallas Fuel,Healing Received,Echo,130.77809


In [4]:
# Print the column dtypes, non-null counts and memory usage of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 314515 entries, 0 to 314514
Data columns (total 9 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   start_time        314515 non-null  object 
 1   esports_match_id  314515 non-null  int64  
 2   map_type          314515 non-null  object 
 3   map_name          314515 non-null  object 
 4   player_name       314515 non-null  object 
 5   team_name         314515 non-null  object 
 6   stat_name         314515 non-null  object 
 7   hero_name         314515 non-null  object 
 8   stat_amount       314515 non-null  float64
dtypes: float64(1), int64(1), object(7)
memory usage: 21.6+ MB


In [5]:
# List the column names as a plain Python list.
list(df.columns)

['start_time',
 'esports_match_id',
 'map_type',
 'map_name',
 'player_name',
 'team_name',
 'stat_name',
 'hero_name',
 'stat_amount']

In [6]:
# start_time: keep only the date part. The values look like
# 'YYYY-MM-DD HH:MM:SS', so the date is exactly the first 10 characters;
# slicing 11 characters would also keep the blank that separates date and
# time, leaving a trailing space in every value.
# The vectorised .str accessor replaces the per-row lambda, and re-running
# this cell leaves the column unchanged.
df['start_time'] = df['start_time'].str[:10]

In [7]:
# Number of distinct matches per start_time value, as a two-column frame
# (start_time, esports_match_id). groupby orders the keys and reset_index
# yields a fresh 0..n-1 index, so no further sorting is needed.
date_df = (
    df.groupby('start_time')['esports_match_id']
    .nunique()
    .reset_index()
)

In [8]:
# Load the match-result data that the stats table is validated against.
match_df = pd.read_csv(filepath_or_buffer='data/match_data.csv')

In [9]:
# Preview the first five rows of the match-result frame.
match_df.head(n=5)

Unnamed: 0,round_start_time,round_end_time,stage,match_id,game_number,match_winner,map_winner,map_loser,map_name,map_round,winning_team_final_map_score,losing_team_final_map_score,control_round_name,attacker,defender,team_one_name,team_two_name,attacker_payload_distance,defender_payload_distance,attacker_time_banked,defender_time_banked,attacker_control_perecent,defender_control_perecent,attacker_round_end_score,defender_round_end_score
0,2021-04-16 19:08:50,2021-04-16 19:11:30,OWL 2021,37234,1,Houston Outlaws,Houston Outlaws,Dallas Fuel,Busan,1,2,1,MEKA Base,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,0.0,0.0,0.0,0.0,100.0,0.0,1,0
1,2021-04-16 19:12:16,2021-04-16 19:17:52,OWL 2021,37234,1,Houston Outlaws,Houston Outlaws,Dallas Fuel,Busan,2,2,1,Downtown,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,0.0,0.0,0.0,0.0,99.0,100.0,1,1
2,2021-04-16 19:18:37,2021-04-16 19:23:17,OWL 2021,37234,1,Houston Outlaws,Houston Outlaws,Dallas Fuel,Busan,3,2,1,Sanctuary,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,0.0,0.0,0.0,0.0,100.0,54.0,2,1
3,2021-04-16 19:30:47,2021-04-16 19:37:38,OWL 2021,37234,2,Houston Outlaws,Dallas Fuel,Houston Outlaws,King's Row,1,3,2,,Dallas Fuel,Houston Outlaws,Houston Outlaws,Dallas Fuel,70.307037,0.0,68.430008,0.0,,,3,0
4,2021-04-16 19:39:10,2021-04-16 19:47:27,OWL 2021,37234,2,Houston Outlaws,Dallas Fuel,Houston Outlaws,King's Row,2,3,2,,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,62.829689,70.307037,0.0,68.430008,,,2,3


In [11]:
# Preview the first five rows of the match-result frame.
# NOTE(review): match_df has not been modified since it was loaded, so this
# shows the same rows as the earlier preview — consider removing this cell.
match_df.head(n=5)

Unnamed: 0,round_start_time,round_end_time,stage,match_id,game_number,match_winner,map_winner,map_loser,map_name,map_round,winning_team_final_map_score,losing_team_final_map_score,control_round_name,attacker,defender,team_one_name,team_two_name,attacker_payload_distance,defender_payload_distance,attacker_time_banked,defender_time_banked,attacker_control_perecent,defender_control_perecent,attacker_round_end_score,defender_round_end_score
0,2021-04-16 19:08:50,2021-04-16 19:11:30,OWL 2021,37234,1,Houston Outlaws,Houston Outlaws,Dallas Fuel,Busan,1,2,1,MEKA Base,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,0.0,0.0,0.0,0.0,100.0,0.0,1,0
1,2021-04-16 19:12:16,2021-04-16 19:17:52,OWL 2021,37234,1,Houston Outlaws,Houston Outlaws,Dallas Fuel,Busan,2,2,1,Downtown,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,0.0,0.0,0.0,0.0,99.0,100.0,1,1
2,2021-04-16 19:18:37,2021-04-16 19:23:17,OWL 2021,37234,1,Houston Outlaws,Houston Outlaws,Dallas Fuel,Busan,3,2,1,Sanctuary,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,0.0,0.0,0.0,0.0,100.0,54.0,2,1
3,2021-04-16 19:30:47,2021-04-16 19:37:38,OWL 2021,37234,2,Houston Outlaws,Dallas Fuel,Houston Outlaws,King's Row,1,3,2,,Dallas Fuel,Houston Outlaws,Houston Outlaws,Dallas Fuel,70.307037,0.0,68.430008,0.0,,,3,0
4,2021-04-16 19:39:10,2021-04-16 19:47:27,OWL 2021,37234,2,Houston Outlaws,Dallas Fuel,Houston Outlaws,King's Row,2,3,2,,Houston Outlaws,Dallas Fuel,Houston Outlaws,Dallas Fuel,62.829689,70.307037,0.0,68.430008,,,2,3
