In [49]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

from collections import Counter
from plotly.offline import init_notebook_mode, iplot

# Set up Plotly's offline mode for use inside the notebook.
# NOTE(review): connected=True is documented by Plotly as loading plotly.js from
# a CDN instead of embedding it in the notebook — confirm against the installed version.
init_notebook_mode(connected=True)

In [50]:
# Directory that holds every CSV file read below.
# Change this single value when the data lives somewhere else.
DATA_DIR = '/content/drive/MyDrive/Program/Python/Data ML DL/Datasets'

# Full path of each dataset file inside DATA_DIR
root_game = f'{DATA_DIR}/esport_competitive_game.csv'
root_country = f'{DATA_DIR}/esport_country.csv'
root_player = f'{DATA_DIR}/esport_player.csv'
root_team = f'{DATA_DIR}/esport_team.csv'

# Read dataset
df_game = pd.read_csv(root_game)
df_country = pd.read_csv(root_country)
df_player = pd.read_csv(root_player)
df_team = pd.read_csv(root_team)

In [51]:
# Checking missing value: print the number of NaN cells per column of each dataset,
# followed by a separator line so the four reports are easy to tell apart.
separator = '--' * 20
for df in (df_team, df_country, df_player, df_game):
    nan_count_per_column = df.isna().sum()
    print(nan_count_per_column)
    print(separator)

Team Name          0
Total Prize Won    0
dtype: int64
----------------------------------------
Country                                0
Total Earning by Player                0
Percentage of Country Total Earning    0
Team                                   0
dtype: int64
----------------------------------------
Player ID        0
Name             0
Total Earning    0
Team             0
dtype: int64
----------------------------------------
Game                             0
Total Money Earned               0
Total Money Earned Percentage    0
Team                             0
dtype: int64
----------------------------------------


In [52]:
# Show the column names of each dataset as a plain list
for head in (df_country, df_game, df_player, df_team):
    column_names = list(head.columns)
    print(column_names)

['Country', 'Total Earning by Player', 'Percentage of Country Total Earning', 'Team']
['Game', 'Total Money Earned', 'Total Money Earned Percentage', 'Team']
['Player ID', 'Name', 'Total Earning', 'Team']
['Team Name', 'Total Prize Won']


In [53]:
# Helper functions that convert formatted text values (money, percentages) to numbers
def convert_to_float(string):
    """Convert a money-formatted value such as ``'$1,234.50'`` to a float.

    Parameters
    ----------
    string : str or number
        Text that may contain a ``$`` sign and ``,`` thousands separators.
        A value that is already numeric is returned as a float, so applying
        the conversion a second time does not destroy the data.

    Returns
    -------
    float
        The parsed number, or ``np.nan`` when the value cannot be parsed
        (non-numeric text, ``None``, or another unsupported type).
    """
    # Already a number: nothing to strip, just normalise the type.
    if isinstance(string, (int, float, np.integer, np.floating)):
        return float(string)
    try:
        return float(string.replace('$', '').replace(',', ''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: the value is not text (e.g. None);
        # ValueError: the text left after stripping is not a number.
        return np.nan

def convert_to_num(string):
    """Convert a percentage-formatted value such as ``'17.62%'`` to a float.

    Parameters
    ----------
    string : str or number
        Text that may contain a ``%`` sign. A value that is already numeric
        is returned as a float, so applying the conversion a second time
        does not destroy the data.

    Returns
    -------
    float
        The number without the percent sign (``'17.62%'`` gives ``17.62``),
        or ``np.nan`` when the value cannot be parsed.
    """
    # Already a number: nothing to strip, just normalise the type.
    if isinstance(string, (int, float, np.integer, np.floating)):
        return float(string)
    try:
        return float(string.replace('%', ''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: the value is not text (e.g. None);
        # ValueError: the text left after stripping is not a number.
        return np.nan

# converting
# Each entry: (dataset, column converted in place, converter matching the column's text format)
conversions = [
    (df_team, 'Total Prize Won', convert_to_float),
    (df_country, 'Total Earning by Player', convert_to_float),
    (df_player, 'Total Earning', convert_to_float),
    (df_game, 'Total Money Earned', convert_to_float),
    (df_country, 'Percentage of Country Total Earning', convert_to_num),
    (df_game, 'Total Money Earned Percentage', convert_to_num),
]

for target_df, target_col, converter in conversions:
    # Keep this cell safe to re-run: the converters are written for text, so
    # feeding them a column that is already numeric is at best redundant and
    # can turn every value into NaN (which a later dropna would then delete).
    if pd.api.types.is_numeric_dtype(target_df[target_col]):
        continue
    # Pass the converter directly; wrapping it in a lambda adds nothing.
    target_df[target_col] = target_df[target_col].apply(converter)

In [54]:
# Searching for NaN value inside the dataset after conversion
earning_is_missing = df_country['Total Earning by Player'].isna()
df_country.loc[earning_is_missing]

Unnamed: 0,Country,Total Earning by Player,Percentage of Country Total Earning,Team
846,Adou,,,Turnso Gaming
847,NuanYang,,,Turnso Gaming
848,QianShi,,,Turnso Gaming
849,ShenRen,,,Turnso Gaming
850,ShiJiu,,,Turnso Gaming


In [55]:
# Remove every row that contains at least one NaN cell from the
# 'team', 'country', 'player', and 'game' datasets.
# The frames are modified in place, so the existing names keep
# pointing at the cleaned data.
for df in (df_team, df_country, df_player, df_game):
    df.dropna(axis=0, how='any', inplace=True)

In [56]:
# Check that the rows with a missing earning value have been dropped
# (the result is expected to be an empty frame)
earning_is_missing = df_country['Total Earning by Player'].isna()
df_country.loc[earning_is_missing]

Unnamed: 0,Country,Total Earning by Player,Percentage of Country Total Earning,Team


In [57]:
# Add a copy of each money column expressed in millions, which is easier to read
df_team['Total Prize (Million)'] = df_team['Total Prize Won'] / 10**6
df_country['Total Earning by Player (Million)'] = df_country['Total Earning by Player'] / 10**6

In [58]:
# Five teams with the largest total prize money
df_team.sort_values('Total Prize Won', ascending=False).head(5)

Unnamed: 0,Team Name,Total Prize Won,Total Prize (Million)
0,Team Liquid,37509477.52,37.509478
1,OG,34550723.59,34.550724
2,Evil Geniuses,24668338.49,24.668338
3,Fnatic,16253243.71,16.253244
4,Virtus.pro,16053141.26,16.053141


In [59]:
# Summary statistics of the numeric team columns, one row per column
df_team.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Total Prize Won,100.0,5273348.0,6083548.0,1278584.0,1711605.0,3124000.0,5881952.0,37509480.0
Total Prize (Million),100.0,5.273348,6.083548,1.278584,1.711605,3.124,5.881952,37.50948


In [60]:
# Preview the first rows of the country dataset
df_country.head(n=5)

Unnamed: 0,Country,Total Earning by Player,Percentage of Country Total Earning,Team,Total Earning by Player (Million)
0,United States,6608262.09,17.62,Team Liquid,6.608262
1,Germany,5325911.27,14.2,Team Liquid,5.325911
2,Bulgaria,4534754.0,12.09,Team Liquid,4.534754
3,Finland,4302492.0,11.47,Team Liquid,4.302492
4,Jordan,4068404.4,10.85,Team Liquid,4.068404


In [61]:
# Highest total earning by country
# numeric_only=True sums only the numeric columns. Without it the result depends
# on the pandas version: older releases silently drop the text column 'Team',
# newer ones try to add the strings together.
(
    df_country
    .groupby('Country')
    .sum(numeric_only=True)
    .sort_values(by='Total Earning by Player', ascending=False)
    .head()
)

Unnamed: 0_level_0,Total Earning by Player,Percentage of Country Total Earning,Total Earning by Player (Million)
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
United States,82571306.74,1681.8,82.571307
China,82168955.56,1395.22,82.168956
"Korea, Republic of",52220328.4,1397.75,52.220328
Denmark,30290514.16,467.5,30.290514
Sweden,27463378.05,354.98,27.463378


In [62]:
# Number of countries shown in the chart. The title is built from this value
# so the heading and the plotted bars cannot disagree.
top_n_countries = 15

# Total earning per country, largest first.
# numeric_only=True keeps the text column 'Team' out of the sum regardless of
# the pandas version in use.
dataframe = (
    df_country
    .groupby('Country')
    .sum(numeric_only=True)
    .sort_values(by='Total Earning by Player', ascending=False)
)

# Slice once and reuse it for both the data and the x axis
top_countries = dataframe.head(top_n_countries)

figure = px.bar(
    data_frame = top_countries,
    x = top_countries.index,
    y = 'Total Earning by Player'
)

figure.update_layout(title=f'Top {top_n_countries} Country in Total Earning')
figure.update_xaxes(tickangle=45, title='Country')
figure.show()

In [63]:
# Players with the highest total earning from tournament prizes
df_player.sort_values('Total Earning', ascending=False).head(5)

Unnamed: 0,Player ID,Name,Total Earning,Team
25,N0tail,Johan Sundstein,6753622.23,OG
26,JerAx,Jesse Vainikka,6068960.2,OG
27,ana,Anathan Pham,5976645.23,OG
28,Ceb,Sébastien Debs,5528232.23,OG
29,Topson,Topias Taavitsainen,5486739.2,OG


In [67]:
# Ten games with the largest summed 'Total Money Earned'.
# numeric_only=True keeps the text column 'Team' out of the sum regardless of
# the pandas version in use.
dataframe_2 = (
    df_game
    .groupby('Game')
    .sum(numeric_only=True)
    .sort_values('Total Money Earned', ascending=False)
    .head(10)
)

figure_2 = px.bar(
    data_frame = dataframe_2,
    x = dataframe_2.index,
    y = 'Total Money Earned',
)

figure_2.update_layout(title='Top 10 Earning Based on Game')
figure_2.update_xaxes(tickangle=45, title='Game')
figure_2.show()