In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(root, fname)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Library

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import folium

# Datasets

With e-sports growing day by day and becoming one of the biggest emerging markets, we look at one of the most popular games, CS:GO, and its professional game stats. In this notebook, we will focus on the player stats data. Let's have a look!

In [None]:
# Load the per-player statistics CSV from the attached Kaggle dataset.
df_players = pd.read_csv('../input/csgo-player-and-team-stats/player_stats.csv')

In [None]:
# Remove the leftover "Unnamed: 0" column (presumably the index written out
# when the CSV was saved -- confirm against the file).
# Reassigning instead of using inplace=True, together with errors="ignore",
# keeps this cell idempotent: re-running it after the column is already gone
# no longer raises a KeyError.
df_players = df_players.drop(columns="Unnamed: 0", errors="ignore")
df_players.head(10)

In [None]:
# Print the column names, dtypes, and non-null counts of the player table.
df_players.info()

In [None]:
# Show summary statistics (by default, for the numeric columns).
df_players.describe()

In [None]:
# Draw one histogram per performance metric, side by side on a shared y-axis.
fig, axes = plt.subplots(1, 3, figsize=(18, 8), sharey=True)
fig.suptitle('Kill Difference, Kill/Death, and Rating')

# (column in df_players, panel title), in left-to-right panel order.
metric_panels = [
    ('kd_diff', 'Kill Difference'),
    ('kd', 'Kill/Death'),
    ('rating', 'Rating'),
]
for panel_ax, (metric, panel_title) in zip(axes, metric_panels):
    sns.histplot(ax=panel_ax, x=df_players[metric])
    panel_ax.set_title(panel_title)

plt.show()

# Top 10 Player Ratings

The data is already sorted by player rating.

In [None]:
# Select the ten highest-rated players explicitly instead of relying on the
# row order of the CSV. When the file is already sorted by rating this yields
# the same rows as head(10); if it is not, the result is still the true top 10.
top_10_players = df_players.nlargest(10, 'rating')
top_10_players

In [None]:
# Bar chart of the rating of each of the top 10 players.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_players, x='name', y='rating', ax=ax)
plt.show()

We will focus on these top 10 players for the rest of the analysis.

# Players with Most Teams

When we look at the data, the <b>Teams</b> column shows that several players have represented more than one team in the tournament. Let's have a look!

In [None]:
# Count the teams listed for each player: the `teams` value is split on commas
# and the number of resulting pieces is the team count.
# Iterating over the column's values (instead of looking each row up by an
# integer label) keeps this correct even if the DataFrame does not have the
# default 0..n-1 index, and avoids a per-row label lookup.
team_counts = [len(teams.split(',')) for teams in df_players['teams']]

In [None]:
# Attach the per-player team count as a new `team_counts` column and preview it.
df_players = df_players.assign(team_counts=team_counts)
df_players.head(5)

In [None]:
# Rank players by how many teams they are listed with, most teams first.
team_columns = ['name', 'team_counts']
most_team = df_players[team_columns].sort_values(by='team_counts', ascending=False)
most_team.head(10)

In [None]:
# Number of players for each distinct team count. After count(), the `name`
# column holds how many players fall in that group, not a player name.
players_per_count = most_team.groupby('team_counts')['name'].count()
team_group = players_per_count.reset_index()
team_group

In [None]:
# Bar chart: x is the number of teams, y is how many players have that count.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=team_group, x='team_counts', y='name', ax=ax)
plt.show()

From the graph above, we can conclude that there are players who have represented more than one team. Perhaps, before or after tournaments, many teams recruited members from other teams.

# Players with Most Maps Played

Now, we will have a look at the maps. The <b>maps played</b> by the players have an impact on their experience with the maps themselves.

In [None]:
# The ten players with the highest number of maps played.
maps_by_player = df_players[['name', 'total_maps']]
top_10_maps = maps_by_player.sort_values(by='total_maps', ascending=False).iloc[:10]
top_10_maps

In [None]:
# Bar chart of total maps played for the ten most map-experienced players.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_maps, x='name', y='total_maps', ax=ax)
plt.show()

We can see that only three players have played more than 2000 matches on different maps. None of them is included in the top 10 player ratings.

# Players with Most Rounds

Not only the maps played, but the <b>rounds played</b> are also important to a player's experience.

In [None]:
# The ten players with the highest number of rounds played.
rounds_by_player = df_players[['name', 'total_rounds']]
top_10_rounds = rounds_by_player.sort_values(by='total_rounds', ascending=False).iloc[:10]
top_10_rounds

In [None]:
# Bar chart of total rounds played for the ten most round-experienced players.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_rounds, x='name', y='total_rounds', ax=ax)
plt.show()

From the graph above, we can see the total rounds of each player in the top 10. Six players have played more than 5000 rounds. Again, in this data, we cannot find any players from the top 10 player ratings. Unfortunately, we can't calculate how long the players spent playing those rounds because that data is not available.

# Top 10 Kill Difference

While experience is important before the tournaments, what matters most is the actual performance in a match or tournament. Let's have a look at the <b>Kill Difference</b>.

In [None]:
# The ten players with the largest kill difference (`kd_diff`).
diff_by_player = df_players[['name', 'kd_diff']]
top_10_diff = diff_by_player.sort_values(by='kd_diff', ascending=False).iloc[:10]
top_10_diff

In [None]:
# Bar chart of kill difference for the ten players with the largest values.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_diff, x='name', y='kd_diff', ax=ax)
plt.show()

From the graph above, we can see a relatively big difference among the top three, with about 1000 separating each of them. Unlike the two previous experience-based charts, this graph includes two players from the top 10 ratings. One, at the top of the list, is <b>s1mple</b>, the player with the second-highest rating; the other is the player with the highest rating, <b>ZywOo</b>, in 7th place.

# Top 10 Kill per Death

Next, we will have a look at the Kill per Death ratio of each player.

In [None]:
# The ten players with the highest kill/death value (`kd`).
kd_by_player = df_players[['name', 'kd']]
top_10_kd = kd_by_player.sort_values(by='kd', ascending=False).iloc[:10]
top_10_kd

In [None]:
# Bar chart of kill/death for the ten players with the highest values.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_kd, x='name', y='kd', ax=ax)
plt.show()

From the graph above, we can see that the top 10 Kill/Death values are between 1.31 and 1.43. One of the players is outside the top 10 player ratings; the rest are top 10 players based on ratings. The ranking in this graph also differs from the ranking in the top 10 based on ratings. Despite being at the top of the top 10 player ratings, <b>ZywOo</b> ranks only 3rd in this category, while 1st place goes to <b>sh1ro</b>, who is 5th in the top 10 player ratings.

# Correlation

Next, we should ask: is there any correlation between the features? Which features most affect the ratings? Let's see below!

In [None]:
# Pairwise correlation of the numeric player statistics, excluding the derived
# `team_counts` column.
# Non-numeric columns are filtered out explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops them silently and raises on string columns.
# select_dtypes works on older pandas as well.
corr_matrix = (
    df_players
    .drop(columns='team_counts')
    .select_dtypes(include='number')
    .corr()
)
corr_matrix

In [None]:
# Annotated heatmap of the correlation matrix (red = low, green = high on the
# RdYlGn colormap). The title previously misspelled "Difference".
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='RdYlGn', ax=ax)
ax.set_title('Correlation Map: Total Maps, Total Rounds, Kill/Death Difference, Kill/Death, and Rating', size=15)
plt.show()

From the heatmap above, we can conclude that the <b>Ratings</b> are strongly correlated with <b>Kill/Death Difference</b> and <b>Kill per Death</b>, while <b>Kill/Death Difference</b> and <b>Kill per Death</b> are also strongly correlated with each other.

# Bonus: Player Countries

Do we want to know where the players come from? Which countries do most CS:GO players come from?

Let's answer that below!

In [None]:
# Number of players per country, largest first, as a two-column frame
# (`country`, `count`).
players_per_country = df_players.groupby('country')['name'].count()
player_countries = (
    players_per_country
    .sort_values(ascending=False)
    .rename('count')
    .reset_index()
)
top_10_countries = player_countries.iloc[:10]
top_10_countries

In [None]:
# Bar chart of player counts for the ten most represented countries.
# top_10_countries already holds at most ten rows, so no further slicing is needed.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_countries, x='country', y='count', ax=ax)
plt.show()

In [None]:
# Rename two countries before mapping -- presumably so they match the `name`
# property used in the GeoJSON file (confirm against world-countries.json).
country_renames = {"United States": "United States of America", "Korea": "South Korea"}
player_countries_new = player_countries.replace(country_renames)

country_geo = '../input/world-countries/world-countries.json'

# World map centered on (0, 0) at a zoomed-out level.
player_map = folium.Map(location=[0, 0], zoom_start=2)

# Shade each country by its player count, joining the data to the GeoJSON
# features on the feature's `properties.name`.
choropleth_layer = folium.Choropleth(
    geo_data=country_geo,
    data=player_countries_new,
    columns=['country', 'count'],
    key_on='feature.properties.name',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Players by each Country',
)
choropleth_layer.add_to(player_map)

player_map