In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from matplotlib import pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the per-season player statistics CSV into a single DataFrame.
DATA_PATH = '/kaggle/input/basketball-players-stats-per-season-49-leagues/players_stats_by_season_full_details.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the raw table.
data.head(n=5)

In [None]:
# Show the column labels of the raw table.
data.columns

# Leagues

In [None]:
# Number of rows per league, most frequent league first.
league_order = data['League'].value_counts().index
fig, ax = plt.subplots(figsize=(20, 6))
# The column is passed as `x=`: newer seaborn releases accept `x` by keyword only,
# so a bare positional Series is no longer interpreted as the x variable.
sns.countplot(x=data['League'], order=league_order, ax=ax)
ax.set_title('Number of rows per league')
# Rotate the league names so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)

# Seasons

In [None]:
# Distinct season labels present in the table.
data['Season'].unique()

# Focus on NBA

In [None]:
# Keep only the NBA rows. `.copy()` makes `nba` an independent frame, so adding
# columns to it in later cells does not raise pandas' SettingWithCopyWarning.
nba = data[data['League'] == 'NBA'].copy()

# Nationalities

In [None]:
# Ten nationalities with the most NBA rows.
# Only the `League` column is counted (instead of counting every column and then
# relying on `League` being the first one), and the result is labelled `rows`
# so the count is not shown under a column called "League".
(
    nba.groupby('nationality')['League']
    .count()
    .rename('rows')
    .reset_index()
    .sort_values('rows', ascending=False)
    .head(10)
)

In [None]:
# Ten most frequent nationalities among NBA rows, United States excluded.
# The filter is evaluated once and reused for both the data and the bar order.
foreign = nba.loc[nba['nationality'] != 'United States', 'nationality']
top_foreign = foreign.value_counts().index[:10]
fig, ax = plt.subplots(figsize=(15, 6))
# `x=` is required: newer seaborn releases accept `x` by keyword only.
sns.countplot(x=foreign, order=top_foreign, ax=ax)
ax.set_title('Top 10 non-US nationalities in the NBA (all seasons)')
ax.tick_params(axis='x', labelrotation=90)

### In 2019-2020

In [None]:
# Ten most frequent non-US nationalities, restricted to the '2019 - 2020' season.
# The combined filter is evaluated once and reused for the data and the bar order.
is_foreign_2020 = (nba['nationality'] != 'United States') & (nba['Season'] == '2019 - 2020')
foreign_2020 = nba.loc[is_foreign_2020, 'nationality']
top_foreign_2020 = foreign_2020.value_counts().index[:10]
fig, ax = plt.subplots(figsize=(15, 6))
# `x=` is required: newer seaborn releases accept `x` by keyword only.
sns.countplot(x=foreign_2020, order=top_foreign_2020, ax=ax)
ax.set_title('Top 10 non-US nationalities in the NBA (2019 - 2020)')
ax.tick_params(axis='x', labelrotation=90)

# Mean age per Season

In [None]:
# Age = closing year of the season minus birth year. Season labels look like
# '2019 - 2020', so the piece after the dash is the closing year.
season_end_year = nba['Season'].str.split('-').str[1].astype(float)
# `.assign` returns a new frame instead of writing a column into a filtered
# selection of `data`, which would raise SettingWithCopyWarning.
nba = nba.assign(Age=season_end_year - nba['birth_year'])

In [None]:
# Average of the Age column over all NBA rows of each season.
age_by_season = nba.groupby('Season')['Age']
age_by_season.mean()

# Height and weight

In [None]:
# Season plus the two body-measurement columns used in this section.
body_cols = ['Season', 'height_cm', 'weight_kg']
hw = nba[body_cols]

In [None]:
# Per-season mean of height and weight; `Season` stays a regular column.
hw = hw.groupby('Season', as_index=False).mean()

In [None]:
# Display the per-season mean height (cm) and weight (kg).
hw

In [None]:
# Mean height (left axis) and mean weight (right axis, red) per season.
fig, ax = plt.subplots(figsize=(15, 6))
sns.lineplot(x="Season", y="height_cm", data=hw, ax=ax)
# Second y-axis sharing the same x-axis. Each curve is given its target axes
# explicitly rather than relying on pyplot's implicit "current axes".
ax2 = ax.twinx()
sns.lineplot(x="Season", y="weight_kg", data=hw, color='red', ax=ax2);

In [None]:
# Height decreases only slightly, but weight drops noticeably (about 3 kg) in 10 years

# Best scorers per season

In [None]:
# Columns needed for the scoring leaderboard. `.copy()` makes `scorers` an
# independent frame, so adding the per-game column to it afterwards does not
# raise SettingWithCopyWarning.
scorers = nba[['Season', 'Stage', 'Player', 'Team', 'GP', 'PTS']].copy()

In [None]:
# Keep regular-season rows only, then derive PTS per game (rounded to 2 decimals).
# Filtering first and using `.assign` builds a new frame instead of writing a
# column into a selection of `nba` (SettingWithCopyWarning), and skips the
# division for rows that would be discarded anyway. The result is unchanged.
scorers = scorers[scorers['Stage'] == 'Regular_Season']
scorers = scorers.assign(**{'PTS/G': (scorers['PTS'] / scorers['GP']).round(2)})

In [None]:
# Flag, within every season, the row(s) whose PTS/G equals that season's maximum.
# The aggregation is named by string: passing the builtin `max` to `transform`
# is deprecated in recent pandas releases.
idx = scorers.groupby('Season')['PTS/G'].transform('max') == scorers['PTS/G']
# One `.loc` call selects rows and columns together; ties keep several rows per season.
best_scorers = scorers.loc[idx, ['Season', 'Player', 'Team', 'PTS/G']]
best_scorers

In [None]:
# Season-by-season curve of the top PTS/G, each point labelled with the player's name.
fig, ax = plt.subplots(figsize=(18, 6))
sns.lineplot(x="Season", y="PTS/G", data=best_scorers, ax=ax)
ax.set_ylim(27, 37)
for season, player, per_game in zip(best_scorers['Season'], best_scorers['Player'], best_scorers['PTS/G']):
    # Anchor the label 0.5 above the data point, then shift it by (-27, -10) points.
    ax.annotate(player, (season, per_game + 0.5), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)

# Best passers per season

In [None]:
# Columns needed for the passing leaderboard. `.copy()` makes `pas` an
# independent frame, so adding the per-game column to it afterwards does not
# raise SettingWithCopyWarning.
pas = nba[['Season', 'Stage', 'Player', 'Team', 'GP', 'AST']].copy()

In [None]:
# Keep regular-season rows only, then derive AST per game (rounded to 2 decimals).
# Filtering first and using `.assign` builds a new frame instead of writing a
# column into a selection of `nba` (SettingWithCopyWarning), and skips the
# division for rows that would be discarded anyway. The result is unchanged.
pas = pas[pas['Stage'] == 'Regular_Season']
pas = pas.assign(**{'AST/G': (pas['AST'] / pas['GP']).round(2)})

In [None]:
# Flag, within every season, the row(s) whose AST/G equals that season's maximum.
# The aggregation is named by string: passing the builtin `max` to `transform`
# is deprecated in recent pandas releases.
idx = pas.groupby('Season')['AST/G'].transform('max') == pas['AST/G']
# One `.loc` call selects rows and columns together; ties keep several rows per season.
best_pas = pas.loc[idx, ['Season', 'Player', 'Team', 'AST/G']]
best_pas

In [None]:
# Season-by-season curve of the top AST/G, each point labelled with the player's name.
fig, ax = plt.subplots(figsize=(18, 6))
sns.lineplot(x="Season", y="AST/G", data=best_pas, ax=ax)
ax.set_ylim(9.5, 12)
for season, player, per_game in zip(best_pas['Season'], best_pas['Player'], best_pas['AST/G']):
    # Label is anchored on the data point and shifted by (-27, -10) points.
    ax.annotate(player, (season, per_game), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)

# Best rebounders per season

In [None]:
# Columns needed for the rebounding leaderboard. `.copy()` makes `reb` an
# independent frame, so adding the per-game column to it afterwards does not
# raise SettingWithCopyWarning.
reb = nba[['Season', 'Stage', 'Player', 'Team', 'GP', 'REB']].copy()

In [None]:
# Keep regular-season rows only, then derive REB per game (rounded to 2 decimals).
# Filtering first and using `.assign` builds a new frame instead of writing a
# column into a selection of `nba` (SettingWithCopyWarning), and skips the
# division for rows that would be discarded anyway. The result is unchanged.
reb = reb[reb['Stage'] == 'Regular_Season']
reb = reb.assign(**{'REB/G': (reb['REB'] / reb['GP']).round(2)})

In [None]:
# Flag, within every season, the row(s) whose REB/G equals that season's maximum.
# The aggregation is named by string: passing the builtin `max` to `transform`
# is deprecated in recent pandas releases.
idx = reb.groupby('Season')['REB/G'].transform('max') == reb['REB/G']
# One `.loc` call selects rows and columns together; ties keep several rows per season.
best_reb = reb.loc[idx, ['Season', 'Player', 'Team', 'REB/G']]
best_reb

In [None]:
# Season-by-season curve of the top REB/G, each point labelled with the player's name.
fig, ax = plt.subplots(figsize=(18, 6))
sns.lineplot(x="Season", y="REB/G", data=best_reb, ax=ax)
ax.set_ylim(12, 16.5)
for season, player, per_game in zip(best_reb['Season'], best_reb['Player'], best_reb['REB/G']):
    # Label is anchored on the data point and shifted by (-27, -10) points.
    ax.annotate(player, (season, per_game), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)

# Best steals per season

In [None]:
# Columns needed for the steals leaderboard. `.copy()` makes `stl` an
# independent frame, so adding the per-game column to it afterwards does not
# raise SettingWithCopyWarning.
stl = nba[['Season', 'Stage', 'Player', 'Team', 'GP', 'STL']].copy()

In [None]:
# Keep regular-season rows only, then derive STL per game (rounded to 2 decimals).
# Filtering first and using `.assign` builds a new frame instead of writing a
# column into a selection of `nba` (SettingWithCopyWarning), and skips the
# division for rows that would be discarded anyway. The result is unchanged.
stl = stl[stl['Stage'] == 'Regular_Season']
stl = stl.assign(**{'STL/G': (stl['STL'] / stl['GP']).round(2)})

In [None]:
# Flag, within every season, the row(s) whose STL/G equals that season's maximum.
# The aggregation is named by string: passing the builtin `max` to `transform`
# is deprecated in recent pandas releases.
idx = stl.groupby('Season')['STL/G'].transform('max') == stl['STL/G']
# One `.loc` call selects rows and columns together; ties keep several rows per season.
best_stl = stl.loc[idx, ['Season', 'Player', 'Team', 'STL/G']]
best_stl

In [None]:
# Season-by-season curve of the top STL/G, each point labelled with the player's name.
fig, ax = plt.subplots(figsize=(18, 6))
sns.lineplot(x="Season", y="STL/G", data=best_stl, ax=ax)
ax.set_ylim(2, 2.6)
for season, player, per_game in zip(best_stl['Season'], best_stl['Player'], best_stl['STL/G']):
    # Label is anchored on the data point and shifted by (-27, -10) points.
    ax.annotate(player, (season, per_game), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)

# Best Blocks per Season

In [None]:
# Columns needed for the blocks leaderboard. `.copy()` makes `blk` an
# independent frame, so adding the per-game column to it afterwards does not
# raise SettingWithCopyWarning.
blk = nba[['Season', 'Stage', 'Player', 'Team', 'GP', 'BLK']].copy()

In [None]:
# Keep regular-season rows only, then derive BLK per game (rounded to 2 decimals).
# Filtering first and using `.assign` builds a new frame instead of writing a
# column into a selection of `nba` (SettingWithCopyWarning), and skips the
# division for rows that would be discarded anyway. The result is unchanged.
blk = blk[blk['Stage'] == 'Regular_Season']
blk = blk.assign(**{'BLK/G': (blk['BLK'] / blk['GP']).round(2)})

In [None]:
# Flag, within every season, the row(s) whose BLK/G equals that season's maximum.
# The aggregation is named by string: passing the builtin `max` to `transform`
# is deprecated in recent pandas releases.
idx = blk.groupby('Season')['BLK/G'].transform('max') == blk['BLK/G']
# One `.loc` call selects rows and columns together; ties keep several rows per season.
best_blk = blk.loc[idx, ['Season', 'Player', 'Team', 'BLK/G']]
best_blk

In [None]:
# Season-by-season curve of the top BLK/G, each point labelled with the player's name.
fig, ax = plt.subplots(figsize=(18, 6))
sns.lineplot(x="Season", y="BLK/G", data=best_blk, ax=ax)
ax.set_ylim(2.3, 3.8)
for season, player, per_game in zip(best_blk['Season'], best_blk['Player'], best_blk['BLK/G']):
    # Label is anchored on the data point and shifted by (-27, -10) points.
    ax.annotate(player, (season, per_game), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)

# Most 3PM per Season

In [None]:
# Columns needed for the 3PM leaderboard. `.copy()` makes `pm3` an independent
# frame, so adding the percentage column to it afterwards does not raise
# SettingWithCopyWarning.
pm3 = nba[['Season', 'Stage', 'Player', 'Team', '3PM', '3PA']].copy()

In [None]:
# Keep regular-season rows only, then derive 3P% = 3PM / 3PA as a percentage.
# The ratio is rounded to 3 decimals before scaling by 100 and rounded again
# afterwards, exactly as before; rows with 3PA == 0 yield NaN or inf.
# Filtering first and using `.assign` builds a new frame instead of writing a
# column into a selection of `nba` (SettingWithCopyWarning).
pm3 = pm3[pm3['Stage'] == 'Regular_Season']
pm3 = pm3.assign(**{'3P%': ((pm3['3PM'] / pm3['3PA']).round(3) * 100).round(3)})

In [None]:
# Flag, within every season, the row(s) whose 3PM equals that season's maximum.
# The aggregation is named by string: passing the builtin `max` to `transform`
# is deprecated in recent pandas releases.
idx = pm3.groupby('Season')['3PM'].transform('max') == pm3['3PM']
# One `.loc` call selects rows and columns together; ties keep several rows per season.
best_pm3 = pm3.loc[idx, ['Season', 'Player', 'Team', '3PM', '3P%']]
best_pm3

In [None]:
# Season-by-season curve of the highest 3PM total; each point is labelled with
# the player's name and that player's 3P%.
fig, ax = plt.subplots(figsize=(18, 6))
sns.lineplot(x="Season", y="3PM", data=best_pm3, ax=ax)
ax.set_ylim(150, 420)
for season, player, made, pct in zip(best_pm3['Season'], best_pm3['Player'], best_pm3['3PM'], best_pm3['3P%']):
    label = player + ' ' + str(pct) + '%'
    # Anchor the label 10 above the data point, then shift it by (-27, -10) points.
    ax.annotate(label, (season, made + 10), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)

In [None]:
# Bars: highest 3PM total per season (left axis).
# Line: 3P% of that season's leader (right axis), labelled with name and percentage.
fig, ax = plt.subplots(figsize=(18, 6))
sns.barplot(x="Season", y="3PM", data=best_pm3, ax=ax)
# Second y-axis sharing the same x-axis; the line is drawn on it explicitly
# rather than through pyplot's implicit "current axes".
ax2 = ax.twinx()
sns.lineplot(x="Season", y="3P%", data=best_pm3, ax=ax2)
for season, player, pct in zip(best_pm3['Season'], best_pm3['Player'], best_pm3['3P%']):
    # Use the second word of the name as the short label, as before, but fall
    # back to the full name when there is no second word instead of raising IndexError.
    name_parts = player.split()
    short_name = name_parts[1] if len(name_parts) > 1 else player
    ax2.annotate(short_name + ' ' + str(pct) + '%', (season, pct), xycoords='data', xytext=(-27, -10), textcoords='offset points', size=12)