In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

*  Table of Contents
- <a href='#0'>  Dataset Introduction </a> 
- <a href='#1'>1. Importing Libraries and Dataset </a>  
- <a href='#2'>2. Basic Visualization by using MATPLOTLIB Library   </a> 
- <a href='#3'>3. Cleaning Data   </a> 


 * #  <a id='0'> Dataset Introduction </a>

Content

This Dataset Includes:

Seasons 1999-2020, 49 Leagues, 11K players' details & stats per Season
Player Details: Birth Date, Height, Weight, Nationality, High School
Stats per Season: Scoring Stats, Free Throws, Rebounds, Blocks, Assists, Minutes, Games etc.
What's inside is more than just rows and columns. Make it easy for others to get started by describing how you acquired the data and what time period it represents, too.

* League = League Name
* Season = Season Year YYYY-YYYY
* Stage = International, NBA: Playoffs, Regular_Season
* Player = Player Full Name
* Team = Team Name
* GP = Games Played
* MIN = Minutes Played
* FGM = Field Goals Made
* FGA = Field Goals Attempts
* 3PM = Three Points Made
* 3PA = Three Points Attempts
* FTM = Free Throws Made
* FTA = Free Throws Attempts
* TOV = Turnovers
* PF = Personal Fouls
* ORB = Offensive Rebounds
* DRB = Defensive Rebounds
* REB =  Rebounds
* AST = Assists
* STL = Steals
* BLK = Blocks
* PTS = Points
* birth_year = Birth Year
* birth_month = Birth Month
* birth_date = Birth Date
* height = Height (Feet)
* height_cm = Height (CM)
* weight = Weight (Pounds)
* weight_kg = Weight (KG)
* nationality = Nationality
* high_school = High School

# <a id='1'> 1. Importing Libraries and Dataset</a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the per-season player statistics and preview the first rows.
csv_path = "/kaggle/input/basketball-players-stats-per-season-49-leagues/players_stats_by_season_full_details.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Pairwise correlation of the numeric columns only.
# numeric_only=True is required on pandas >= 2.0: the default became False,
# so the text columns (League, Player, ...) would make a bare corr() raise.
data.corr(numeric_only=True)

**Correlation Map**

In [None]:
# Heatmap of the correlation matrix, annotated with one-decimal coefficients.
# Only numeric columns are correlated; on pandas >= 2.0 a bare data.corr()
# raises because the frame also holds text columns.
corr_matrix = data.corr(numeric_only=True)
f, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(corr_matrix, annot=True, linewidths=.5, fmt='.1f', ax=ax)
plt.show()

In [None]:
# Show the column labels of the frame.
data.columns

# <a id='2'> 2.Basic Visualization by using MATPLOTLIB Library </a> 

In [None]:
# Line plot: AST and STL drawn against the row index on the same axes.
shared_style = dict(linewidth=1, alpha=0.5, grid=True)
data['AST'].plot(kind='line', color='g', label='AST', linestyle=':', **shared_style)
data['STL'].plot(color='r', label='STL', linestyle='-.', **shared_style)
plt.legend(loc='upper right')
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('Line Plot')
plt.show()

In [None]:
# Scatter plot of field goals made (x = FGM) against field goals attempted (y = FGA).
data.plot(kind='scatter', x='FGM', y='FGA', alpha=1, color='red')
plt.xlabel('FGM')
plt.ylabel('FGA')
plt.title('FGM FGA Scatter Plot')
# Render explicitly, like the other plotting cells; without it the cell ends
# on plt.title(...) and echoes its Text(...) return value.
plt.show()

In [None]:
# Histogram of the AST column, split into 50 bins on a 10x10 figure.
hist_options = {'bins': 50, 'figsize': (10, 10)}
data['AST'].plot(kind='hist', **hist_options)
plt.show()

# <a id='3'> 3.Cleaning Data </a> 

In [None]:
# Read the CSV again and preview the first rows.
# NOTE(review): presumably reloaded so the cleaning section starts from an unmodified frame - confirm.
csv_path = "/kaggle/input/basketball-players-stats-per-season-49-leagues/players_stats_by_season_full_details.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Show the last rows of the frame.
data.tail()

In [None]:
# Show the column labels of the frame.
data.columns

In [None]:
# (number of rows, number of columns)
data.shape

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
data.info()

**EXPLORATORY DATA ANALYSIS**

In [None]:
# Example: frequency of each League value in the player stats.
# dropna=False makes NaN, if present, show up as its own entry in the counts.
league_counts = data['League'].value_counts(dropna=False)
print(league_counts)
# As can be seen in the output, there are 4136 Eurocup rows and 977 Turkish-BSL rows.

In [None]:
# Descriptive statistics, transposed so that each described column becomes a row.
data.describe().T

**VISUAL EXPLORATORY DATA ANALYSIS**

In [None]:
# Example: compare the distribution of birth_year across nationalities
# (the rows are grouped by the 'nationality' column).
data.boxplot(column='birth_year',by = 'nationality')

**TIDY DATA**

In [None]:
# First, take the head of the player-stats data as a small frame
# so that melt is easier to explain.
data_new = data.head()    
data_new

In [None]:
# Melt the small frame into long format.
# id_vars: the column kept as identifier (not melted).
# value_vars: the columns unpivoted into variable/value pairs.
id_column = 'Player'
columns_to_melt = ['height_cm', 'weight_kg']
melted = pd.melt(data_new, id_vars=id_column, value_vars=columns_to_melt)
melted

**PIVOTING DATA**

In [None]:
# Reverse the melt:
# - the index is the player name,
# - each distinct entry of 'variable' becomes a column,
# - the cells are filled from 'value'.
melted.pivot(index = 'Player', columns = 'variable',values='value')

**CONCATENATING DATA**

In [None]:
# Build two small frames (head and tail of the data) and stack them row-wise.
data1, data2 = data.head(), data.tail()
# axis="index" (i.e. 0) appends rows; ignore_index=True renumbers the result from 0.
conc_data_row = pd.concat(
    [data1, data2],
    axis="index",
    ignore_index=True,
)
conc_data_row

In [None]:
# Place the head of height_cm and the head of weight_kg side by side.
data1, data2 = data['height_cm'].head(), data['weight_kg'].head()
# axis="columns" (i.e. 1) joins the two Series as columns.
conc_data_col = pd.concat([data1, data2], axis="columns")
conc_data_col

**DATA TYPES**


In [None]:
# Show the dtype of every column.
data.dtypes

In [None]:
# Convert Stage (object/str) to categorical and AST to float.
target_dtypes = {'Stage': 'category', 'AST': 'float'}
for column, dtype in target_dtypes.items():
    data[column] = data[column].astype(dtype)

In [None]:
# Show the dtype of every column.
data.dtypes

**MISSING DATA and TESTING WITH ASSERT**


In [None]:
# Check whether the player-stats data contains NaN values.
# Author's observation from the output: there are 53798 entries, but Team has
# only 53787 non-null objects, so 11 Team values are missing.
data.info()

In [None]:
# Count the high_school values; dropna=False also counts NaN.
data["high_school"].value_counts(dropna =False)
# Author's observation from the output: there are 30247 NaN values.

In [None]:
# Lets drop nan values
data1=data   # also we will use data to fill missing value so I assign it to data1 variable
data1["high_school"].dropna(inplace = True)  # inplace = True means we do not assign it to new variable. Changes automatically assigned to data
# So does it work ?

In [None]:
#  Lets check with assert statement
# Assert statement:
assert 1==1 # return nothing because it is true

In [None]:
assert  data['high_sqchool'].notnull().all() # returns nothing because we drop nan value

In [None]:
# Replace the missing high_school entries with the placeholder string 'empty'.
# The result is assigned back to the column: fillna(inplace=True) on the Series
# returned by data["high_school"] is chained assignment, which pandas warns
# about and which does not update `data` under Copy-on-Write.
data["high_school"] = data["high_school"].fillna('empty')

In [None]:
# Passes silently only if no NaN is left in the high_school column.
assert  data['high_school'].notnull().all()