# NBA Teams Dataset Data Gathering 

## Data Gathering

I gathered data using NBA_API. Here is the github to the source code:
https://github.com/swar/nba_api

In [1]:
#imported packages
import pandas as pd
from nba_api.stats.endpoints import teamyearbyyearstats
from nba_api.stats.static import teams 

In [2]:
#Got all the different teams
teams = teams.get_teams()
team_ids = [t['id'] for t in teams]

In [3]:
#for each team got their franchise history
df_list = []
for id_ in team_ids :
    cumstats = teamyearbyyearstats.TeamYearByYearStats(id_)
    df = cumstats.get_data_frames()[0]
    df_list.append(df)

In [4]:
print(df['YEAR'].value_counts)
df.info()

<bound method IndexOpsMixin.value_counts of 0     1988-89
1     1989-90
2     1990-91
3     1991-92
4     1992-93
5     1993-94
6     1994-95
7     1995-96
8     1996-97
9     1997-98
10    1998-99
11    1999-00
12    2000-01
13    2001-02
14    2004-05
15    2005-06
16    2006-07
17    2007-08
18    2008-09
19    2009-10
20    2010-11
21    2011-12
22    2012-13
23    2013-14
24    2014-15
25    2015-16
26    2016-17
27    2017-18
28    2018-19
29    2019-20
30    2020-21
Name: YEAR, dtype: object>
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 34 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   TEAM_ID                31 non-null     int64  
 1   TEAM_CITY              31 non-null     object 
 2   TEAM_NAME              31 non-null     object 
 3   YEAR                   31 non-null     object 
 4   GP                     31 non-null     int64  
 5   WINS                   

In [5]:
#dropped certain columns that I won't be using for analysis 
df = pd.concat(df_list)
dropped_col = ['TEAM_ID', 'TEAM_CITY', 'PO_WINS', 'PO_LOSSES', 'FGM', 'FGA', 'FG3M', 
               'FG3A', 'FTM', 'FTA', 'CONF_COUNT', 'DIV_COUNT', 'WINS', 
               'LOSSES', 'CONF_RANK', 'DIV_RANK', 'PTS_RANK']
df.drop(columns=dropped_col, inplace=True)
#I filtered by year (only 1990+)
df['YEAR'] = df.YEAR.str[:4].astype(int)
df = df.query("YEAR > 1989")
#Because some seasons had different number of games per season I made all the statistics per game instead of season total
columns_ = df.drop(columns=['YEAR', 'GP']).select_dtypes(include=int).columns
for col in columns_ :
    df[col] = df[col] / df['GP']
df.drop(columns=['GP'], inplace=True)

In [6]:
df.TEAM_NAME.value_counts()

Mavericks        31
Nuggets          31
Pacers           31
Kings            31
Bucks            31
Celtics          31
Jazz             31
Rockets          31
Suns             31
Cavaliers        31
Lakers           31
Magic            31
Hawks            31
Trail Blazers    31
Heat             31
Timberwolves     31
Knicks           31
Pistons          31
Warriors         31
76ers            31
Bulls            31
Spurs            31
Nets             31
Clippers         31
Hornets          30
Grizzlies        26
Raptors          26
Wizards          24
SuperSonics      18
Thunder          13
Bobcats          10
Pelicans          8
Bullets           7
Name: TEAM_NAME, dtype: int64

In [7]:
def team_fix(x) :
    if ( x['TEAM_NAME']=='Hornets' and x['YEAR'] <= 2012 ) :
        return 'Pelicans'
    elif ( x['TEAM_NAME']=='Bobcats') :
        return 'Hornets'
    if ( x['TEAM_NAME']=='Bullets') :
        return 'Wizards'
    if ( x['TEAM_NAME']=='SuperSonics') :
        return 'Thunder'
    return x['TEAM_NAME']

df['TEAM_NAME'] = df.apply(lambda x: team_fix(x), axis=1)
df.TEAM_NAME.value_counts()

Mavericks        31
Trail Blazers    31
Pacers           31
Kings            31
Bucks            31
Celtics          31
Pelicans         31
Jazz             31
Rockets          31
Suns             31
Cavaliers        31
Lakers           31
Magic            31
Hawks            31
Heat             31
Wizards          31
Nuggets          31
Clippers         31
Nets             31
Thunder          31
Spurs            31
Knicks           31
76ers            31
Timberwolves     31
Warriors         31
Pistons          31
Bulls            31
Raptors          26
Grizzlies        26
Hornets          17
Name: TEAM_NAME, dtype: int64

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 906 entries, 41 to 30
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   TEAM_NAME              906 non-null    object 
 1   YEAR                   906 non-null    int64  
 2   WIN_PCT                906 non-null    float64
 3   NBA_FINALS_APPEARANCE  906 non-null    object 
 4   FG_PCT                 906 non-null    float64
 5   FG3_PCT                906 non-null    float64
 6   FT_PCT                 906 non-null    float64
 7   OREB                   906 non-null    float64
 8   DREB                   906 non-null    float64
 9   REB                    906 non-null    float64
 10  AST                    906 non-null    float64
 11  PF                     906 non-null    float64
 12  STL                    906 non-null    float64
 13  TOV                    906 non-null    float64
 14  BLK                    906 non-null    float64
 15  PTS   

In [9]:
#converted to csv file
df.to_csv('nba_teams.csv')