In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

/kaggle/input/202122-nba-season-active-nba-players/active_players_2.csv
/kaggle/input/nba-injuries-2010-2018/injuries_2010-2020.csv


In [2]:
# Injury log CSV; the path is named so the data source is easy to spot.
injuries_csv = '/kaggle/input/nba-injuries-2010-2018/injuries_2010-2020.csv'
df = pd.read_csv(injuries_csv)

This dataset contains NBA players' injuries between 2010 and 2020.

The first injury record is dated October 2010 and the last one October 2020.

To find out the injury history of the active NBA players (21-22 season), we'll import the active players dataset and merge them.

In [3]:
# Show the last five rows to check the most recent records in the file.
df.tail()

Unnamed: 0,Date,Team,Acquired,Relinquished,Notes
27100,2020-09-30,Lakers,Dion Waiters,,activated from IL
27101,2020-10-02,Heat,,Bam Adebayo,strained neck (DTD)
27102,2020-10-02,Heat,,Goran Dragic,placed on IL with torn plantar fascia in left ...
27103,2020-10-02,Heat,Chris Silva,,activated from IL
27104,2020-10-06,Heat,Bam Adebayo,,returned to lineup


In [4]:
# Show the first five rows to check the earliest records in the file.
df.head()

Unnamed: 0,Date,Team,Acquired,Relinquished,Notes
0,2010-10-03,Bulls,,Carlos Boozer,fractured bone in right pinky finger (out inde...
1,2010-10-06,Pistons,,Jonas Jerebko,torn right Achilles tendon (out indefinitely)
2,2010-10-06,Pistons,,Terrico White,broken fifth metatarsal in right foot (out ind...
3,2010-10-08,Blazers,,Jeff Ayres,torn ACL in right knee (out indefinitely)
4,2010-10-08,Nets,,Troy Murphy,strained lower back (out indefinitely)


In [5]:
# Count missing values per column to see which columns need cleaning.
df.isnull().sum()

Date                0
Team                2
Acquired        17563
Relinquished     9545
Notes               0
dtype: int64

We will combine the Acquired and Relinquished columns into a single "Name" column and then drop the two original columns.


In [6]:
# Build a single player-name column: missing values become empty strings and
# the two columns are concatenated, so each row gets whichever side is filled.
df['Name'] = (
    df["Acquired"].fillna('')
    + df["Relinquished"].fillna('')
)

In [7]:
# The two source columns are no longer needed once "Name" exists.
df = df.drop(['Acquired', 'Relinquished'], axis="columns")

df.head()


Unnamed: 0,Date,Team,Notes,Name
0,2010-10-03,Bulls,fractured bone in right pinky finger (out inde...,Carlos Boozer
1,2010-10-06,Pistons,torn right Achilles tendon (out indefinitely),Jonas Jerebko
2,2010-10-06,Pistons,broken fifth metatarsal in right foot (out ind...,Terrico White
3,2010-10-08,Blazers,torn ACL in right knee (out indefinitely),Jeff Ayres
4,2010-10-08,Nets,strained lower back (out indefinitely),Troy Murphy


In [8]:
# Number of records per team, using the short team names as stored in the file.
df["Team"].value_counts()

Spurs           1163
Bucks           1068
Warriors        1060
Rockets         1058
Raptors         1044
Celtics         1040
Nets            1024
Heat            1023
Cavaliers       1001
Mavericks        992
Hawks            975
Nuggets          966
Lakers           959
Knicks           943
76ers            910
Wizards          875
Grizzlies        875
Timberwolves     860
Jazz             841
Magic            834
Pacers           831
Bulls            791
Suns             733
Kings            728
Hornets          719
Clippers         718
Thunder          717
Pistons          714
Blazers          695
Pelicans         576
Bobcats          369
Bullets            1
Name: Team, dtype: int64

Changing the team names to full names.

In [9]:
# Lookup table from the short team nickname used in the injury data to the
# full team name, listed alphabetically by nickname.
# NOTE: the variable name shadows the built-in `dict`; it is kept because the
# following cell refers to it by this name.
dict = {
    "76ers": "Philadelphia Sixers",
    "Blazers": "Portland Trailblazers",
    "Bobcats": "Charlotte Bobcats",
    "Bucks": "Milwaukee Bucks",
    "Bullets": "Washington Bullets",
    "Bulls": "Chicago Bulls",
    "Cavaliers": "Cleveland Cavaliers",
    "Celtics": "Boston Celtics",
    "Clippers": "Los Angeles Clippers",
    "Grizzlies": "Memphis Grizzlies",
    "Hawks": "Atlanta Hawks",
    "Heat": "Miami Heat",
    "Hornets": "Charlotte Hornets",
    "Jazz": "Utah Jazz",
    "Kings": "Sacramento Kings",
    "Knicks": "New York Knicks",
    "Lakers": "Los Angeles Lakers",
    "Magic": "Orlando Magic",
    "Mavericks": "Dallas Mavericks",
    "Nets": "Brooklyn Nets",
    "Nuggets": "Denver Nuggets",
    "Pacers": "Indiana Pacers",
    "Pelicans": "New Orleans Pelicans",
    "Pistons": "Detroit Pistons",
    "Raptors": "Toronto Raptors",
    "Rockets": "Houston Rockets",
    "Spurs": "San Antonio Spurs",
    "Suns": "Phoenix Suns",
    "Thunder": "Oklahoma City Thunder",
    "Timberwolves": "Minnesota Timberwolves",
    "Warriors": "Golden State Warriors",
    "Wizards": "Washington Wizards",
}

In [10]:
# Map the short team nicknames to full team names.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on df["Team"]: the in-place form mutates a
# Series selected from the frame (chained assignment), which pandas warns
# about and which leaves df unchanged when Copy-on-Write is active.
df["Team"] = df["Team"].replace(dict)

df.tail(10)

Unnamed: 0,Date,Team,Notes,Name
27095,2020-09-12,Houston Rockets,placed on IL with neck spasms (out for season),Tyson Chandler
27096,2020-09-18,Boston Celtics,placed on IL with strained right adductor (out...,Romeo Langford
27097,2020-09-22,Boston Celtics,surgery on right wrist (out for season),Romeo Langford
27098,2020-09-23,Miami Heat,sore right knee (DTD),Gabe Vincent
27099,2020-09-30,Miami Heat,strained left shoulder (DTD),Bam Adebayo
27100,2020-09-30,Los Angeles Lakers,activated from IL,Dion Waiters
27101,2020-10-02,Miami Heat,strained neck (DTD),Bam Adebayo
27102,2020-10-02,Miami Heat,placed on IL with torn plantar fascia in left ...,Goran Dragic
27103,2020-10-02,Miami Heat,activated from IL,Chris Silva
27104,2020-10-06,Miami Heat,returned to lineup,Bam Adebayo


In [11]:
# Re-check the per-team record counts after switching to full team names.
df["Team"].value_counts()

San Antonio Spurs         1163
Milwaukee Bucks           1068
Golden State Warriors     1060
Houston Rockets           1058
Toronto Raptors           1044
Boston Celtics            1040
Brooklyn Nets             1024
Miami Heat                1023
Cleveland Cavaliers       1001
Dallas Mavericks           992
Atlanta Hawks              975
Denver Nuggets             966
Los Angeles Lakers         959
New York Knicks            943
Philadelphia Sixers        910
Memphis Grizzlies          875
Washington Wizards         875
Minnesota Timberwolves     860
Utah Jazz                  841
Orlando Magic              834
Indiana Pacers             831
Chicago Bulls              791
Phoenix Suns               733
Sacramento Kings           728
Charlotte Hornets          719
Los Angeles Clippers       718
Oklahoma City Thunder      717
Detroit Pistons            714
Portland Trailblazers      695
New Orleans Pelicans       576
Charlotte Bobcats          369
Washington Bullets           1
Name: Team, dtype: int64

Changing the order of the columns

In [12]:
# Reorder the columns so the player name comes first.
# The explicit .copy() makes df an independent frame, so assigning to its
# columns afterwards does not raise SettingWithCopyWarning.
df = df[["Name", "Team", "Date", "Notes"]].copy()

df.tail()

Unnamed: 0,Name,Team,Date,Notes
27100,Dion Waiters,Los Angeles Lakers,2020-09-30,activated from IL
27101,Bam Adebayo,Miami Heat,2020-10-02,strained neck (DTD)
27102,Goran Dragic,Miami Heat,2020-10-02,placed on IL with torn plantar fascia in left ...
27103,Chris Silva,Miami Heat,2020-10-02,activated from IL
27104,Bam Adebayo,Miami Heat,2020-10-06,returned to lineup


Converting the "Date" column to datetime so that the dates can be reformatted as DD/MM/YYYY.

In [13]:
# Parse the date strings and render them back as DD/MM/YYYY text.
# Note: after this step "Date" holds strings again, not datetime values.
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%d/%m/%Y')

df.tail()

Unnamed: 0,Name,Team,Date,Notes
27100,Dion Waiters,Los Angeles Lakers,30/09/2020,activated from IL
27101,Bam Adebayo,Miami Heat,02/10/2020,strained neck (DTD)
27102,Goran Dragic,Miami Heat,02/10/2020,placed on IL with torn plantar fascia in left ...
27103,Chris Silva,Miami Heat,02/10/2020,activated from IL
27104,Bam Adebayo,Miami Heat,06/10/2020,returned to lineup


Dropping the team column as the active player dataset already contains it.

In [14]:
# Remove the team column from the injury records.
df = df.drop("Team", axis="columns")
df.tail()

Unnamed: 0,Name,Date,Notes
27100,Dion Waiters,30/09/2020,activated from IL
27101,Bam Adebayo,02/10/2020,strained neck (DTD)
27102,Goran Dragic,02/10/2020,placed on IL with torn plantar fascia in left ...
27103,Chris Silva,02/10/2020,activated from IL
27104,Bam Adebayo,06/10/2020,returned to lineup


Importing active player dataset for 21-22 season.

This dataset includes Name, Team, Position, Age, Height, Weight, College and Salary information of the 2021-22 season NBA players.

We will only use Name, Team and Position information for now.

In [15]:
# Load the active-players file and keep only the three columns used below.
active_players_csv = "/kaggle/input/202122-nba-season-active-nba-players/active_players_2.csv"
df_act = pd.read_csv(active_players_csv).loc[:, ["Name", "Team", "Position"]]

df_act.head()



Unnamed: 0,Name,Team,Position
0,Juhann Begarin,Boston Celtics,SG
1,Jaylen Brown,Boston Celtics,SG
2,Kris Dunn,Boston Celtics,PG
3,Carsen Edwards,Boston Celtics,PG
4,Tacko Fall,Boston Celtics,C


Merging the datasets by performing a left merge on Name.

In [16]:
# Attach the injury records to each active player by name. Rows where "Date"
# is missing after the left merge are dropped immediately.
result_df = (
    df_act
    .merge(df, how="left", on="Name")
    .dropna(subset=["Date"])
)

result_df.tail()

Unnamed: 0,Name,Team,Position,Date,Notes
9489,Hassan Whiteside,Utah Jazz,C,13/01/2020,returned to lineup
9490,Hassan Whiteside,Utah Jazz,C,07/02/2020,bruised left leg (DTD)
9491,Hassan Whiteside,Utah Jazz,C,11/02/2020,returned to lineup
9492,Hassan Whiteside,Utah Jazz,C,08/08/2020,placed on IL with strained left hip
9493,Hassan Whiteside,Utah Jazz,C,11/08/2020,activated from IL


Clearing Null entries

In [17]:
# Preview the first 30 rows of the merged table.
result_df.head(30)

Unnamed: 0,Name,Team,Position,Date,Notes
1,Jaylen Brown,Boston Celtics,SG,11/01/2017,sprained right ankle (DTD)
2,Jaylen Brown,Boston Celtics,SG,13/01/2017,returned to lineup
3,Jaylen Brown,Boston Celtics,SG,13/02/2017,placed on IL with strained right hip flexor
4,Jaylen Brown,Boston Celtics,SG,24/02/2017,activated from IL
5,Jaylen Brown,Boston Celtics,SG,06/12/2017,placed on IL with right eye inflammation
6,Jaylen Brown,Boston Celtics,SG,08/12/2017,activated from IL
7,Jaylen Brown,Boston Celtics,SG,21/12/2017,sore left Achilles (DTD)
8,Jaylen Brown,Boston Celtics,SG,23/12/2017,returned to lineup
9,Jaylen Brown,Boston Celtics,SG,27/12/2017,placed on IL
10,Jaylen Brown,Boston Celtics,SG,27/12/2017,sore right knee (DTD)


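9,280 injury records of active players are available (the index runs up to 9493 only because rows without an injury record were removed after the merge).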

Looking for a specific player's injury records for the last 10 years

In [18]:
# Select every merged record whose Name is exactly "Paul George".
result_df[result_df["Name"] == "Paul George"]

Unnamed: 0,Name,Team,Position,Date,Notes
5480,Paul George,Los Angeles Clippers,SG,28/11/2010,placed on IL
5481,Paul George,Los Angeles Clippers,SG,13/12/2010,activated from IL
5482,Paul George,Los Angeles Clippers,SG,17/12/2010,placed on IL
5483,Paul George,Los Angeles Clippers,SG,29/12/2010,activated from IL
5484,Paul George,Los Angeles Clippers,SG,12/01/2013,placed on IL with illness
5485,Paul George,Los Angeles Clippers,SG,13/01/2013,activated from IL
5486,Paul George,Los Angeles Clippers,SG,17/04/2013,DNP
5487,Paul George,Los Angeles Clippers,SG,09/04/2014,rest (DNP)
5488,Paul George,Los Angeles Clippers,SG,01/08/2014,surgery on right leg to repair fractured tibia...
5489,Paul George,Los Angeles Clippers,SG,29/10/2014,placed on IL recovering from surgery on right ...


In [19]:
# Summary statistics (count, unique, top, freq) for each column of the merged table.
result_df.describe()

Unnamed: 0,Name,Team,Position,Date,Notes
count,9280,9280,9280,9280,9280
unique,344,30,7,1734,1552
top,Kevin Love,Los Angeles Lakers,C,12/04/2017,activated from IL
freq,139,715,1943,35,2480


In [20]:
# Check row count, non-null counts and dtypes of the merged table before saving.
result_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9280 entries, 1 to 9493
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Name      9280 non-null   object
 1   Team      9280 non-null   object
 2   Position  9280 non-null   object
 3   Date      9280 non-null   object
 4   Notes     9280 non-null   object
dtypes: object(5)
memory usage: 435.0+ KB


In [21]:
# Write the merged injury history to a CSV file (index omitted) and confirm.
filename = 'Injury_History.csv'
result_df.to_csv(filename, index=False)
print(f'Saved file: {filename}')

Saved file: Injury_History.csv
