In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

In [None]:
# Load the injury log from the Kaggle input directory.
injuries_csv = '/kaggle/input/nba-injuries-2010-2018/injuries_2010-2020.csv'
df = pd.read_csv(injuries_csv)

This dataset contains NBA players' injuries between 2010 and 2020.

The first injury record is from October 2010 and the last one is from October 2020.

To find out the injury history of the active NBA players (21-22 season), we'll import the active players dataset and merge them.

In [None]:
# Show the last five rows of the injury log.
df.tail(n=5)

In [None]:
# Show the first five rows of the injury log.
df.head(n=5)

In [None]:
# Count the missing values in each column.
df.isna().sum()

We will combine the Acquired and Relinquished columns into a single "Name" column and then drop the two original columns.


In [None]:
# Blank out missing values, then concatenate the two columns row by row so the
# player's name ends up in a single "Name" column.
# NOTE(review): presumably only one of the two columns is set per row — confirm;
# rows with both set would get the two values glued together.
df['Name'] = df["Acquired"].fillna('') + df["Relinquished"].fillna('')

In [None]:
# Remove the two source columns now that their values live in "Name".
df = df.drop(['Acquired', 'Relinquished'], axis=1)

df.head(n=5)


In [None]:
# Number of records per distinct value of the "Team" column.
df.loc[:, "Team"].value_counts()

Changing the team names to full names.

In [None]:
# Maps short team nicknames to full team names.
# Most full names are "<home> <nickname>"; the two that do not follow that
# pattern are spelled out in the inline override table.
# NOTE: the variable name shadows the built-in `dict`; it is kept because a
# later cell refers to the mapping by this name.
dict = {
    nickname: {
        "76ers": "Philadelphia Sixers",
        "Blazers": "Portland Trailblazers",
    }.get(nickname, f"{home} {nickname}")
    for nickname, home in [
        ("Rockets", "Houston"),
        ("Magic", "Orlando"),
        ("Nets", "Brooklyn"),
        ("76ers", "Philadelphia"),
        ("Cavaliers", "Cleveland"),
        ("Kings", "Sacramento"),
        ("Pacers", "Indiana"),
        ("Bucks", "Milwaukee"),
        ("Celtics", "Boston"),
        ("Pelicans", "New Orleans"),
        ("Clippers", "Los Angeles"),
        ("Nuggets", "Denver"),
        ("Wizards", "Washington"),
        ("Bullets", "Washington"),
        ("Thunder", "Oklahoma City"),
        ("Raptors", "Toronto"),
        ("Bulls", "Chicago"),
        ("Lakers", "Los Angeles"),
        ("Grizzlies", "Memphis"),
        ("Hawks", "Atlanta"),
        ("Heat", "Miami"),
        ("Spurs", "San Antonio"),
        ("Mavericks", "Dallas"),
        ("Jazz", "Utah"),
        ("Hornets", "Charlotte"),
        ("Bobcats", "Charlotte"),
        ("Pistons", "Detroit"),
        ("Warriors", "Golden State"),
        ("Timberwolves", "Minnesota"),
        ("Suns", "Phoenix"),
        ("Knicks", "New York"),
        ("Blazers", "Portland"),
    ]
}

In [None]:
# Expand the short team names in "Team" to full names using the mapping `dict`
# defined in the previous cell.
# The result is assigned back instead of calling replace(..., inplace=True) on
# df["Team"]: an in-place method on a column selected this way is chained
# assignment, which emits a warning in recent pandas and leaves `df` unchanged
# once Copy-on-Write is enabled.
df["Team"] = df["Team"].replace(dict)

df.tail(10)

In [None]:
# Re-check the record count per team after the names were expanded.
df.loc[:, "Team"].value_counts()

Changing the order of the columns

In [None]:
# Keep only these four columns, in this order.
# Take an explicit copy: the selection is assigned to in later cells, and
# without the copy pandas (when Copy-on-Write is off) flags those assignments
# with SettingWithCopyWarning because `df` is derived from another frame.
df = df[["Name", "Team", "Date", "Notes"]].copy()

df.tail()

Changing the "Date" column to datetime to be able to format the dates.

In [None]:
# Parse "Date" as datetimes, then write it back as day/month/year text.
df = df.assign(Date=pd.to_datetime(df["Date"]).dt.strftime('%d/%m/%Y'))

df.tail(n=5)

Dropping the team column as the active player dataset already contains it.

In [None]:
# Remove "Team" from the injury log; per the notebook narrative, the team is
# taken from the active-players table instead.
df = df.drop("Team", axis=1)
df.tail(n=5)

Importing active player dataset for 21-22 season.

This dataset includes Name, Team, Position, Age, Height, Weight, College and Salary information of the 2021-22 season NBA players.

We will only use Name, Team and Position information for now.

In [None]:
# Load the active-players table and keep only the three columns used here.
active_players_csv = "/kaggle/input/202122-nba-season-active-nba-players/active_players_2.csv"
df_act = pd.read_csv(active_players_csv).loc[:, ["Name", "Team", "Position"]]

df_act.head(n=5)



Merging the datasets by performing a left merge on Name.

In [None]:
# Left-join the injury rows onto the active players by "Name", then discard
# rows whose "Date" is missing (e.g. players with no matching injury record).
result_df = (
    df_act
    .merge(df, how="left", on="Name")
    .dropna(subset=["Date"])
)

result_df.tail(n=5)

Null entries (rows without an injury date) were removed in the cell above; previewing the cleaned result

In [None]:
# Preview the first 30 rows of the merged frame.
result_df.head(n=30)

9493 injury records of active players are available

Looking for a specific player's injury records for the last 10 years

In [None]:
# Select every row of the merged frame whose "Name" is exactly "Paul George".
result_df.loc[result_df["Name"].eq("Paul George")]

In [None]:
# Display pandas' summary statistics for the merged frame.
result_df.describe()

In [None]:
# Print pandas' concise summary (columns, non-null counts, dtypes) of the merged frame.
result_df.info()

In [None]:
# Write the merged frame to a CSV file (without the index) and report the name.
filename = 'Injury_History.csv'
result_df.to_csv(path_or_buf=filename, index=False)
print(f'Saved file: {filename}')