# Scrape Injury Data from https://www.prosportstransactions.com/
### Credits to @gboogy on GitHub for the scraping code. You can find his repo at https://github.com/gboogy/nba-injury-data-scraper

In [None]:
import numpy as np
import pandas as pd
import re
import requests
from bs4 import BeautifulSoup
import datetime

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment =  None

In [None]:
def replace_all(text, dic):
    '''
    Replace every occurrence in text of each key of dic with its value.

    All keys are matched literally (they are regex-escaped) in a single
    left-to-right pass, so replaced text is never scanned again.

    text: the string to process
    dic:  mapping of substring to search for -> replacement string

    Returns the new string; text is returned unchanged when dic is empty.
    '''
    # An empty mapping would compile to the pattern '', which matches the
    # empty string at every position and then fails on dic[''].
    if not dic:
        return text

    # Try longer keys first so that a key which is a prefix of another key
    # (e.g. 'a' and 'ab') cannot shadow the longer one in the alternation.
    keys = sorted(dic, key=len, reverse=True)
    rc = re.compile('|'.join(map(re.escape, keys)))

    def translate(match):
        return dic[match.group(0)]
    return rc.sub(translate, text)


# Dictionary of characters to remove from text
char_replace = {' • ': ''}

# Rows read by the web scraper, one list of cell strings per table row
list_of_rows = []

start_date = "2023-06-01"

# Loop through the paginated results table (25 rows per page, at most
# 50 pages), scrape each row and store it as a list of cell strings
for i in range(0, 25*50, 25):
    url = 'https://www.prosportstransactions.com/basketball/Search/SearchResults.php?Player=&Team=&BeginDate={}&EndDate=&ILChkBx=yes&InjuriesChkBx=yes&PersonalChkBx=yes&Submit=Search&start={}'.format(start_date,i)
    # A timeout keeps a stalled connection from hanging the notebook, and
    # an HTTP error is reported as such instead of as a parsing failure.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    html = response.content

    soup = BeautifulSoup(html, 'lxml')

    table = soup.find('table', attrs={'class': 'datatable center'})
    if table is None:
        # No results table on this page: nothing more to read.
        break

    rows = table.find_all('tr', attrs={'align': 'left'})
    if not rows:
        # An empty page means the results are exhausted; stop requesting.
        break

    for row in rows:
        list_of_cells = []

        for cell in row.find_all('td'):
            text = replace_all(cell.text, char_replace)
            text = text.strip()
            list_of_cells.append(text)
        list_of_rows.append(list_of_cells)

# Store data in a dataframe for manipulation
injuries_df = pd.DataFrame(list_of_rows, columns=[
                           'Date', 'Team', 'Acquired', 'Relinquished', 'Notes'])

acq = injuries_df['Acquired']
rel = injuries_df['Relinquished']

# Where a value is like "Name 1 / Name 2", keep the second name.
# Splitting on the bare "/" and stripping afterwards also handles a slash
# that is not followed by a space; na=False leaves missing cells untouched.
injuries_df['Acquired'] = np.where(
    acq.str.contains('/', regex=False, na=False),
    acq.str.split('/').str[1].str.strip(), acq)
injuries_df['Relinquished'] = np.where(
    rel.str.contains('/', regex=False, na=False),
    rel.str.split('/').str[1].str.strip(), rel)

# Remove instances where value is like "(some text)" or "[some text]".
# regex=True is required: since pandas 2.0 str.replace treats the pattern
# as a literal string by default, which would remove nothing here.
# The final strip drops the space left behind by a removed suffix.
injuries_df['Acquired'] = injuries_df.Acquired.str.replace(
    r"[\(\[].*?[\)\]]", "", regex=True).str.strip()
injuries_df['Relinquished'] = injuries_df.Relinquished.str.replace(
    r"[\(\[].*?[\)\]]", "", regex=True).str.strip()

# Display the cleaned table (the CSV/parquet export happens in a later cell)
injuries_df

In [None]:
# Display the scraped injuries table.
injuries_df

In [None]:
# Work on a copy so the scraped table stays available unchanged.
df = injuries_df.copy()

# Treat missing cells like empty ones and drop stray surrounding spaces,
# so the emptiness tests and the exact-name lookups below behave.
df["Acquired"] = df["Acquired"].fillna("").str.strip()
df["Relinquished"] = df["Relinquished"].fillna("").str.strip()

# Act:  the row has a name in "Acquired"
# DAct: the row has a name in "Relinquished"
df["Act"]  = (df["Acquired"] != "")
df["DAct"]  = (df["Relinquished"] != "")

# Only one of the two columns is expected to be filled per row, so the
# concatenation yields that single name.
df["Player"] = df["Acquired"] + df["Relinquished"]
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df = df[["Date","Team","Player","Act","DAct","Notes"]]

# Keep rows that name exactly one player. The previous str.istitle() filter
# also discarded every name with an interior capital or lower-case particle
# ("LeBron James", "DeMar DeRozan", "CJ McCollum", "Luc Mbah a Moute", ...).
df = df[df["Act"] ^ df["DAct"]].reset_index(drop=True)
df.to_csv('../fdata/NBA_prosptran_injuries_2023.csv', index=False)
df.to_parquet('../fdata/NBA_prosptran_injuries_2023.parquet')

In [None]:
# Keep only the rows of one player and renumber them from 0.
dfp = df.query("Player == 'Nikola Jokic'").reset_index(drop=True)

In [None]:
# Display the selected player's rows.
dfp

Did the selected player miss the game on the chosen date? (The cells here check Nikola Jokic for the game on 1 November 2023.)

In [None]:
# Date of the game to check (1 November 2023).
game_date = pd.to_datetime(datetime.date(2023, 11, 1))

# Flag every row of the player dated on or before the game.
dfp["Comp"] = dfp["Date"] <= game_date

# Order the flagged rows by date so that "latest" does not depend on the
# rows happening to be stored chronologically. The stable sort keeps the
# original order among rows that share a date.
idxi = dfp.loc[dfp["Comp"]].sort_values("Date", kind="stable").index
if len(idxi) > 0:
    # Latest row on or before the game: the game counts as missed when that
    # row has the player in the "Relinquished" column (DAct is True).
    idx = idxi[-1]
    missed_game = bool(dfp.loc[idx, "DAct"])
else:
    # No row on or before the game date: report the game as not missed.
    missed_game = False
missed_game