### Rotowire injuries
Use the Selenium package to start a web driver that opens Rotowire's injury report page and clicks the button that downloads the injuries CSV file. Then read in the CSV, clean it up, store the results, and delete the downloaded CSV from the default download location to keep that folder tidy.

In [29]:
# Packages
import datetime as dt
import json
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [7]:
# Specify the URL of the Rotowire hockey injury report page (the page that hosts the "CSV" export button)
roto_url = 'https://www.rotowire.com/hockey/injury-report.php'

In [24]:
# Read in the team name -> 3-letter code dictionary (stored as JSON text).
# The encoding is given explicitly because JSON text is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
with open('../../data/team_name_dictionary.txt', 'r', encoding='utf-8') as f:
    team_name_dict = json.load(f)

In [30]:
# Take one timestamp, then split it into a date object and an HH:MM:SS string
# so both values describe the exact same instant
dt_now = dt.datetime.now()
date_recorded, time_recorded = dt_now.date(), dt_now.strftime('%H:%M:%S')

In [8]:
# Create a new instance of the Chrome driver, started with Chrome's "headless=new" switch
options = webdriver.ChromeOptions()
options.add_argument('headless=new')
driver = webdriver.Chrome(options=options)

# Navigate to the url. If navigation fails, close the browser before re-raising
# so a failed run does not leave an orphaned Chrome process behind.
try:
    driver.get(roto_url)
except Exception:
    driver.quit()
    raise

In [9]:
# Click the "CSV" export button, wait for the download to land, then close the browser.
# The earlier version only worked when run in a separate cell from driver.get(); the likely
# cause is that the button is not yet present/clickable right after navigation (TODO confirm),
# so wait for it explicitly instead of relying on the pause between cells.
WAIT_SECONDS = 30
download_path = Path('~/Downloads/nhl-injury-report.csv').expanduser()

try:
    # The element carries two classes, so match it with a CSS selector
    # (By.CLASS_NAME is intended for a single class name)
    csv_button = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.export-button.is-csv'))
    )
    csv_button.click()

    # Keep the browser alive until the file exists; quitting right after the click
    # can abort a download that has not finished yet
    WebDriverWait(driver, WAIT_SECONDS).until(lambda _: download_path.exists())
finally:
    # Close the driver even if the button never appears or the download times out
    driver.quit()

In [44]:
# Read in the injury report CSV from the default download location (~/Downloads),
# i.e. the file the browser saved when the "CSV" button was clicked
injuries = pd.read_csv('~/Downloads/nhl-injury-report.csv')

In [45]:
# Build the cleaned table in one chained expression instead of mutating the frame in place.
# `injuries` is only rebound once every step has succeeded, so a failure part-way through
# (e.g. an unexpected column name) no longer leaves a half-transformed frame behind.
injuries = (
    injuries
    # Drop columns we don't need
    .drop(columns=['Pos', 'Est. Return', 'Next Game (EST)'])
    # Rename remaining columns
    .rename(columns={'Player': 'name', 'Team': 'team', 'Injury': 'type', 'Status': 'status'})
    .assign(
        # Convert team to 3-letter code; names are lower-cased before the lookup
        # (presumably to match the dictionary keys), and any name missing from the
        # dictionary passes through unchanged in its lower-cased form
        team=lambda df: df['team'].str.lower().replace(team_name_dict),
        # Convert injury status to code; statuses not listed here are left as they are
        status=lambda df: df['status'].replace({'Out': 'O', 'Day-To-Day': 'DTD'}),
        # Add columns for date and time of recording
        date_recorded=date_recorded,
        time_recorded=time_recorded,
    )
    # Set correct column order
    .loc[:, ['name', 'team', 'date_recorded', 'time_recorded', 'type', 'status']]
)

In [48]:
# Print the distinct values of each categorical column, with a blank line between columns
for position, column in enumerate(('team', 'type', 'status')):
    if position:
        print()
    print(injuries[column].unique())

['ARI' 'LAK' 'STL' 'CBJ' 'PIT' 'BUF' 'OTT' 'TOR' 'WSH' 'COL' 'MIN' 'NYR'
 'DAL' 'EDM' 'WPG' 'ANA' 'CGY' 'VGK' 'SJS' 'VAN' 'FLA' 'NSH' 'MTL' 'NJD'
 'PHI' 'SEA' 'CAR' 'DET' 'NYI' 'TBL' 'CHI']

['Concussion' 'Undisclosed' 'Ankle' 'Shoulder' 'Lower Body' 'Upper Body'
 'Back' 'Triceps' 'Lower Leg' 'Hand' 'Wrist' 'Hamstring' 'Kneecap' 'Neck'
 'Achilles' 'Abdomen' 'Leg' 'Toe' 'Groin' 'Knee' 'Hip' 'Personal' 'Arm'
 'Head' 'Chest']

['O' 'IR-LT' 'IR' 'DTD' 'IR-NR']


In [49]:
# Show the cleaned injuries table as rich notebook output
display(injuries)

Unnamed: 0,name,team,date_recorded,time_recorded,type,status
0,Matt Dumba,ARI,2023-08-31,15:59:06,Concussion,O
1,Travis Dermott,ARI,2023-08-31,15:59:06,Undisclosed,O
2,Shea Weber,ARI,2023-08-31,15:59:06,Ankle,IR-LT
3,David Rittich,LAK,2023-08-31,15:59:06,Ankle,O
4,Akil Thomas,LAK,2023-08-31,15:59:06,Shoulder,O
...,...,...,...,...,...,...
132,Bryan Little,ARI,2023-08-31,15:59:06,Head,IR-LT
133,Jakub Voracek,ARI,2023-08-31,15:59:06,Concussion,IR
134,Carey Price,MTL,2023-08-31,15:59:06,Knee,IR-LT
135,Ryan Ellis,PHI,2023-08-31,15:59:06,Back,IR-LT


In [50]:
# Write/update CSV
# TODO: nothing is written here yet; add the step that stores `injuries` before the
# downloaded file is removed below.

# Delete the original download file from rotowire.
# Done with pathlib rather than `!rm` so it does not depend on an `rm` command being
# available; missing_ok=True makes this a no-op if the file has already been removed.
Path('~/Downloads/nhl-injury-report.csv').expanduser().unlink(missing_ok=True)