### Rotowire injuries
Use the selenium package to initiate a web driver that goes to rotowire and clicks on the button to download the CSV file of injuries. Then, read in the CSV, do some manipulations, store the results, and delete the original CSV from the original download location for cleanliness.

In [16]:
# Packages
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
import json
import datetime as dt
import time

In [None]:
# RotoWire page that hosts the NHL injury report (and its CSV export button)
roto_url = "https://www.rotowire.com/hockey/injury-report.php"

In [None]:
# Load the JSON mapping used to translate team names into 3-letter codes
with open('../../data/team_name_dictionary.txt') as f:
    team_name_dict = json.loads(f.read())

In [28]:
# Function for entire process
def get_rw_injuries(url):
    """Download the RotoWire NHL injury report and return it as a cleaned table.

    Opens ``url`` in headless Chrome, clicks the page's "CSV" export button,
    reads the file Chrome saves to ``~/Downloads/nhl-injury-report.csv``,
    reshapes it, and deletes the downloaded file afterwards.

    Parameters
    ----------
    url : str
        Address of the injury-report page to open.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``name``, ``team``, ``date_recorded``,
        ``time_recorded``, ``type``, ``status``. ``team`` is lower-cased and
        mapped through the team-name dictionary, ``type`` is stripped and
        lower-cased, and the statuses ``'Out'`` / ``'Day-To-Day'`` are recoded
        to ``'O'`` / ``'DTD'`` (any other status is left unchanged).

    Notes
    -----
    Errors from Selenium, the file system, or pandas propagate to the caller;
    the browser is always shut down first.
    """
    # Function-scope import: pathlib is only needed to manage the download file
    from pathlib import Path

    # Where the browser saves the export (the default download location)
    download_path = Path('~/Downloads/nhl-injury-report.csv').expanduser()

    # Read in team name to 3 letter code dictionary
    with open('../../data/team_name_dictionary.txt', 'r') as f:
        team_name_dict = json.load(f)

    # Record current date and time
    dt_now = dt.datetime.now()
    date_recorded = dt_now.date()
    time_recorded = dt_now.time().strftime(format = '%H:%M:%S')

    # Remove any leftover export from an earlier failed run; otherwise the
    # browser may save the new file under a different name and the stale
    # report would be read below.
    download_path.unlink(missing_ok=True)

    # Create a new instance of the Chrome driver
    options = webdriver.ChromeOptions()
    options.add_argument('headless=new')
    driver = webdriver.Chrome(options=options)

    try:
        # Navigate to the requested page (the `url` argument, not a global)
        driver.get(url)
        # Fixed pause before looking for the button
        # (presumably gives the page time to render it)
        time.sleep(3)

        # Locate the "CSV" button by its HTML class and click on it
        driver.find_element(By.CLASS_NAME, 'export-button.is-csv').click()
        # Fixed pause before closing the browser
        # (presumably gives the download time to finish)
        time.sleep(3)
    finally:
        # Close the driver even if navigation or the click failed
        driver.quit()

    # Read in the CSV file from the default download location
    injuries_current = pd.read_csv(download_path)

    # Drop columns we don't need
    injuries_current.drop(columns = ['Pos', 'Est. Return', 'Next Game (EST)'], inplace=True)

    # Rename remaining columns
    injuries_current.rename(columns={'Player':'name', 'Team':'team', 'Injury':'type', 'Status':'status'}, inplace=True)

    # Convert team to 3-letter code
    injuries_current['team'] = injuries_current['team'].str.lower().replace(team_name_dict)

    # Make type lowercase
    injuries_current['type'] = injuries_current['type'].str.strip().str.lower()

    # Convert injury status to code
    injuries_current.loc[injuries_current['status'] == 'Out', 'status'] = 'O'
    injuries_current.loc[injuries_current['status'] == 'Day-To-Day', 'status'] = 'DTD'

    # Add columns for date and time of recording
    injuries_current['date_recorded'] = date_recorded
    injuries_current['time_recorded'] = time_recorded

    # Set correct column order
    injuries_current = injuries_current[['name', 'team', 'date_recorded', 'time_recorded', 'type', 'status']]

    # Delete the original download file from rotowire (plain Python instead of
    # the IPython-only `!rm`, so the function also works outside a notebook)
    download_path.unlink(missing_ok=True)

    return injuries_current

In [None]:
# Take a single timestamp, then derive the date and an HH:MM:SS string from it
dt_now = dt.datetime.now()
date_recorded = dt_now.date()
time_recorded = dt_now.strftime('%H:%M:%S')

In [None]:
# Step-by-step version of the browser part of get_rw_injuries.
# Create a new instance of the Chrome driver
options = webdriver.ChromeOptions()
# 'headless=new' is handed to Chrome as a command-line argument
# (presumably selects Chrome's newer headless mode, so no window opens — confirm)
options.add_argument('headless=new')
driver = webdriver.Chrome(options=options)

# Navigate to the url
driver.get(roto_url)

In [None]:
# This click failed when it shared a cell with the navigation above, so it is
# kept in its own cell. NOTE(review): get_rw_injuries sleeps 3 seconds between
# navigating and clicking, which suggests the button is not yet available
# right after driver.get() returns — confirm.
# Locate the "CSV" button by its HTML class and click on it
driver.find_element(By.CLASS_NAME, 'export-button.is-csv').click()

In [None]:
# Close the driver
# NOTE(review): get_rw_injuries waits 3 seconds after the click before
# quitting; presumably the download must be finished before this cell runs.
driver.quit()

In [None]:
# Load the exported report from the browser's default download folder
injuries_current = pd.read_csv(filepath_or_buffer="~/Downloads/nhl-injury-report.csv")

In [None]:
# Keep only the columns we store, under our own column names
injuries_current = (
    injuries_current
    .drop(columns=['Pos', 'Est. Return', 'Next Game (EST)'])
    .rename(columns={
        'Player': 'name',
        'Team': 'team',
        'Injury': 'type',
        'Status': 'status',
    })
)

# Team: lower-case the name, then map it through the name -> 3-letter code dictionary
team_lower = injuries_current['team'].str.lower()
injuries_current['team'] = team_lower.replace(team_name_dict)

# Injury type: trim surrounding whitespace and lower-case
type_trimmed = injuries_current['type'].str.strip()
injuries_current['type'] = type_trimmed.str.lower()

# Status: recode the two known values; anything else is left as it is
injuries_current['status'] = injuries_current['status'].replace(
    {'Out': 'O', 'Day-To-Day': 'DTD'}
)

# Stamp every row with the date and time of recording, then fix the column order
injuries_current = injuries_current.assign(
    date_recorded=date_recorded,
    time_recorded=time_recorded,
)[['name', 'team', 'date_recorded', 'time_recorded', 'type', 'status']]

In [None]:
# See table: render the cleaned injuries DataFrame as the cell output.
# `display` is not imported in the visible cells — presumably the builtin that
# IPython/Jupyter provides in notebook sessions.
display(injuries_current)

In [None]:
# Sanity check: list the distinct teams, injury types and statuses,
# with a blank line between each listing
print(
    *(injuries_current[column].unique() for column in ('team', 'type', 'status')),
    sep='\n\n',
)

In [None]:
# Update: append the current snapshot to the running history and save it.
injuries_old = pd.read_csv('../../data/daily/injuries.csv')

# Earlier saves wrote the row index as an unnamed first column, which read_csv
# loads back as "Unnamed: 0" (then "Unnamed: 0.1", ... on every further cycle).
# Drop any such leftover index columns so the history keeps a stable schema.
injuries_old = injuries_old.loc[:, ~injuries_old.columns.str.startswith('Unnamed')]

# ignore_index gives the combined table a clean 0..n-1 index instead of
# repeating the row labels of both inputs.
injuries_updated = pd.concat([injuries_old, injuries_current], axis=0, ignore_index=True)

# index=False: do not write the row index, so it is not re-read as data next time.
injuries_updated.to_csv('../../data/daily/injuries.csv', index=False)

In [22]:
# Delete the original download file from rotowire
# The leading `!` is an IPython shell escape, so this line only runs inside a
# notebook/IPython session and relies on a shell that provides `rm`.
!rm ~/Downloads/nhl-injury-report.csv