In [24]:
from functions import Schedule
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta

### Create a csv with all caldendars

In [2]:
# Create a dataframe with all schedules from 1979-80 to 2022-23
years = np.arange(1979, 2024)
dfs = []
for year in years :

    print(f'Fetching schedule for season {year-1}-{year} ...', end = '\r')
    dfs.append(Schedule(year))
    time.sleep(5)

data = pd.concat(dfs).reset_index(drop = True)
data.to_csv('data/Schedules_1979-2023.csv', index = None)

Fetching schedule for season 2022-2023 ...

## Calendar features

In [37]:
data = pd.read_csv('data/Schedules_1979-2023.csv')
data.head()

Unnamed: 0,Date,Year,HomeTm,AwayTm,Home,Away
0,1978-10-13,1979,DEN,SAS,Denver Nuggets,San Antonio Spurs
1,1978-10-13,1979,DET,NJN,Detroit Pistons,New Jersey Nets
2,1978-10-13,1979,IND,ATL,Indiana Pacers,Atlanta Hawks
3,1978-10-13,1979,NYK,HOU,New York Knicks,Houston Rockets
4,1978-10-13,1979,PHI,LAL,Philadelphia 76ers,Los Angeles Lakers


In [72]:
def ComputeCalendarFeatures(tm, year) :

    games = data.copy()[((data['HomeTm'] == tm) | (data['AwayTm'] == tm)) & (data['Year'] == year)]
    games['Date'] = pd.to_datetime(games['Date'])
    games['Day'] = games['Date'].apply(lambda x: x.date())

    avgRest = games['Date'].diff().mean().total_seconds() / 3600 / 24
    numBack2Back = (games['Day'].diff().apply(lambda x: x.total_seconds() / 3600 / 24) <= 1).sum()

    return pd.DataFrame(zip([tm], [year], [avgRest], [numBack2Back]), columns = ['Tm', 'Year', 'avgRest', 'numBack2Back'])

In [73]:
rows = []
for year in data['Year'].unique() :

    datayr = data.groupby('Year').get_group(year)

    for tm in datayr['HomeTm'].unique() :

        rows.append(ComputeCalendarFeatures(tm, year))

In [83]:
dataSchedule = pd.concat(rows).reset_index(drop = True)

# One issue it that the average rest depends strongly on the season
# We will thus normalize the features by year


In [84]:
dataSchedule.head()

Unnamed: 0,Tm,Year,avgRest,numBack2Back
0,DEN,1979,2.185185,21
1,DET,1979,2.185185,26
2,IND,1979,2.185185,23
3,NYK,1979,2.148148,21
4,PHI,1979,2.185185,21


In [91]:
groupYear = dataSchedule.groupby('Year')
dataSchedule['avgRest'] = (dataSchedule['avgRest'] - groupYear['avgRest'].transform('mean')) / groupYear['avgRest'].transform('std')
dataSchedule['numBack2Back'] = (dataSchedule['numBack2Back'] - groupYear['numBack2Back'].transform('mean')) / groupYear['numBack2Back'].transform('std')

In [93]:
### TO-DO ###

# Opponent metrics: average strength of 82 opponents

In [205]:
cits, lats, lngs = ['Toronto', 'Vancouver'], [43.6532, 49.2827], [-79.3832, -123.1207] # Canadian cities
cities = pd.read_csv('data/uscities.csv')[['city', 'lat', 'lng', 'population']]
cities = pd.concat([cities, pd.DataFrame(zip(cits, lats, lngs, [1e9, 1e9]), columns = ['city', 'lat', 'lng', 'population'])]).reset_index(drop = True)
cities.head()

Unnamed: 0,city,lat,lng,population
0,New York,40.6943,-73.9249,18972871.0
1,Los Angeles,34.1141,-118.4068,12121244.0
2,Chicago,41.8375,-87.6866,8595181.0
3,Miami,25.784,-80.2101,5711945.0
4,Dallas,32.7935,-96.7667,5668165.0


In [206]:
cities

Unnamed: 0,city,lat,lng,population
0,New York,40.6943,-73.9249,1.897287e+07
1,Los Angeles,34.1141,-118.4068,1.212124e+07
2,Chicago,41.8375,-87.6866,8.595181e+06
3,Miami,25.7840,-80.2101,5.711945e+06
4,Dallas,32.7935,-96.7667,5.668165e+06
...,...,...,...,...
30841,Falcon Village,26.5652,-99.1341,0.000000e+00
30842,Hidden Lakes,40.5479,-82.7632,0.000000e+00
30843,El Monte Mobile Village,36.5471,-119.4251,0.000000e+00
30844,Toronto,43.6532,-79.3832,1.000000e+09


In [207]:
# First step help for city mapping
for team in data['Home'].unique() :

    words = team.split(' ')
    if words[0] in list(cities['city']) :
        print("'" + team +  "': " + words[0])
    elif ' '.join(words[:-1]) in list(cities['city']):
        print("'" + team, "':  " + ' '.join(words[:-1]))

    else :
        print('No mapping '+ "'" + team + "'")

'Denver Nuggets': Denver
'Detroit Pistons': Detroit
'Indiana Pacers': Indiana
'New York Knicks ':  New York
'Philadelphia 76ers': Philadelphia
'Phoenix Suns': Phoenix
'Seattle SuperSonics': Seattle
'Washington Bullets': Washington
'Boston Celtics': Boston
'Golden State Warriors': Golden
'San Antonio Spurs ':  San Antonio
No mapping 'New Jersey Nets'
'Atlanta Hawks': Atlanta
'San Diego Clippers ':  San Diego
'New Orleans Jazz ':  New Orleans
'Portland Trail Blazers': Portland
'Cleveland Cavaliers': Cleveland
'Houston Rockets': Houston
'Kansas City Kings': Kansas
'Chicago Bulls': Chicago
'Milwaukee Bucks': Milwaukee
'Los Angeles Lakers ':  Los Angeles
No mapping 'Utah Jazz'
'Dallas Mavericks': Dallas
'Los Angeles Clippers ':  Los Angeles
'Sacramento Kings': Sacramento
'Charlotte Hornets': Charlotte
'Miami Heat': Miami
'Orlando Magic': Orlando
No mapping 'Minnesota Timberwolves'
'Toronto Raptors': Toronto
'Vancouver Grizzlies': Vancouver
'Washington Wizards': Washington
'Memphis Grizzlies

In [208]:
cityMap = {'Denver Nuggets': 'Denver', 'Detroit Pistons': 'Detroit', 'Indiana Pacers': 'Indiana', 'New York Knicks': 'New York',
           'Philadelphia 76ers': 'Philadelphia', 'Phoenix Suns': 'Phoenix', 'Seattle SuperSonics': 'Seattle',
           'Washington Bullets': 'Washington', 'Boston Celtics': 'Boston', 'Golden State Warriors': 'San Francisco',
           'San Antonio Spurs':  'San Antonio', 'New Jersey Nets': 'Brooklyn', 'Atlanta Hawks': 'Atlanta',
           'San Diego Clippers':  'San Diego', 'New Orleans Jazz ':  'New Orleans', 'Portland Trail Blazers': 'Portland',
           'Cleveland Cavaliers': 'Cleveland', 'Houston Rockets': 'Houston', 'Kansas City Kings':  'Kansas City',
           'Chicago Bulls': 'Chicago', 'Milwaukee Bucks': 'Milwaukee', 'Los Angeles Lakers':  'Los Angeles',
           'Utah Jazz': 'Salt Lake City', 'Dallas Mavericks': 'Dallas', 'Los Angeles Clippers': 'Los Angeles',
           'Sacramento Kings': 'Sacramento', 'Charlotte Hornets': 'Charlotte', 'Miami Heat': 'Miami', 'Orlando Magic': 'Orlando',
           'Minnesota Timberwolves': 'Minneapolis', 'Toronto Raptors': 'Toronto', 'Vancouver Grizzlies': 'Vancouver',
           'Washington Wizards': 'Washington', 'Memphis Grizzlies': 'Memphis', 'New Orleans Hornets':  'New Orleans',
           'Charlotte Bobcats': 'Charlotte', 'New Orleans/Oklahoma City Hornets': 'Oklahoma City', 
           'Oklahoma City Thunder ':  'Oklahoma City', 'Brooklyn Nets': 'Brooklyn', 'New Orleans Pelicans':  'New Orleans'
            }


In [227]:
dfCoords = []
for team in cityMap.keys():

    coords = cities[cities['city'] == cityMap.get(team)].sort_values('population', ascending =  False).head(1).drop(columns = ['population'])
    coords['Team'] = [team]
    dfCoords.append(coords)

dfCoords = pd.concat(dfCoords).reset_index(drop = True)

In [234]:
dataex = data.merge(dfCoords.rename(columns = {'Team': 'Home'}), on = 'Home')

In [237]:
dataex[((dataex['HomeTm'] == 'NYK') | (dataex['AwayTm'] == 'NYK')) & (dataex['Year'] == 2023)].sort_values('Date')

Unnamed: 0,Date,Year,HomeTm,AwayTm,Home,Away,city,lat,lng
47572,2022-10-19 19:30:00,2023,MEM,NYK,Memphis Grizzlies,New York Knicks,Memphis,35.1087,-89.9663
7177,2022-10-21 19:30:00,2023,NYK,DET,New York Knicks,Detroit Pistons,New York,40.6943,-73.9249
7178,2022-10-24 19:30:00,2023,NYK,ORL,New York Knicks,Orlando Magic,New York,40.6943,-73.9249
7179,2022-10-26 19:30:00,2023,NYK,CHO,New York Knicks,Charlotte Hornets,New York,40.6943,-73.9249
30934,2022-10-28 20:00:00,2023,MIL,NYK,Milwaukee Bucks,New York Knicks,Milwaukee,43.0642,-87.9675
...,...,...,...,...,...,...,...,...,...
25265,2023-03-31 19:30:00,2023,CLE,NYK,Cleveland Cavaliers,New York Knicks,Cleveland,41.4764,-81.6805
7216,2023-04-02 18:00:00,2023,NYK,WAS,New York Knicks,Washington Wizards,New York,40.6943,-73.9249
5416,2023-04-05 19:00:00,2023,IND,NYK,Indiana Pacers,New York Knicks,Indiana,40.6220,-79.1552
49288,2023-04-07 20:00:00,2023,NOP,NYK,New Orleans Pelicans,New York Knicks,New Orleans,30.0687,-89.9288
