In [4]:
#from functions import Standings
import pandas as pd
import numpy as np
import time

### Create csv

In [4]:
# Build one DataFrame holding the standings returned by `Standings(year)` for every
# year from 1979 (season 1978-79) to 2023 (season 2022-23), then cache it as a CSV.
# NOTE(review): `Standings` is presumably provided by `functions` (that import is
# commented out in the first cell) — confirm it is defined before running this cell.
years = np.arange(1979, 2024)
dfs = []
for year in years :

    # '\r' keeps the progress message on a single, continuously overwritten line.
    print(f'Fetching standings for season {year-1}-{year} ...', end = '\r')
    dfs.append(Standings(year))
    # Pause between calls (presumably to avoid hammering the remote source — confirm).
    time.sleep(3)

data = pd.concat(dfs).reset_index(drop = True)
# Saved as 'Standings_…' (not 'Schedules_…'): the table contains standings, and the
# following cell reads 'data/Standings_1979-2023.csv'.
data.to_csv('data/Standings_1979-2023.csv', index = False)

Fetching standings for season 2002-2003 ...

KeyboardInterrupt: 

In [4]:
# Extract the team code, year and win-loss percentage columns from the cached
# standings and store them as a separate `target` table.
target = (
    pd.read_csv('data/Standings_1979-2023.csv')
    .loc[:, ['Tm', 'Year', 'W/L%']]
)
target.to_csv(path_or_buf='data/target_1979-2023.csv', index=None)

## Create team strength features

In [6]:
# Load the standings table and preview its first five rows.
# NOTE(review): this reads from the working directory, whereas the previous cell uses
# the 'data/' folder — confirm which copy of the file is intended.
data = pd.read_csv(filepath_or_buffer='Standings_1979-2023.csv')
data.head(n=5)

Unnamed: 0,Team,Tm,Year,W,L,W/L%,PS/G,PA/G,SRS
0,Washington Bullets,WSB,1979,54,28,0.659,114.9,109.9,4.75
1,Seattle SuperSonics,SEA,1979,52,30,0.634,106.6,103.9,2.69
2,Phoenix Suns,PHO,1979,50,32,0.61,115.4,111.7,3.55
3,San Antonio Spurs,SAS,1979,48,34,0.585,119.3,114.1,4.97
4,Kansas City Kings,KCK,1979,48,34,0.585,113.1,110.2,2.73


In [39]:
# Show every standings row whose 'Year' is 2018.
data.loc[data['Year'].eq(2018)]


Unnamed: 0,Team,Tm,Year,W,L,W/L%,PS/G,PA/G,SRS
1066,Houston Rockets,HOU,2018,65,17,0.793,112.4,103.9,8.21
1067,Toronto Raptors,TOR,2018,59,23,0.72,111.7,103.9,7.29
1068,Golden State Warriors,GSW,2018,58,24,0.707,113.5,107.5,5.79
1069,Boston Celtics,BOS,2018,55,27,0.671,104.0,100.4,3.23
1070,Philadelphia 76ers,PHI,2018,52,30,0.634,109.8,105.3,4.3
1071,Cleveland Cavaliers,CLE,2018,50,32,0.61,110.9,109.9,0.59
1072,Portland Trail Blazers,POR,2018,49,33,0.598,105.6,103.0,2.6
1073,Indiana Pacers,IND,2018,48,34,0.585,105.6,104.2,1.18
1074,New Orleans Pelicans,NOP,2018,48,34,0.585,111.7,110.4,1.48
1075,Utah Jazz,UTA,2018,48,34,0.585,104.1,99.8,4.47


In [32]:
# Count the distinct team codes in the table (a missing code counts as one value).
data['Tm'].nunique(dropna=False)

41

### Last wins, Net Rating (= points scored per game - points allowed per game)

In [101]:
def wins_rating(data, number_years=5, last_year=2023):
    """Collect each team's recent season results, keyed by team code.

    Args:
        data: DataFrame with the columns 'Tm', 'Year', 'W/L%', 'PS/G', 'PA/G' and 'SRS'.
        number_years: How far back from ``last_year`` the window reaches. Both bounds
            are inclusive, so ``number_years + 1`` seasons are kept (2018..2023 with
            the defaults). NOTE(review): confirm that the extra season is intended.
        last_year: Latest 'Year' value to include. Defaults to 2023, the value that
            used to be hard-coded.

    Returns:
        A dict mapping every 'Tm' value found in the window to a list of dicts with
        the keys 'Year', 'WinLoss' ('W/L%'), 'NetRating' ('PS/G' - 'PA/G') and 'SRS'.
        Teams appear in order of first occurrence; each list follows the row order
        of ``data``.
    """
    first_year = last_year - number_years
    recent = data[data['Year'].between(first_year, last_year)]
    win_loss = {team: [] for team in recent['Tm'].unique()}

    # Compute the net rating once for the whole window instead of row by row.
    net_rating = recent['PS/G'] - recent['PA/G']

    # Walk the columns in parallel; .tolist() yields plain Python scalars, so the
    # stored values (and any labels later built from 'Year') are not numpy types.
    season_records = zip(
        recent['Tm'].tolist(),
        recent['Year'].tolist(),
        recent['W/L%'].tolist(),
        net_rating.tolist(),
        recent['SRS'].tolist(),
    )
    for team, year, win_pct, net, srs in season_records:
        win_loss[team].append({'Year': year, 'WinLoss': win_pct, 'NetRating': net, 'SRS': srs})
    return win_loss

In [103]:
# Gather the per-team season records from the standings table (default window).
teams_data = wins_rating(data=data)

def teams_csv(teams_data, output_path='team_data.csv'):
    """Write one CSV row per team with per-year win-loss %, net rating and SRS columns.

    Args:
        teams_data: dict mapping a team identifier to a list of dicts, each holding
            the keys 'Year', 'WinLoss', 'NetRating' and 'SRS'.
        output_path: Destination of the CSV file. Defaults to 'team_data.csv', the
            path that used to be hard-coded.

    Returns:
        None. The function's only effect is the file written to ``output_path``.
    """
    # Flatten each team's list of yearly records into a single wide row.
    rows = []
    for team, years_data in teams_data.items():
        row = {'Team': team}
        for year_data in years_data:
            year = year_data['Year']
            row[f'WinLoss_{year}'] = year_data['WinLoss']
            row[f'NetRating_{year}'] = year_data['NetRating']
            row[f'SRS_{year}'] = year_data['SRS']
        rows.append(row)

    df_teams = pd.DataFrame(rows)

    # A team without data for some year gets 'N/A' in that year's columns. The
    # placeholder is applied at write time (na_rep) so the numeric columns are not
    # turned into object columns just to print it.
    df_teams.to_csv(output_path, index=False, na_rep='N/A')

# Export the collected per-team records to the default CSV file.
teams_csv(teams_data=teams_data)