In [2]:
#from functions import Standings
import pandas as pd
import numpy as np
import time

### Create csv

In [4]:
# Create a dataframe with all schedules from 1979-80 to 2022-23
years = np.arange(1979, 2024)
dfs = []
for year in years :

    print(f'Fetching standings for season {year-1}-{year} ...', end = '\r')
    dfs.append(Standings(year))
    time.sleep(3)

data = pd.concat(dfs).reset_index(drop = True)
data.to_csv('data/Schedules_1979-2023.csv', index = None)

Fetching standings for season 2002-2003 ...

KeyboardInterrupt: 

In [4]:
target = pd.read_csv('data/Standings_1979-2023.csv')[['Tm', 'Year', 'W/L%']]
target.to_csv('data/target_1979-2023.csv', index = None)

## Create team strength features

In [3]:
data = pd.read_csv('Standings_1979-2023.csv')
data.head()

Unnamed: 0,Team,Tm,Year,W,L,W/L%,PS/G,PA/G,SRS
0,Washington Bullets,WSB,1979,54,28,0.659,114.9,109.9,4.75
1,Seattle SuperSonics,SEA,1979,52,30,0.634,106.6,103.9,2.69
2,Phoenix Suns,PHO,1979,50,32,0.61,115.4,111.7,3.55
3,San Antonio Spurs,SAS,1979,48,34,0.585,119.3,114.1,4.97
4,Kansas City Kings,KCK,1979,48,34,0.585,113.1,110.2,2.73


### Last wins, Net Rating (=nombre de points - nbr de points encaissés)

In [33]:
def wins_rating(data,current_year,number_years=5):
    df_5_years = data[(data['Year'] >= current_year-number_years) & (data['Year'] <= current_year)]
    win_loss = {team: [] for team in df_5_years['Tm'].unique()}

    for _,row in df_5_years.iterrows():
        win_loss[row['Tm']].append({'Year': row['Year'], 'WinLoss': row['W/L%'],'NetRating': row['PS/G']-row['PA/G'], 'SRS': row['SRS']})
    return win_loss


# Create df_teams where each row corresponds to a team and contains columns for each year's win-loss-%, net rating, and SRS (past 5 years)
def teams_df(teams_data, current_year):
    # Transforming the data
    rows = []
    for team, years_data in teams_data.items():
        row = {'Team': team, 'Year': current_year}
        for year_data in years_data:
            year = year_data['Year']
            row[f'WinLoss_{current_year-year}'] = year_data['WinLoss']
            row[f'NetRating_{current_year-year}'] = year_data['NetRating']
            row[f'SRS_{current_year-year}'] = year_data['SRS']
        rows.append(row)

    df_teams = pd.DataFrame(rows)

    # Fill NaN values if any year data is missing for a team
    df_teams.fillna('N/A', inplace=True)
    return df_teams

def teams_csv(number_years=5):
    # Create CSV where each row has a team, year, and win-loss-%, net rating, SRS of past 5 years
    final_df = pd.DataFrame()
    for year in range(1984,2024):
        teams_data = wins_rating(data, year, number_years)
        df_temp = teams_df(teams_data, year)
        final_df = pd.concat([final_df,df_temp])

    # Export to CSV
    final_df.to_csv('team_yearly_data.csv', index=False)

teams_csv()

  df_teams.fillna('N/A', inplace=True)


In [34]:
final_df

Unnamed: 0,Team,Year,WinLoss_5,NetRating_5,SRS_5,WinLoss_4,NetRating_4,SRS_4,WinLoss_3,NetRating_3,SRS_3,WinLoss_2,NetRating_2,SRS_2,WinLoss_1,NetRating_1,SRS_1,WinLoss_0,NetRating_0,SRS_0
0,WSB,1984,0.659,5.0,4.75,0.476,-2.5,-2.27,0.476,0.0,0.42,0.524,0.9,1.06,0.512,-0.1,0.2,0.427,-2.9,-2.36
1,SEA,1984,0.634,2.7,2.69,0.683,4.7,4.24,0.415,-1.7,-1.84,0.634,4.2,3.69,0.585,3.2,2.88,0.512,-0.2,-0.34
2,PHO,1984,0.61,3.7,3.55,0.671,3.6,3.25,0.695,5.5,4.83,0.561,3.5,3.05,0.646,5.0,4.61,0.5,0.9,0.65
3,SAS,1984,0.585,5.2,4.97,0.5,-0.3,-0.24,0.634,2.9,2.18,0.585,2.3,1.79,0.646,3.6,3.1,0.451,-0.2,-0.5
4,KCK,1984,0.585,2.9,2.73,0.573,3.1,2.82,0.488,0.0,-0.49,0.366,-3.1,-3.25,0.549,1.5,1.04,0.463,-1.5,-1.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,ORL,2023,0.305,-4.8,-4.92,0.512,0.7,0.28,0.452,-1.0,-0.93,0.292,-9.3,-9.02,0.268,-8.0,-7.67,0.415,-2.6,-2.39
26,ATL,2023,0.293,-5.4,-5.3,0.354,-6.1,-6.06,0.299,-7.9,-7.71,0.569,2.3,2.14,0.524,1.5,1.55,0.5,0.3,0.32
27,DAL,2023,0.293,-3.1,-2.7,0.402,-1.2,-0.87,0.573,4.9,4.87,0.583,2.2,2.26,0.634,3.3,3.12,0.463,0.1,-0.14
28,MEM,2023,0.268,-6.2,-5.81,0.402,-2.6,-2.08,0.466,-1.1,-0.91,0.528,1.0,1.07,0.683,5.7,5.37,0.622,3.9,3.6
