# Generating Team Data
In this notebook, file we will be generating statistics on a team basis. This will be better for training a model as it will contain avergaes for teams **going into** a match up.

# Imports

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import shutil
import zipfile

from tqdm.notebook import tqdm

pd.set_option("display.max_columns", None)

# Utils

In [2]:
def clean_dir(path):
    """Makes a clean directory, removes all files and folders in the specified path"""
    
    if not os.path.exists(path):
        os.mkdir(path)
    
    for filename in os.listdir(path):
        file_path = os.path.join(path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

# Load Data

We first need to load in the data. There is a zip file called `seasons.zip` which we need to unzip and get all the data for. The let's create one giant csv that has all of the games from all seasons.

In [3]:
PATH_TO_ZIP = '../assets/data/seasons.zip'
EXTRACTED_DIR = '../assets/data/'
GAMES_DIR = '../assets/data/seasons/'

def getGames(zip_path=PATH_TO_ZIP,
             extract_dir=EXTRACTED_DIR,
             directory_to_games=GAMES_DIR):
    
    with zipfile.ZipFile(PATH_TO_ZIP, 'r') as zip_ref:
        clean_dir(GAMES_DIR)
        zip_ref.extractall(EXTRACTED_DIR)
    
    dfs = []
    for file in os.listdir(directory_to_games):
        dfs.append(pd.read_csv(os.path.join(directory_to_games, file)))
    
    shutil.rmtree(directory_to_games)
    
    return pd.concat(dfs, ignore_index=True).sort_values(by=['date'], ignore_index=True)

# Generating Moving Averages

Next, we want to create a file for each team that contains all of their stats for each game.

In [4]:
TEAMS = set(getGames()['losing_name']).union(set(getGames()['winning_name']))
TEAMS = [team.strip() for team in sorted(TEAMS)]

In [5]:
original_data = getGames()
original_data["away"] = np.where(original_data["winner"]=="Away",
                                 original_data["winning_name"],
                                 original_data["losing_name"])

original_data["home"] = np.where(original_data["winner"]=="Home",
                                 original_data["winning_name"],
                                 original_data["losing_name"])

# This will help later
reordered_labels = ['date',
                    'location',
                    'losing_abbr',
                    'losing_name',
                    'pace',
                    'winning_abbr',
                    'winning_name',
                    'away',
                    'home',
                    'winner',
                    'away_assist_percentage',
                    'away_assists', 
                    'away_block_percentage', 
                    'away_blocks', 
                    'away_defensive_rating', 
                    'away_defensive_rebound_percentage', 
                    'away_defensive_rebounds',
                    'away_effective_field_goal_percentage',
                    'away_field_goal_attempts',
                    'away_field_goal_percentage',
                    'away_field_goals',
                    'away_free_throw_attempt_rate',
                    'away_free_throw_attempts',
                    'away_free_throw_percentage',
                    'away_free_throws',
                    'away_losses',
                    'away_minutes_played',
                    'away_offensive_rating',
                    'away_offensive_rebound_percentage',
                    'away_offensive_rebounds',
                    'away_personal_fouls',
                    'away_points',
                    'away_steal_percentage',
                    'away_steals',
                    'away_three_point_attempt_rate',
                    'away_three_point_field_goal_attempts',
                    'away_three_point_field_goal_percentage',
                    'away_three_point_field_goals',
                    'away_total_rebound_percentage',
                    'away_total_rebounds',
                    'away_true_shooting_percentage',
                    'away_turnover_percentage',
                    'away_turnovers',
                    'away_two_point_field_goal_attempts',
                    'away_two_point_field_goal_percentage',
                    'away_two_point_field_goals',
                    'away_win_percentage',
                    'away_wins',
                    'home_assist_percentage',
                    'home_assists',
                    'home_block_percentage',
                    'home_blocks',
                    'home_defensive_rating',
                    'home_defensive_rebound_percentage',
                    'home_defensive_rebounds',
                    'home_effective_field_goal_percentage',
                    'home_field_goal_attempts',
                    'home_field_goal_percentage',
                    'home_field_goals',
                    'home_free_throw_attempt_rate',
                    'home_free_throw_attempts',
                    'home_free_throw_percentage',
                    'home_free_throws',
                    'home_losses',
                    'home_minutes_played',
                    'home_offensive_rating',
                    'home_offensive_rebound_percentage',
                    'home_offensive_rebounds',
                    'home_personal_fouls',
                    'home_points',
                    'home_steal_percentage',
                    'home_steals',
                    'home_three_point_attempt_rate',
                    'home_three_point_field_goal_attempts',
                    'home_three_point_field_goal_percentage',
                    'home_three_point_field_goals',
                    'home_total_rebound_percentage',
                    'home_total_rebounds',
                    'home_true_shooting_percentage',
                    'home_turnover_percentage',
                    'home_turnovers',
                    'home_two_point_field_goal_attempts',
                    'home_two_point_field_goal_percentage',
                    'home_two_point_field_goals',
                    'home_win_percentage',
                    'home_wins']

original_data.drop(columns=['away_ranking', 'home_ranking'], inplace=True)
original_data['date'] = pd.to_datetime(original_data['date'])
original_data['date'] = original_data['date'].dt.strftime('%Y-%m-%d')
original_data = original_data[reordered_labels]
original_data.dropna(inplace=True)
original_data.drop_duplicates(inplace=True)

print(f"There are {original_data.shape[0]} examples in the set")

There are 54626 examples in the set


In [6]:
new_labels = ['date',
              'location',
              'away',
              'home',
              'winner',
              'assist_percentage',
              'assists', 
              'block_percentage', 
              'blocks', 
              'defensive_rating', 
              'defensive_rebound_percentage', 
              'defensive_rebounds',
              'effective_field_goal_percentage',
              'field_goal_attempts',
              'field_goal_percentage',
              'field_goals',
              'free_throw_attempt_rate',
              'free_throw_attempts',
              'free_throw_percentage',
              'free_throws',
              'losses',
              'minutes_played',
              'offensive_rating',
              'offensive_rebound_percentage',
              'offensive_rebounds',
              'personal_fouls',
              'points',
              'steal_percentage',
              'steals',
              'three_point_attempt_rate',
              'three_point_field_goal_attempts',
              'three_point_field_goal_percentage',
              'three_point_field_goals',
              'total_rebound_percentage',
              'total_rebounds',
              'true_shooting_percentage',
              'turnover_percentage',
              'turnovers',
              'two_point_field_goal_attempts',
              'two_point_field_goal_percentage',
              'two_point_field_goals',
              'win_percentage',
              'wins']

def generateTeamSats(dataframe, teams=TEAMS, folder='../assets/data/team_data/'):
    """For each team, generate the games that they have played"""
    
    # Clean the folder
    clean_dir(folder)
    
    # Loop through all of the teams
    for team in tqdm(teams, unit="teams"):
        file_name = f'{team}_data.csv'
        path = os.path.join(folder, file_name)
        
        # Get home games and away games
        away_games = dataframe.loc[(dataframe["away"] == team)].copy()
        home_games = dataframe.loc[(dataframe["home"] == team)].copy()
        
        home_games.drop(home_games.filter(regex="away_").columns, axis=1, inplace=True)
        home_games.drop(columns=['losing_abbr',
                                 'losing_name',
                                 'pace',
                                 'winning_abbr',
                                 'winning_name'],
                        inplace=True)
        home_games.columns = new_labels
        
        away_games.drop(away_games.filter(regex="home_").columns, axis=1, inplace=True)
        away_games.drop(columns=['losing_abbr',
                                 'losing_name',
                                 'pace',
                                 'winning_abbr',
                                 'winning_name'],
                        inplace=True)
        away_games.columns = new_labels
        
        # Shaoe for both must match same columns
        assert away_games.shape[1] == home_games.shape[1]
        
        # Join the home games and away games, sort by date
        team_stats = pd.concat([home_games, away_games])
        team_stats.drop_duplicates(inplace=True)
        team_stats.dropna(inplace=True)
        team_stats.sort_values(by=["date"], inplace=True)
        
        # Save the stats
        team_stats.to_csv(path, index=None)

In [7]:
generateTeamSats(original_data)

  0%|          | 0/1146 [00:00<?, ?teams/s]

In [8]:
pd.read_csv('../assets/data/team_data/Yale_data.csv').head()

Unnamed: 0,date,location,away,home,winner,assist_percentage,assists,block_percentage,blocks,defensive_rating,defensive_rebound_percentage,defensive_rebounds,effective_field_goal_percentage,field_goal_attempts,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,losses,minutes_played,offensive_rating,offensive_rebound_percentage,offensive_rebounds,personal_fouls,points,steal_percentage,steals,three_point_attempt_rate,three_point_field_goal_attempts,three_point_field_goal_percentage,three_point_field_goals,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins
0,2009-11-13,"Arena at Harbor Yard, Bridgeport, Connecticut",Sacred Heart,Yale,Away,53.8,14,0.0,0,115.0,62.1,22,0.433,67,0.388,26,0.463,31,0.903,28,1,200.0,107.5,41.0,12,20,86,15.0,12,0.269,18.0,0.333,6.0,50.0,34,0.526,14.8,14,49.0,0.408,20.0,0.0,0
1,2009-11-16,"Harry A. Gampel Pavilion, Storrs, Connecticut",Hofstra,Yale,Away,70.0,14,4.5,2,94.4,46.2,12,0.434,53,0.377,20,0.434,23,0.739,17,2,200.0,87.5,23.3,7,17,63,18.1,13,0.283,15.0,0.4,6.0,33.9,19,0.493,20.2,14,38.0,0.368,14.0,0.0,0
2,2009-11-17,"Harry A. Gampel Pavilion, Storrs, Connecticut",Yale,Colgate,Away,47.6,10,11.1,2,90.2,68.0,19,0.411,56,0.375,21,0.429,24,0.792,19,2,200.0,106.6,54.5,16,15,65,18.0,11,0.339,19.0,0.211,4.0,60.3,35,0.482,15.3,12,37.0,0.459,17.0,0.333,1
3,2009-11-21,"John J. Lee Ampitheater, New Haven, Connecticut",Quinnipiac,Yale,Away,56.0,14,7.5,4,98.6,42.9,18,0.427,62,0.403,25,0.258,16,0.688,11,3,200.0,88.9,29.7,11,21,64,13.9,10,0.21,13.0,0.231,3.0,36.7,29,0.46,16.9,14,49.0,0.449,22.0,0.25,1
4,2009-11-23,"Mack Sports Complex, Hempstead, New York",Charlotte,Yale,Away,58.3,14,11.1,4,133.3,53.6,16,0.5,54,0.444,24,0.426,23,0.87,20,4,200.0,112.1,37.9,10,21,74,9.1,6,0.278,15.0,0.4,6.0,45.6,26,0.57,15.8,12,39.0,0.462,18.0,0.2,1


In [9]:
def compute_ma(span, teams=TEAMS, team_data_folder='../assets/data/team_data/', dest_folder='../assets/data/team_ma/'):
    """Computes various moving averages for the stats"""
    
    # Clean directory we will be saving the CSVs to
    clean_dir(dest_folder)
    
    for team in tqdm(teams, unit='teams'):
        averages = pd.DataFrame()

        # Load the stats for a given team
        team_stats = pd.read_csv(os.path.join(team_data_folder, f"{team}_data.csv"))

        # Compute the moving averages for the appropriate columns
        for col in team_stats.columns:
            if col in {'date', 'location', 'away', 'home', 'winner'}:
                continue
                
            # Simple moving average
            team_stats[f"{col}_SMA"] = team_stats.loc[:, col].rolling(window=span).mean()
            team_stats[f"{col}_SMA"] = team_stats[f"{col}_SMA"].shift(1)

            # Cumulative moving average
            team_stats[f"{col}_CMA"] = team_stats.loc[:, col].expanding(min_periods=span).mean()
            team_stats[f"{col}_CMA"] = team_stats[f"{col}_CMA"].shift(1)

            # Exponential moving average
            team_stats[f"{col}_EMA"] = team_stats.loc[:, col].ewm(span=span, adjust=False).mean()
            team_stats[f"{col}_EMA"] = team_stats[f"{col}_EMA"].shift(1)

        # Drop any rows with NULL values and save the CSV
        team_stats.dropna(inplace=True)
        team_stats.drop_duplicates(inplace=True)
        team_stats.to_csv(os.path.join(dest_folder, f"{team}_ma.csv"), index=None)

In [10]:
compute_ma(5)

  0%|          | 0/1146 [00:00<?, ?teams/s]

In [11]:
pd.read_csv('../assets/data/team_ma/Yale_ma.csv').head()

Unnamed: 0,date,location,away,home,winner,assist_percentage,assists,block_percentage,blocks,defensive_rating,defensive_rebound_percentage,defensive_rebounds,effective_field_goal_percentage,field_goal_attempts,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,losses,minutes_played,offensive_rating,offensive_rebound_percentage,offensive_rebounds,personal_fouls,points,steal_percentage,steals,three_point_attempt_rate,three_point_field_goal_attempts,three_point_field_goal_percentage,three_point_field_goals,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins,assist_percentage_SMA,assist_percentage_CMA,assist_percentage_EMA,assists_SMA,assists_CMA,assists_EMA,block_percentage_SMA,block_percentage_CMA,block_percentage_EMA,blocks_SMA,blocks_CMA,blocks_EMA,defensive_rating_SMA,defensive_rating_CMA,defensive_rating_EMA,defensive_rebound_percentage_SMA,defensive_rebound_percentage_CMA,defensive_rebound_percentage_EMA,defensive_rebounds_SMA,defensive_rebounds_CMA,defensive_rebounds_EMA,effective_field_goal_percentage_SMA,effective_field_goal_percentage_CMA,effective_field_goal_percentage_EMA,field_goal_attempts_SMA,field_goal_attempts_CMA,field_goal_attempts_EMA,field_goal_percentage_SMA,field_goal_percentage_CMA,field_goal_percentage_EMA,field_goals_SMA,field_goals_CMA,field_goals_EMA,free_throw_attempt_rate_SMA,free_throw_attempt_rate_CMA,free_throw_attempt_rate_EMA,free_throw_attempts_SMA,free_throw_attempts_CMA,free_throw_attempts_EMA,free_throw_percentage_SMA,free_throw_percentage_CMA,free_throw_percentage_EMA,free_throws_SMA,free_throws_CMA,free_throws_EMA,losses_SMA,losses_CMA,losses_EMA,minutes_played_SMA,minutes_played_CMA,minutes_played_EMA,offensive_rating_SMA,offensive_rating_CMA,offensive_rating_EMA,offensive_rebound_percentage_SMA,offensive_rebound_percentage_CMA,offensive_rebound_percentage_EMA,offensive_rebounds_SMA,offensive_rebounds_CMA,offensive_rebounds_EMA,personal_fouls_SMA,personal_fouls_CMA,personal_fouls_EMA,points_SMA,points_CMA,points_EMA,steal_percentage_SMA,steal_percentage_CMA,steal_percentage_EMA,steals_SMA,steals_CMA,steals_EMA,three_point_attempt_rate_SMA,three_point_attempt_rate_CMA,three_point_attempt_rate_EMA,three_point_field_goal_attempts_SMA,three_point_field_goal_attempts_CMA,three_point_field_goal_attempts_EMA,three_point_field_goal_percentage_SMA,three_point_field_goal_percentage_CMA,three_point_field_goal_percentage_EMA,three_point_field_goals_SMA,three_point_field_goals_CMA,three_point_field_goals_EMA,total_rebound_percentage_SMA,total_rebound_percentage_CMA,total_rebound_percentage_EMA,total_rebounds_SMA,total_rebounds_CMA,total_rebounds_EMA,true_shooting_percentage_SMA,true_shooting_percentage_CMA,true_shooting_percentage_EMA,turnover_percentage_SMA,turnover_percentage_CMA,turnover_percentage_EMA,turnovers_SMA,turnovers_CMA,turnovers_EMA,two_point_field_goal_attempts_SMA,two_point_field_goal_attempts_CMA,two_point_field_goal_attempts_EMA,two_point_field_goal_percentage_SMA,two_point_field_goal_percentage_CMA,two_point_field_goal_percentage_EMA,two_point_field_goals_SMA,two_point_field_goals_CMA,two_point_field_goals_EMA,win_percentage_SMA,win_percentage_CMA,win_percentage_EMA,wins_SMA,wins_CMA,wins_EMA
0,2009-11-24,"Mack Sports Complex, Hempstead, New York",Yale,Elon,Away,48.0,12,10.7,3,98.5,64.7,13,0.52,50,0.5,25,0.38,19,0.895,17,4,200.0,104.5,38.5,8,16,69,18.2,12,0.14,7.0,0.286,2.0,48.8,21,0.584,20.4,15,43.0,0.535,23.0,0.333,2,57.14,57.14,56.47037,13.2,13.2,13.407407,6.84,6.84,7.455556,2.4,2.4,2.716049,106.3,106.3,111.746914,54.56,54.56,54.303704,17.4,17.4,17.679012,0.441,0.441,0.45084,58.4,58.4,58.54321,0.3974,0.3974,0.406988,23.2,23.2,23.777778,0.402,0.402,0.39721,23.4,23.4,23.17284,0.7984,0.7984,0.81158,19.0,19.0,19.135802,2.4,2.4,2.691358,200.0,200.0,200.0,100.52,100.52,102.791358,37.28,37.28,37.707407,11.2,11.2,11.209877,18.8,18.8,19.518519,70.4,70.4,71.728395,14.82,14.82,13.539506,10.4,10.4,9.506173,0.2758,0.2758,0.270642,16.0,16.0,15.740741,0.315,0.315,0.32121,5.0,5.0,5.037037,45.3,45.3,45.51358,28.6,28.6,28.888889,0.5062,0.5062,0.516222,16.6,16.6,16.207407,13.2,13.2,13.037037,42.4,42.4,42.802469,0.4292,0.4292,0.438716,18.2,18.2,18.740741,0.1566,0.1566,0.171556,0.6,0.6,0.703704
1,2009-11-28,"Christl Arena, West Point, New York",Yale,Army,Home,44.4,8,7.7,3,104.9,54.5,15,0.4,50,0.36,18,0.38,19,0.421,8,5,200.0,78.7,39.4,10,18,48,13.1,8,0.42,21.0,0.19,4.0,45.5,25,0.407,20.4,14,29.0,0.483,14.0,0.286,2,55.98,55.616667,53.646914,12.8,13.0,12.938272,8.98,7.483333,8.537037,3.0,2.5,2.8107,103.0,105.0,107.331276,55.08,56.25,57.769136,15.6,16.666667,16.119342,0.4584,0.454167,0.473893,55.0,57.0,55.695473,0.4198,0.4145,0.437992,23.0,23.5,24.185185,0.3854,0.398333,0.391473,21.0,22.666667,21.781893,0.7968,0.8145,0.839387,16.8,18.666667,18.423868,3.0,2.666667,3.127572,200.0,200.0,200.0,99.92,101.183333,103.360905,36.78,37.483333,37.971605,10.4,10.666667,10.139918,18.0,18.333333,18.345679,67.0,70.166667,70.81893,15.46,15.383333,15.093004,10.4,10.666667,10.337449,0.25,0.253167,0.227095,13.8,14.5,12.82716,0.3056,0.310167,0.309473,4.2,4.5,4.024691,45.06,45.883333,46.609053,26.0,27.333333,26.259259,0.5178,0.519167,0.538815,17.72,17.233333,17.604938,13.4,13.5,13.691358,41.2,42.5,42.868313,0.4546,0.446833,0.470811,18.8,19.0,20.160494,0.2232,0.186,0.22537,1.0,0.833333,1.135802
2,2009-12-02,"Chase Arena at Reich Family Pavilion, West Har...",Yale,Hartford,Away,33.3,6,6.7,2,90.2,78.6,24,0.444,45,0.4,18,0.4,18,0.444,8,5,200.0,94.1,45.2,12,16,48,2.0,1,0.289,13.0,0.308,4.0,61.0,36,0.448,18.5,12,32.0,0.438,14.0,0.375,3,50.86,54.014286,50.564609,11.6,12.285714,11.292181,9.62,7.514286,8.258025,3.2,2.571429,2.8738,105.1,104.985714,106.52085,56.74,56.0,56.679424,16.2,16.428571,15.746228,0.4516,0.446429,0.449262,54.4,56.0,53.796982,0.4164,0.406714,0.411995,22.6,22.714286,22.123457,0.3746,0.395714,0.387649,20.2,22.142857,20.854595,0.7332,0.758286,0.699925,15.0,17.142857,14.949246,3.6,3.0,3.751715,200.0,200.0,200.0,98.16,97.971429,95.140604,40.0,37.757143,38.447737,11.0,10.571429,10.093278,18.2,18.285714,18.230453,64.0,67.0,63.21262,14.46,15.057143,14.428669,9.4,10.285714,9.558299,0.2774,0.277,0.291396,15.0,15.428571,15.55144,0.2636,0.293,0.269649,3.8,4.428571,4.016461,47.38,45.828571,46.239369,27.2,27.0,25.839506,0.5006,0.503143,0.494877,17.76,17.685714,18.536626,13.4,13.571429,13.794239,39.4,40.571429,38.245542,0.4776,0.452,0.474874,18.8,18.285714,18.106996,0.2804,0.200286,0.24558,1.4,1.0,1.423868
3,2009-12-07,"John J. Lee Ampitheater, New Haven, Connecticut",Vermont,Yale,Away,70.8,17,10.0,3,107.5,70.8,17,0.422,58,0.414,24,0.259,15,0.733,11,6,200.0,89.6,44.1,15,21,60,14.9,10,0.241,14.0,0.071,1.0,55.2,32,0.461,19.9,16,44.0,0.523,23.0,0.333,3,48.0,51.425,44.809739,10.8,11.5,9.528121,8.74,7.4125,7.738683,3.2,2.5,2.582533,105.1,103.1375,101.080567,58.86,58.825,63.986283,17.2,17.375,18.497485,0.4582,0.446125,0.447508,52.2,54.625,50.864655,0.4214,0.405875,0.407996,22.0,22.125,20.748971,0.3688,0.39625,0.391766,19.0,21.625,19.903064,0.6636,0.719,0.614616,12.8,16.0,12.63283,4.2,3.25,4.16781,200.0,200.0,200.0,95.66,97.4875,94.793736,38.14,38.6875,40.698491,10.2,10.75,10.728852,18.4,18.0,17.486968,60.6,64.625,58.141747,11.26,13.425,10.28578,7.4,9.125,6.705533,0.2674,0.2785,0.290598,13.8,15.125,14.70096,0.283,0.294875,0.282433,3.8,4.375,4.010974,47.52,47.725,51.159579,27.4,28.125,29.226337,0.4938,0.49625,0.479251,18.4,17.7875,18.524417,13.4,13.375,13.196159,38.4,39.5,36.163695,0.4734,0.45025,0.462583,18.2,17.75,16.737997,0.2888,0.222125,0.28872,1.8,1.25,1.949246
4,2009-12-09,"John J. Lee Ampitheater, New Haven, Connecticut",Bryant\n\t\t\t,Yale,Home,64.0,16,13.6,6,79.4,73.5,26,0.466,59,0.424,25,0.356,21,0.667,14,6,200.0,101.5,32.4,11,15,69,8.8,6,0.203,12.0,0.417,5.0,52.1,37,0.5,13.9,11,47.0,0.426,20.0,0.4,4,50.96,53.577778,53.47316,11.4,12.111111,12.018747,9.24,7.7,8.492455,3.0,2.555556,2.721689,106.88,103.622222,103.220378,64.44,60.155556,66.257522,17.0,17.333333,17.998323,0.4572,0.443444,0.439005,51.4,55.0,53.243103,0.4236,0.406778,0.409998,21.8,22.333333,21.832647,0.369,0.381,0.347511,18.8,20.888889,18.268709,0.6726,0.720556,0.654078,12.8,15.444444,12.088554,4.8,3.555556,4.77854,200.0,200.0,200.0,95.8,96.611111,93.06249,41.02,39.288889,41.832327,11.0,11.222222,12.152568,18.4,18.333333,18.657979,59.8,64.111111,58.761164,11.46,13.588889,11.823853,7.4,9.222222,7.803688,0.2736,0.274333,0.274065,14.0,15.0,14.467307,0.251,0.27,0.211955,3.4,4.0,3.007316,51.22,48.555556,52.506386,28.0,28.555556,30.150892,0.494,0.492333,0.473167,19.0,18.022222,18.982945,13.8,13.666667,14.130773,37.4,40.0,38.775796,0.4882,0.458333,0.482722,18.4,18.333333,18.825332,0.3054,0.234444,0.30348,2.2,1.444444,2.299497


# Getting the Data Ready for Testing
Now that we have computed the exponential moving average for each team, we need to joing them back into one table so that it is easier to train models on.

In [12]:
games = original_data.copy()
num_rows = games.shape[0]

# We can uniquely identify a game given the teams, the date, and the game number.
games = games.filter(["date", "location", "home", "away", "winner"])
games.drop_duplicates()

# Just to make sure that there is no lost data
assert games.shape[0] == num_rows

# Arrays to hold the home and away dataframes
homes=[]
aways=[]

# Add the moving averages
for team in tqdm(TEAMS, unit='teams'):
    # load in the  file
    team_avgs = pd.read_csv(f'../assets/data/team_ma/{team}_ma.csv')
    team_avgs.drop(new_labels[5:], axis=1, inplace=True)
    
    # Merge in visiting team stats
    away = pd.merge(games,
                    team_avgs.loc[team_avgs["away"] == team],
                    on=["date", "location", "home", "away", 'winner'],
                    how="left")
    
    # Drop any rows with NULL values and drop redudant columns
    away.dropna(inplace=True)
    away.drop_duplicates(inplace=True)
    
    # Rename the columns and append to the array
    new_away_cols = list(away.columns)[:5] + ["away_" + col for col in list(away.columns)[5:]]
    away.columns = new_away_cols
    aways.append(away)
    
    # Merge in home team stats
    home = pd.merge(games,
                    team_avgs.loc[team_avgs["home"] == team],
                    on=["date", "location", "home", "away", 'winner'],
                    how="left")
    
    # Drop any rows with NULL values and drop redudant columns
    home.dropna(inplace=True)
    home.drop_duplicates(inplace=True)
    
    # Rename the columns and append to the array
    new_home_cols = list(home.columns)[:5] + ["home_" + col for col in list(home.columns)[5:]]
    home.columns = new_home_cols
    homes.append(home)
    
    # Sanity check, make sure that the column sizes match for both home and away dataframes
    assert home.shape[1] == away.shape[1]

  0%|          | 0/1146 [00:00<?, ?teams/s]

In [13]:
# Join the tables on the common attributes and sort by the Date and the Number of game
data = pd.merge(pd.concat(homes),
                pd.concat(aways),
                on=["date", "location", "home", "away", "winner"]).sort_values(["date"]).reset_index(drop=True)

data.drop_duplicates(inplace=True)
data.dropna(inplace=True)
data.head(10)

Unnamed: 0,date,location,home,away,winner,home_assist_percentage_SMA,home_assist_percentage_CMA,home_assist_percentage_EMA,home_assists_SMA,home_assists_CMA,home_assists_EMA,home_block_percentage_SMA,home_block_percentage_CMA,home_block_percentage_EMA,home_blocks_SMA,home_blocks_CMA,home_blocks_EMA,home_defensive_rating_SMA,home_defensive_rating_CMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_SMA,home_defensive_rebound_percentage_CMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_SMA,home_defensive_rebounds_CMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_SMA,home_effective_field_goal_percentage_CMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_SMA,home_field_goal_attempts_CMA,home_field_goal_attempts_EMA,home_field_goal_percentage_SMA,home_field_goal_percentage_CMA,home_field_goal_percentage_EMA,home_field_goals_SMA,home_field_goals_CMA,home_field_goals_EMA,home_free_throw_attempt_rate_SMA,home_free_throw_attempt_rate_CMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_SMA,home_free_throw_attempts_CMA,home_free_throw_attempts_EMA,home_free_throw_percentage_SMA,home_free_throw_percentage_CMA,home_free_throw_percentage_EMA,home_free_throws_SMA,home_free_throws_CMA,home_free_throws_EMA,home_losses_SMA,home_losses_CMA,home_losses_EMA,home_minutes_played_SMA,home_minutes_played_CMA,home_minutes_played_EMA,home_offensive_rating_SMA,home_offensive_rating_CMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_SMA,home_offensive_rebound_percentage_CMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_SMA,home_offensive_rebounds_CMA,home_offensive_rebounds_EMA,home_personal_fouls_SMA,home_personal_fouls_CMA,home_personal_fouls_EMA,home_points_SMA,home_points_CMA,home_points_EMA,home_steal_percentage_SMA,home_steal_percentage_CMA,home_steal_percentage_EMA,home_steals_SMA,home_steals_CMA,home_steals_EMA,home_three_point_attempt_rate_SMA,home_three_point_attempt_rate_CMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_SMA,home_three_point_field_goal_attempts_CMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_SMA,home_three_point_field_goal_percentage_CMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_SMA,home_three_point_field_goals_CMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_SMA,home_total_rebound_percentage_CMA,home_total_rebound_percentage_EMA,home_total_rebounds_SMA,home_total_rebounds_CMA,home_total_rebounds_EMA,home_true_shooting_percentage_SMA,home_true_shooting_percentage_CMA,home_true_shooting_percentage_EMA,home_turnover_percentage_SMA,home_turnover_percentage_CMA,home_turnover_percentage_EMA,home_turnovers_SMA,home_turnovers_CMA,home_turnovers_EMA,home_two_point_field_goal_attempts_SMA,home_two_point_field_goal_attempts_CMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_SMA,home_two_point_field_goal_percentage_CMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_SMA,home_two_point_field_goals_CMA,home_two_point_field_goals_EMA,home_win_percentage_SMA,home_win_percentage_CMA,home_win_percentage_EMA,home_wins_SMA,home_wins_CMA,home_wins_EMA,away_assist_percentage_SMA,away_assist_percentage_CMA,away_assist_percentage_EMA,away_assists_SMA,away_assists_CMA,away_assists_EMA,away_block_percentage_SMA,away_block_percentage_CMA,away_block_percentage_EMA,away_blocks_SMA,away_blocks_CMA,away_blocks_EMA,away_defensive_rating_SMA,away_defensive_rating_CMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_SMA,away_defensive_rebound_percentage_CMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_SMA,away_defensive_rebounds_CMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_SMA,away_effective_field_goal_percentage_CMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_SMA,away_field_goal_attempts_CMA,away_field_goal_attempts_EMA,away_field_goal_percentage_SMA,away_field_goal_percentage_CMA,away_field_goal_percentage_EMA,away_field_goals_SMA,away_field_goals_CMA,away_field_goals_EMA,away_free_throw_attempt_rate_SMA,away_free_throw_attempt_rate_CMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_SMA,away_free_throw_attempts_CMA,away_free_throw_attempts_EMA,away_free_throw_percentage_SMA,away_free_throw_percentage_CMA,away_free_throw_percentage_EMA,away_free_throws_SMA,away_free_throws_CMA,away_free_throws_EMA,away_losses_SMA,away_losses_CMA,away_losses_EMA,away_minutes_played_SMA,away_minutes_played_CMA,away_minutes_played_EMA,away_offensive_rating_SMA,away_offensive_rating_CMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_SMA,away_offensive_rebound_percentage_CMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_SMA,away_offensive_rebounds_CMA,away_offensive_rebounds_EMA,away_personal_fouls_SMA,away_personal_fouls_CMA,away_personal_fouls_EMA,away_points_SMA,away_points_CMA,away_points_EMA,away_steal_percentage_SMA,away_steal_percentage_CMA,away_steal_percentage_EMA,away_steals_SMA,away_steals_CMA,away_steals_EMA,away_three_point_attempt_rate_SMA,away_three_point_attempt_rate_CMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_SMA,away_three_point_field_goal_attempts_CMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_SMA,away_three_point_field_goal_percentage_CMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_SMA,away_three_point_field_goals_CMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_SMA,away_total_rebound_percentage_CMA,away_total_rebound_percentage_EMA,away_total_rebounds_SMA,away_total_rebounds_CMA,away_total_rebounds_EMA,away_true_shooting_percentage_SMA,away_true_shooting_percentage_CMA,away_true_shooting_percentage_EMA,away_turnover_percentage_SMA,away_turnover_percentage_CMA,away_turnover_percentage_EMA,away_turnovers_SMA,away_turnovers_CMA,away_turnovers_EMA,away_two_point_field_goal_attempts_SMA,away_two_point_field_goal_attempts_CMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_SMA,away_two_point_field_goal_percentage_CMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_SMA,away_two_point_field_goals_CMA,away_two_point_field_goals_EMA,away_win_percentage_SMA,away_win_percentage_CMA,away_win_percentage_EMA,away_wins_SMA,away_wins_CMA,away_wins_EMA
0,2009-11-28,"Orleans Arena, Paradise, Nevada",Illinois,Bradley,Away,59.46,59.46,57.401235,18.6,18.6,17.382716,12.82,12.82,10.639506,4.8,4.8,3.765432,87.82,87.82,89.612346,71.14,71.14,72.701235,26.0,26.0,25.580247,0.5474,0.5474,0.534074,61.4,61.4,60.185185,0.4984,0.4984,0.48958,30.6,30.6,29.444444,0.33,0.33,0.314296,19.6,19.6,18.358025,0.7242,0.7242,0.738198,14.0,14.0,13.246914,0.2,0.2,0.333333,200.0,200.0,200.0,118.64,118.64,114.201235,39.34,39.34,34.538272,10.2,10.2,8.925926,17.0,17.0,17.493827,81.2,81.2,77.45679,7.9,7.9,7.54321,5.4,5.4,5.098765,0.2574,0.2574,0.243494,16.0,16.0,14.802469,0.3492,0.3492,0.322753,6.0,6.0,5.320988,56.44,56.44,54.392593,36.2,36.2,34.506173,0.5742,0.5742,0.561074,12.46,12.46,12.34321,10.0,10.0,9.604938,45.4,45.4,45.382716,0.5454,0.5454,0.536123,24.6,24.6,24.123457,0.96,0.96,0.933333,2.8,2.8,3.061728,64.46,64.46,63.264198,14.6,14.6,14.049383,10.86,10.86,9.032099,3.4,3.4,2.765432,100.1,100.1,100.198765,68.62,68.62,69.604938,22.6,22.6,22.049383,0.5018,0.5018,0.488346,51.0,51.0,50.641975,0.4404,0.4404,0.431827,22.4,22.4,21.864198,0.3876,0.3876,0.398444,19.0,19.0,19.580247,0.6886,0.6886,0.680062,12.8,12.8,12.938272,1.2,1.2,1.333333,200.0,200.0,200.0,99.94,99.94,97.24321,29.18,29.18,29.865432,7.8,7.8,8.111111,17.4,17.4,18.493827,63.6,63.6,62.234568,6.86,6.86,6.596296,4.4,4.4,4.246914,0.3582,0.3582,0.347815,18.0,18.0,17.45679,0.3214,0.3214,0.312185,6.0,6.0,5.567901,49.68,49.68,50.138272,30.4,30.4,30.160494,0.5328,0.5328,0.521605,17.38,17.38,18.385185,12.6,12.6,13.419753,33.0,33.0,33.185185,0.4962,0.4962,0.490543,16.4,16.4,16.296296,0.5034,0.5034,0.514864,1.8,1.8,2.061728
1,2009-11-28,"UIC Pavilion, Chicago, Illinois",Tennessee State,Liberty,Away,54.72,54.72,54.185185,12.6,12.6,12.925926,6.94,6.94,5.85679,2.0,2.0,1.765432,111.08,111.08,109.480247,56.52,56.52,56.780247,17.8,17.8,18.345679,0.47,0.47,0.480593,56.6,56.6,56.518519,0.4074,0.4074,0.421605,23.0,23.0,23.790123,0.27,0.27,0.271901,15.2,15.2,15.308642,0.639,0.639,0.672556,9.8,9.8,10.382716,2.8,2.8,3.061728,200.0,200.0,200.0,90.22,90.22,93.345679,41.92,41.92,45.044444,11.2,11.2,12.012346,20.8,20.8,20.271605,62.8,62.8,64.555556,14.38,14.38,14.22963,10.2,10.2,10.012346,0.3428,0.3428,0.325037,19.4,19.4,18.382716,0.3616,0.3616,0.360346,7.0,7.0,6.592593,47.42,47.42,48.97284,29.0,29.0,30.358025,0.4932,0.4932,0.506667,23.46,23.46,23.519753,18.6,18.6,18.790123,37.2,37.2,38.135802,0.4298,0.4298,0.450247,16.0,16.0,17.197531,0.04,0.04,0.066667,0.2,0.2,0.333333,49.02,49.02,52.893827,9.6,9.6,10.82716,5.54,5.54,5.544444,2.2,2.2,2.0,105.24,105.24,101.125926,63.34,63.34,65.146914,22.6,22.6,23.469136,0.41,0.41,0.424222,49.8,49.8,50.950617,0.3728,0.3728,0.385,18.8,18.8,19.864198,0.394,0.394,0.393753,19.6,19.6,20.012346,0.7106,0.7106,0.714049,14.0,14.0,14.382716,3.8,3.8,4.061728,200.0,200.0,200.0,80.86,80.86,86.391358,32.3,32.3,36.834568,8.4,8.4,9.481481,15.4,15.4,15.419753,55.4,55.4,58.197531,9.88,9.88,10.017284,6.8,6.8,6.814815,0.3394,0.3394,0.346025,17.0,17.0,17.740741,0.2144,0.2144,0.223457,3.8,3.8,4.08642,48.68,48.68,51.791358,31.0,31.0,32.950617,0.4632,0.4632,0.475827,24.44,24.44,23.044444,18.8,18.8,17.91358,32.8,32.8,33.209877,0.4554,0.4554,0.472395,15.0,15.0,15.777778,0.2472,0.2472,0.252543,1.2,1.2,1.333333
2,2009-11-28,"Orleans Arena, Paradise, Nevada",Utah,Oklahoma State,Away,45.18,45.18,41.751852,12.2,12.2,11.049383,11.72,11.72,10.777778,4.8,4.8,4.530864,95.42,95.42,95.18642,66.96,66.96,67.091358,24.6,24.6,24.518519,0.5012,0.5012,0.489506,59.0,59.0,58.049383,0.4414,0.4414,0.43758,26.2,26.2,25.54321,0.3576,0.3576,0.372951,21.0,21.0,21.567901,0.7484,0.7484,0.738049,15.8,15.8,16.074074,1.4,1.4,1.555556,200.0,200.0,200.0,105.12,105.12,102.091358,33.14,33.14,32.758025,9.6,9.6,9.580247,19.2,19.2,19.740741,75.4,75.4,73.382716,10.9,10.9,10.679012,7.8,7.8,7.666667,0.3962,0.3962,0.369173,23.6,23.6,21.753086,0.29,0.29,0.26421,7.2,7.2,6.222222,51.4,51.4,51.582716,34.2,34.2,34.098765,0.5448,0.5448,0.534926,16.02,16.02,16.951852,13.0,13.0,13.716049,35.4,35.4,36.296296,0.5342,0.5342,0.530037,19.0,19.0,19.320988,0.4534,0.4534,0.459309,1.6,1.6,1.839506,47.22,47.22,49.397531,13.0,13.0,12.777778,8.96,8.96,7.258025,3.2,3.2,2.654321,85.82,85.82,85.574074,74.82,74.82,74.174074,29.8,29.8,29.407407,0.523,0.523,0.519914,60.6,60.6,57.728395,0.4646,0.4646,0.456346,28.2,28.2,26.407407,0.4546,0.4546,0.473235,27.4,27.4,27.358025,0.687,0.687,0.703123,18.6,18.6,18.950617,0.0,0.0,0.0,200.0,200.0,200.0,113.54,113.54,110.640741,33.92,33.92,33.103704,10.8,10.8,9.765432,19.8,19.8,20.074074,81.8,81.8,78.765432,7.46,7.46,7.350617,5.4,5.4,5.246914,0.3794,0.3794,0.395926,22.8,22.8,22.419753,0.2956,0.2956,0.305444,6.8,6.8,7.0,55.42,55.42,54.780247,40.6,40.6,39.17284,0.5592,0.5592,0.560642,14.74,14.74,16.308642,12.2,12.2,13.17284,37.8,37.8,35.308642,0.5596,0.5596,0.541753,21.4,21.4,19.407407,1.0,1.0,1.0,3.0,3.0,3.395062
3,2009-11-28,"New UCF Arena, Orlando, Florida",UCF,Albany (NY),Home,60.18,60.18,58.738272,14.2,14.2,13.716049,13.52,13.52,13.765432,4.2,4.2,4.234568,90.64,90.64,89.148148,67.58,67.58,68.34321,25.8,25.8,25.864198,0.5008,0.5008,0.511753,52.2,52.2,50.703704,0.4546,0.4546,0.46316,23.4,23.4,23.135802,0.4632,0.4632,0.437494,23.6,23.6,21.703704,0.6986,0.6986,0.664148,16.6,16.6,14.493827,0.4,0.4,0.555556,200.0,200.0,200.0,98.5,98.5,96.279012,39.46,39.46,36.82963,10.4,10.4,9.135802,15.6,15.6,15.283951,68.2,68.2,65.641975,10.82,10.82,10.877778,7.4,7.4,7.296296,0.2892,0.2892,0.31616,15.0,15.0,15.864198,0.3482,0.3482,0.33642,4.8,4.8,4.876543,54.94,54.94,54.648148,36.2,36.2,35.0,0.5362,0.5362,0.536938,21.52,21.52,22.130864,16.8,16.8,16.864198,37.2,37.2,34.839506,0.5102,0.5102,0.536914,18.6,18.6,18.259259,0.91,0.91,0.877778,2.6,2.6,2.839506,57.78,59.266667,59.026337,13.0,12.5,12.395062,9.86,9.766667,8.942387,4.2,4.166667,3.576132,93.14,92.866667,95.06214,65.02,63.983333,64.702469,25.4,24.833333,25.395062,0.4966,0.466833,0.482218,50.6,51.333333,49.374486,0.4396,0.411833,0.428407,22.2,21.0,20.925926,0.3498,0.334,0.40428,17.2,16.666667,19.292181,0.6854,0.666333,0.699185,12.2,11.5,13.950617,2.4,2.166667,2.423868,200.0,200.0,200.0,94.56,87.533333,91.399177,39.6,38.0,38.717695,9.2,9.333333,9.090535,17.4,19.0,18.720165,62.4,59.166667,61.164609,9.82,10.616667,9.666255,6.4,7.333333,6.633745,0.3142,0.346667,0.349761,15.8,17.833333,17.341564,0.3606,0.330333,0.313835,5.8,5.666667,5.36214,53.8,52.033333,53.560494,34.6,34.166667,34.485597,0.53,0.499833,0.523547,22.8,24.716667,24.42716,16.6,19.166667,18.246914,34.8,33.5,32.032922,0.4718,0.454833,0.484885,16.4,15.333333,15.563786,0.3466,0.288833,0.362519,1.6,1.333333,1.839506
4,2009-11-28,"Sprint Center, Kansas City, Missouri",IUPUI,Kansas State,Away,60.72,60.72,60.281481,15.2,15.2,14.679012,9.42,9.42,8.298765,3.2,3.2,2.753086,104.9,104.9,103.624691,64.52,64.52,62.517284,19.8,19.8,19.037037,0.5594,0.5594,0.54258,50.0,50.0,50.111111,0.5048,0.5048,0.490296,25.2,25.2,24.518519,0.4382,0.4382,0.455691,22.0,22.0,22.925926,0.7842,0.7842,0.786827,17.4,17.4,18.160494,0.4,0.4,0.555556,205.0,205.0,208.333333,110.14,110.14,108.987654,34.82,34.82,36.376543,8.2,8.2,8.716049,15.6,15.6,14.962963,73.2,73.2,72.382716,12.9,12.9,13.109877,8.6,8.6,8.740741,0.3178,0.3178,0.310506,15.8,15.8,15.481481,0.3292,0.3292,0.325012,5.4,5.4,5.185185,50.54,50.54,50.080247,28.0,28.0,27.753086,0.6086,0.6086,0.597025,19.46,19.46,19.577778,14.4,14.4,14.493827,34.2,34.2,34.62963,0.5842,0.5842,0.564716,19.8,19.8,19.333333,0.91,0.91,0.877778,2.6,2.6,2.839506,54.24,54.24,52.992593,14.8,14.8,14.308642,14.86,14.86,13.4,5.8,5.8,5.08642,93.92,93.92,97.928395,62.22,62.22,61.709877,24.8,24.8,24.506173,0.5244,0.5244,0.521951,57.4,57.4,56.91358,0.4738,0.4738,0.471568,27.2,27.2,26.802469,0.6536,0.6536,0.680988,36.4,36.4,37.654321,0.5994,0.5994,0.603852,22.2,22.2,23.074074,0.4,0.4,0.555556,200.0,200.0,200.0,114.74,114.74,114.230864,45.52,45.52,45.241975,14.4,14.4,14.197531,23.0,23.0,23.950617,82.2,82.2,82.234568,8.54,8.54,8.285185,6.2,6.2,6.049383,0.2996,0.2996,0.292185,17.0,17.0,16.493827,0.3314,0.3314,0.341543,5.6,5.6,5.555556,53.78,53.78,53.275309,39.2,39.2,38.703704,0.5508,0.5508,0.550235,15.86,15.86,16.028395,13.8,13.8,14.0,40.4,40.4,40.419753,0.5306,0.5306,0.522864,21.6,21.6,21.246914,0.91,0.91,0.877778,2.6,2.6,2.839506
5,2009-11-28,"Daniel-Meyer Coliseum, Fort Worth, Texas",TCU,Louisiana Tech,Away,63.62,63.62,64.004938,17.0,17.0,18.666667,5.94,5.94,5.471605,2.2,2.2,2.037037,103.26,103.26,102.792593,60.44,60.44,63.616049,23.0,23.0,25.308642,0.5412,0.5412,0.552753,58.2,58.2,62.580247,0.4566,0.4566,0.469864,26.8,26.8,29.54321,0.3816,0.3816,0.395481,22.6,22.6,25.061728,0.6976,0.6976,0.671951,15.4,15.4,16.530864,1.4,1.4,1.506173,215.0,215.0,225.0,109.1,109.1,111.87037,36.42,36.42,36.350617,11.2,11.2,12.049383,22.4,22.4,21.975309,78.6,78.6,85.530864,7.46,7.46,6.960494,5.2,5.2,5.111111,0.4508,0.4508,0.427506,25.2,25.2,25.345679,0.3786,0.3786,0.389247,9.6,9.6,9.91358,49.38,49.38,50.98642,34.2,34.2,37.358025,0.5686,0.5686,0.576346,17.68,17.68,16.335802,14.4,14.4,13.864198,33.0,33.0,37.234568,0.5274,0.5274,0.538284,17.2,17.2,19.62963,0.6868,0.6868,0.693148,2.6,2.6,2.888889,49.34,49.34,50.7,12.4,12.4,13.246914,8.72,8.72,7.969136,2.6,2.6,2.395062,94.18,94.18,92.806173,72.08,72.08,72.930864,23.0,23.0,22.283951,0.532,0.532,0.55437,54.4,54.4,53.716049,0.4726,0.4726,0.494074,25.2,25.2,26.123457,0.4336,0.4336,0.457753,23.0,23.0,24.123457,0.7578,0.7578,0.759642,17.0,17.0,17.91358,0.6,0.6,0.703704,200.0,200.0,200.0,110.12,110.12,114.12963,39.66,39.66,40.934568,10.0,10.0,10.45679,17.8,17.8,17.222222,73.8,73.8,76.592593,11.0,11.0,11.745679,7.4,7.4,7.91358,0.3264,0.3264,0.314679,18.0,18.0,17.098765,0.3764,0.3764,0.394531,6.4,6.4,6.432099,56.42,56.42,57.434568,33.0,33.0,32.740741,0.569,0.569,0.590617,16.98,16.98,17.664198,12.8,12.8,13.382716,36.4,36.4,36.617284,0.5182,0.5182,0.539395,18.8,18.8,19.691358,0.8434,0.8434,0.828444,2.4,2.4,2.691358
6,2009-11-28,"Dunkin' Donuts Center, Providence, Rhode Island",Providence,Boston College,Away,44.18,44.18,45.950617,14.0,14.0,15.222222,13.52,13.52,12.611111,5.6,5.6,5.320988,93.26,93.26,90.390123,63.76,63.76,63.209877,26.2,26.2,26.111111,0.4846,0.4846,0.499568,72.6,72.6,73.654321,0.42,0.42,0.431481,30.6,30.6,31.91358,0.3316,0.3316,0.331259,24.0,24.0,24.296296,0.6604,0.6604,0.66842,15.8,15.8,16.234568,0.4,0.4,0.555556,200.0,200.0,200.0,116.12,116.12,117.651852,51.64,51.64,51.825926,19.4,19.4,18.975309,19.8,19.8,20.728395,86.4,86.4,90.17284,12.52,12.52,14.17037,9.6,9.6,11.197531,0.398,0.398,0.379111,28.8,28.8,27.888889,0.3278,0.3278,0.363407,9.4,9.4,10.111111,57.36,57.36,57.196296,45.6,45.6,45.08642,0.5142,0.5142,0.528358,12.1,12.1,13.095062,11.6,11.6,12.950617,43.8,43.8,45.765432,0.487,0.487,0.480951,21.2,21.2,21.802469,0.91,0.91,0.877778,2.6,2.6,2.839506,56.12,56.12,53.314815,15.6,15.6,14.641975,8.54,8.54,7.074074,3.6,3.6,3.012346,98.92,98.92,106.771605,70.14,70.14,70.474074,23.6,23.6,21.691358,0.5412,0.5412,0.550568,57.6,57.6,55.382716,0.4864,0.4864,0.495926,27.4,27.4,26.987654,0.3898,0.3898,0.40516,22.0,22.0,22.061728,0.7048,0.7048,0.711864,16.0,16.0,16.222222,0.8,0.8,1.037037,200.0,200.0,200.0,115.62,115.62,117.633333,47.12,47.12,48.576543,11.8,11.8,11.777778,16.8,16.8,17.197531,77.0,77.0,76.209877,5.22,5.22,4.750617,3.6,3.6,3.209877,0.2786,0.2786,0.276148,16.2,16.2,15.45679,0.3862,0.3862,0.384568,6.2,6.2,6.012346,58.9,58.9,59.367901,35.4,35.4,33.469136,0.5734,0.5734,0.584654,15.96,15.96,16.402469,12.4,12.4,12.407407,41.4,41.4,39.925926,0.52,0.52,0.531914,21.2,21.2,20.975309,0.8034,0.8034,0.761778,2.2,2.2,2.358025
7,2009-11-28,"UIC Pavilion, Chicago, Illinois",Iowa State,Northwestern,Away,59.64,60.65,57.632099,18.0,18.833333,17.37037,11.92,11.5,9.925514,5.0,5.0,4.26749,80.52,83.066667,81.50535,76.38,73.333333,72.783539,27.6,26.666667,26.176955,0.5788,0.582667,0.564086,59.0,59.833333,59.119342,0.5032,0.5105,0.499214,29.8,30.666667,29.539095,0.3574,0.336833,0.340782,20.4,19.5,19.641975,0.6248,0.642833,0.666638,12.8,12.5,12.983539,0.0,0.0,0.0,200.0,200.0,200.0,115.2,116.65,113.055144,37.72,40.95,38.20823,10.8,11.0,10.296296,16.2,15.833333,15.687243,81.4,82.5,79.773663,9.56,9.616667,8.278601,6.8,6.833333,5.91358,0.289,0.279833,0.265947,17.2,16.833333,15.81893,0.5126,0.505,0.478457,9.0,8.666667,7.711934,57.64,57.633333,56.039095,38.4,37.666667,36.473251,0.5924,0.596833,0.582523,16.84,17.266667,16.885597,13.8,14.333333,13.823045,41.8,43.0,43.300412,0.4968,0.509167,0.505342,20.8,22.0,21.82716,1.0,1.0,1.0,4.0,3.5,4.263374,77.06,77.06,75.711111,17.0,17.0,17.271605,12.76,12.76,14.239506,3.8,3.8,4.320988,93.92,93.92,91.638272,59.78,59.78,59.535802,20.2,20.2,21.160494,0.533,0.533,0.531827,49.6,49.6,51.283951,0.4434,0.4434,0.44342,22.0,22.0,22.740741,0.4482,0.4482,0.434938,21.6,21.6,21.481481,0.703,0.703,0.717741,15.4,15.4,15.604938,0.8,0.8,0.802469,200.0,200.0,200.0,107.78,107.78,110.622222,31.3,31.3,30.783951,7.0,7.0,6.851852,18.2,18.2,18.419753,68.2,68.2,70.098765,11.26,11.26,11.935802,7.2,7.2,7.62963,0.5052,0.5052,0.490123,24.8,24.8,24.901235,0.3482,0.3482,0.356148,8.8,8.8,9.012346,46.6,46.6,46.661728,27.2,27.2,28.012346,0.5708,0.5708,0.571457,16.32,16.32,14.166667,11.2,11.2,9.654321,24.8,24.8,26.382716,0.54,0.54,0.528519,13.2,13.2,13.728395,0.7434,0.7434,0.779062,2.2,2.2,2.592593
8,2009-11-28,"Hytche Athletic Center, Princess Anne, Maryland",Maryland-Eastern Shore,American,Home,54.46,54.46,52.204938,10.4,10.4,9.925926,4.4,4.4,3.781481,1.6,1.6,1.320988,119.42,119.42,121.093827,61.14,61.14,63.783951,19.6,19.6,19.82716,0.414,0.414,0.406852,52.2,52.2,52.728395,0.3654,0.3654,0.360926,19.0,19.0,18.962963,0.4338,0.4338,0.45142,22.4,22.4,23.530864,0.6358,0.6358,0.634691,13.6,13.6,14.358025,3.0,3.0,3.395062,200.0,200.0,200.0,85.54,85.54,86.409877,30.68,30.68,31.037037,9.0,9.0,9.432099,18.8,18.8,18.592593,56.6,56.6,57.037037,6.3,6.3,5.969136,4.2,4.2,3.975309,0.3244,0.3244,0.328235,17.0,17.0,17.345679,0.3114,0.3114,0.288222,5.0,5.0,4.753086,44.76,44.76,45.819753,28.6,28.6,29.259259,0.4518,0.4518,0.44716,18.64,18.64,17.402469,14.0,14.0,13.123457,35.2,35.2,35.382716,0.3992,0.3992,0.403074,14.0,14.0,14.209877,0.0,0.0,0.0,0.0,0.0,0.0,64.06,64.06,64.554321,14.2,14.2,14.08642,9.14,9.14,9.62716,3.8,3.8,4.234568,103.34,103.34,100.969136,68.84,68.84,70.982716,24.8,24.8,26.123457,0.4594,0.4594,0.466395,53.8,53.8,52.246914,0.4142,0.4142,0.419284,22.2,22.2,21.814815,0.3294,0.3294,0.346432,17.6,17.6,17.91358,0.655,0.655,0.650889,10.6,10.6,10.851852,4.0,4.0,4.395062,200.0,200.0,200.0,89.4,89.4,87.774074,33.66,33.66,33.150617,9.0,9.0,8.530864,17.6,17.6,16.938272,59.8,59.8,59.333333,5.74,5.74,4.992593,3.8,3.8,3.296296,0.2618,0.2618,0.267222,14.0,14.0,13.888889,0.3418,0.3418,0.348457,4.8,4.8,4.851852,51.8,51.8,53.04321,33.8,33.8,34.654321,0.4816,0.4816,0.488914,20.36,20.36,22.316049,15.8,15.8,17.395062,39.8,39.8,38.358025,0.438,0.438,0.442778,17.4,17.4,16.962963,0.0,0.0,0.0,0.0,0.0,0.0
9,2009-11-28,"The Palestra, Philadelphia, Pennsylvania",Brown,Siena,Away,62.98,62.98,62.004938,14.8,14.8,15.037037,8.92,8.92,9.353086,4.2,4.2,4.37037,106.24,106.24,105.667901,55.4,55.4,53.988889,21.4,21.4,21.580247,0.5278,0.5278,0.542457,51.6,51.6,52.012346,0.4588,0.4588,0.46884,23.6,23.6,24.308642,0.326,0.326,0.336951,16.4,16.4,17.111111,0.6874,0.6874,0.716889,12.0,12.0,13.012346,1.8,1.8,2.061728,200.0,200.0,200.0,100.66,100.66,106.697531,31.38,31.38,32.645679,7.6,7.6,7.728395,15.6,15.6,15.197531,66.2,66.2,69.197531,6.26,6.26,5.653086,4.2,4.2,3.790123,0.3854,0.3854,0.404148,20.2,20.2,21.271605,0.3514,0.3514,0.360296,7.0,7.0,7.567901,44.6,44.6,44.724691,29.0,29.0,29.308642,0.559,0.559,0.576741,20.9,20.9,19.002469,15.6,15.6,14.160494,31.4,31.4,30.740741,0.5302,0.5302,0.546012,16.6,16.6,16.740741,0.4966,0.4966,0.485136,1.2,1.2,1.333333,58.98,58.98,59.146914,16.0,16.0,16.419753,9.12,9.12,8.658025,3.6,3.6,3.382716,89.5,89.5,91.364198,69.74,69.74,71.012346,24.8,24.8,24.604938,0.5096,0.5096,0.511,57.6,57.6,59.296296,0.4794,0.4794,0.478296,27.2,27.2,27.82716,0.46,0.46,0.439531,25.8,25.8,25.296296,0.6358,0.6358,0.635605,16.4,16.4,16.074074,0.8,0.8,1.037037,200.0,200.0,200.0,100.62,100.62,101.174074,41.2,41.2,41.734568,12.0,12.0,12.259259,16.0,16.0,16.851852,74.2,74.2,75.506173,13.06,13.06,12.179012,9.6,9.6,9.08642,0.2328,0.2328,0.242407,13.6,13.6,14.617284,0.2612,0.2612,0.273926,3.4,3.4,3.777778,55.5,55.5,55.933333,36.8,36.8,36.864198,0.5348,0.5348,0.534086,19.52,19.52,19.382716,16.4,16.4,16.62963,44.0,44.0,44.679012,0.544,0.544,0.542716,23.8,23.8,24.049383,0.8034,0.8034,0.761778,2.2,2.2,2.358025


In [14]:
# Save the dataframe that we just generated

clean_dir('../assets/data/cleaned_data')
data.to_csv('../assets/data/cleaned_data/cleaned_data.csv', index=None)