# Generating Team Data
In this notebook we will generate statistics on a per-team basis. This is better for training a model because it contains averages for each team **going into** a matchup.

# Imports

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import shutil
import zipfile

from tqdm.notebook import tqdm

pd.set_option("display.max_columns", None)

# Utils

In [2]:
def clean_dir(path):
    """Ensure `path` exists as an empty directory.

    The directory is created (together with any missing parent directories)
    if it does not exist yet. Every file, symlink and sub-directory directly
    inside it is then removed.

    Args:
        path: Directory to create and/or empty.

    Deletion is best-effort: an entry that cannot be removed is reported on
    stdout and the remaining entries are still processed.
    """

    # makedirs (rather than mkdir) so that a fresh checkout without the
    # intermediate folders does not fail with FileNotFoundError.
    os.makedirs(path, exist_ok=True)

    for filename in os.listdir(path):
        file_path = os.path.join(path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

# Load Data

We first need to load the data. There is a zip file called `seasons.zip` which we need to unzip to get at the per-season files. Then let's combine them into one giant table that has all of the games from all seasons.

In [3]:
PATH_TO_ZIP = '../assets/data/seasons.zip'
EXTRACTED_DIR = '../assets/data/'
GAMES_DIR = '../assets/data/seasons/'

def getGames(zip_path=PATH_TO_ZIP,
             extract_dir=EXTRACTED_DIR,
             directory_to_games=GAMES_DIR):
    """Load every game from the zipped season files into one DataFrame.

    Args:
        zip_path: Path of the zip archive to read.
        extract_dir: Directory the archive is extracted into.
        directory_to_games: Directory that holds the extracted CSV files.
            It is emptied before extraction and deleted again afterwards.

    Returns:
        A DataFrame with the rows of all CSV files, sorted by the `date`
        column and with a fresh 0..n-1 index.
    """

    # Use the arguments, not the module-level constants, so that callers
    # passing their own paths actually get those paths.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        clean_dir(directory_to_games)
        zip_ref.extractall(extract_dir)

    # sorted(): os.listdir returns entries in arbitrary order; a fixed read
    # order keeps the concatenated result reproducible between runs.
    dfs = [pd.read_csv(os.path.join(directory_to_games, file))
           for file in sorted(os.listdir(directory_to_games))]

    # The extracted files are only a temporary staging area.
    shutil.rmtree(directory_to_games)

    return pd.concat(dfs, ignore_index=True).sort_values(by=['date'], ignore_index=True)

# Generating Moving Averages

Next, we want to create a file for each team that contains all of their stats for each game.

In [4]:
# Every abbreviation that appears as a winner or a loser, whitespace-stripped,
# de-duplicated and sorted. getGames() is called once only: each call
# re-extracts the archive and re-parses every season file.
_games_for_teams = getGames()
TEAMS = sorted({abbr.strip()
                for column in ('winning_abbr', 'losing_abbr')
                for abbr in _games_for_teams[column]})
del _games_for_teams  # free the table; later cells load their own copy

In [5]:
original_data = getGames()

# Derive the visiting and the hosting team from the winner/loser columns:
# `winner` says which side won, so that side gets the winning abbreviation
# and the other side gets the losing one.
# The abbreviations are stripped so they compare equal to the (stripped)
# entries of TEAMS when games are later looked up per team.
winning_team = original_data["winning_abbr"].str.strip()
losing_team = original_data["losing_abbr"].str.strip()

original_data["away"] = np.where(original_data["winner"] == "Away",
                                 winning_team,
                                 losing_team)

original_data["home"] = np.where(original_data["winner"] == "Home",
                                 winning_team,
                                 losing_team)

# This will help later: the column order for the cleaned game table.
# Ten game-level columns come first, then every away-team stat, then the
# same stats (same order) for the home team.
_GAME_LEVEL_COLUMNS = [
    'date', 'location',
    'losing_abbr', 'losing_name',
    'pace',
    'winning_abbr', 'winning_name',
    'away', 'home', 'winner',
]

_PER_SIDE_STATS = [
    'assist_percentage', 'assists',
    'block_percentage', 'blocks',
    'defensive_rating',
    'defensive_rebound_percentage', 'defensive_rebounds',
    'effective_field_goal_percentage',
    'field_goal_attempts', 'field_goal_percentage', 'field_goals',
    'free_throw_attempt_rate', 'free_throw_attempts',
    'free_throw_percentage', 'free_throws',
    'losses',
    'minutes_played',
    'offensive_rating',
    'offensive_rebound_percentage', 'offensive_rebounds',
    'personal_fouls',
    'points',
    'steal_percentage', 'steals',
    'three_point_attempt_rate', 'three_point_field_goal_attempts',
    'three_point_field_goal_percentage', 'three_point_field_goals',
    'total_rebound_percentage', 'total_rebounds',
    'true_shooting_percentage',
    'turnover_percentage', 'turnovers',
    'two_point_field_goal_attempts', 'two_point_field_goal_percentage',
    'two_point_field_goals',
    'win_percentage', 'wins',
]

reordered_labels = (
    _GAME_LEVEL_COLUMNS
    + [f'away_{stat}' for stat in _PER_SIDE_STATS]
    + [f'home_{stat}' for stat in _PER_SIDE_STATS]
)

# Normalise the dates to ISO 'YYYY-MM-DD' strings, keep only the columns
# listed in `reordered_labels` (which also discards the ranking columns),
# then remove incomplete and repeated rows.
# The steps are chained without inplace=True: mutating a column subset in
# place is what makes pandas emit SettingWithCopyWarning.
original_data = (
    original_data
    .assign(date=pd.to_datetime(original_data['date']).dt.strftime('%Y-%m-%d'))
    .loc[:, reordered_labels]
    .dropna()
    .drop_duplicates()
)

print(f"There are {original_data.shape[0]} examples in the set")

There are 58764 examples in the set


In [6]:
# Column names of a single team's game log: five game-identifying columns
# followed by the per-team statistics without a home_/away_ prefix.
new_labels = [
    # game identity
    'date', 'location', 'away', 'home', 'winner',
    # per-team statistics
    'assist_percentage', 'assists',
    'block_percentage', 'blocks',
    'defensive_rating',
    'defensive_rebound_percentage', 'defensive_rebounds',
    'effective_field_goal_percentage',
    'field_goal_attempts', 'field_goal_percentage', 'field_goals',
    'free_throw_attempt_rate', 'free_throw_attempts',
    'free_throw_percentage', 'free_throws',
    'losses',
    'minutes_played',
    'offensive_rating',
    'offensive_rebound_percentage', 'offensive_rebounds',
    'personal_fouls',
    'points',
    'steal_percentage', 'steals',
    'three_point_attempt_rate', 'three_point_field_goal_attempts',
    'three_point_field_goal_percentage', 'three_point_field_goals',
    'total_rebound_percentage', 'total_rebounds',
    'true_shooting_percentage',
    'turnover_percentage', 'turnovers',
    'two_point_field_goal_attempts', 'two_point_field_goal_percentage',
    'two_point_field_goals',
    'win_percentage', 'wins',
]

def generateTeamSats(dataframe, teams=TEAMS, folder='../assets/data/team_data/'):
    """For each team, write a CSV of the games it played, from its own point of view.

    A team's home games contribute the `home_*` statistics and its away games
    the `away_*` statistics; in both cases the prefix is removed so the file
    has the columns listed in `new_labels`. Rows are de-duplicated, rows with
    missing values are dropped, and the result is sorted by `date`.

    Args:
        dataframe: Game table with the columns `date`, `location`, `away`,
            `home`, `winner` plus `home_<stat>` / `away_<stat>` for every
            stat in `new_labels[5:]`.
        teams: Team abbreviations to generate files for.
        folder: Output directory; it is emptied first. Files are named
            `<team>_data.csv`.
    """

    # Clean the folder
    clean_dir(folder)

    shared_cols = new_labels[:5]
    stat_cols = new_labels[5:]

    def _games_as(team, side):
        """Games `team` played as `side` ('home' or 'away') with that side's stats un-prefixed."""
        prefixed = [f"{side}_{stat}" for stat in stat_cols]
        # Select and rename by name rather than by position, so a different
        # column order in `dataframe` cannot silently mislabel statistics.
        games = dataframe.loc[dataframe[side] == team, shared_cols + prefixed]
        return games.rename(columns=dict(zip(prefixed, stat_cols)))

    # Loop through all of the teams
    for team in tqdm(teams, unit="teams"):
        # Join the home games and away games, sort by date
        team_stats = pd.concat([_games_as(team, "home"), _games_as(team, "away")])
        team_stats = team_stats.drop_duplicates().dropna().sort_values(by=["date"])

        # Save the stats
        team_stats.to_csv(os.path.join(folder, f'{team}_data.csv'), index=False)

In [7]:
# Write one game-log CSV per team from the cleaned game table.
generateTeamSats(original_data)

  0%|          | 0/1169 [00:00<?, ?teams/s]

In [8]:
# Spot-check the generated file for one team.
pd.read_csv('../assets/data/team_data/CONNECTICUT_data.csv').head()

Unnamed: 0,date,location,away,home,winner,assist_percentage,assists,block_percentage,blocks,defensive_rating,defensive_rebound_percentage,defensive_rebounds,effective_field_goal_percentage,field_goal_attempts,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,losses,minutes_played,offensive_rating,offensive_rebound_percentage,offensive_rebounds,personal_fouls,points,steal_percentage,steals,three_point_attempt_rate,three_point_field_goal_attempts,three_point_field_goal_percentage,three_point_field_goals,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins
0,2009-11-13,"Harry A. Gampel Pavilion, Storrs, Connecticut",WILLIAM-MARY,CONNECTICUT,Home,55.2,16,26.9,7,101.5,59.4,20,0.583,54,0.537,29,0.389,21,0.571,12,0,200.0,115.4,40.0,9,10,75,18.5,12,0.296,16.0,0.313,5.0,50.9,29,0.586,13.7,10,38.0,0.632,24.0,1.0,1
1,2009-11-16,"Harry A. Gampel Pavilion, Storrs, Connecticut",COLGATE,CONNECTICUT,Home,62.5,20,21.6,8,100.0,42.9,13,0.621,58,0.552,32,0.172,10,0.5,5,0,200.0,122.2,54.2,9,10,77,11.1,7,0.328,19.0,0.421,8.0,48.9,22,0.614,17.2,13,39.0,0.615,24.0,1.0,2
2,2009-11-17,"Harry A. Gampel Pavilion, Storrs, Connecticut",HOFSTRA,CONNECTICUT,Home,54.5,12,28.3,13,94.4,57.8,28,0.434,53,0.415,22,0.679,36,0.833,30,0,200.0,107.0,31.0,7,13,76,8.5,6,0.151,8.0,0.25,2.0,47.3,35,0.542,12.7,10,45.0,0.444,20.0,1.0,3
3,2009-11-25,"Madison Square Garden (IV), New York, New York",CONNECTICUT,LOUISIANA-STATE,Away,63.3,19,25.0,13,79.7,69.2,29,0.5,65,0.462,30,0.354,23,0.696,16,0,200.0,117.4,55.9,17,17,81,13.0,9,0.154,10.0,0.5,5.0,63.0,46,0.533,13.8,12,55.0,0.455,25.0,1.0,4
4,2009-11-27,"Madison Square Garden (IV), New York, New York",DUKE,CONNECTICUT,Away,59.1,13,16.1,9,91.9,46.8,26,0.373,59,0.373,22,0.475,28,0.536,15,1,200.0,79.7,37.8,10,21,59,5.4,4,0.068,4.0,0.0,0.0,42.9,36,0.408,18.3,16,55.0,0.4,22.0,0.8,4


In [9]:
def compute_ma(span, teams=TEAMS, team_data_folder='../assets/data/team_data/', dest_folder='../assets/data/team_ma/'):
    """Write, for every team, its game log extended with lagged moving averages.

    For each statistic column three columns are appended, in this order:

    * `<col>_SMA`: rolling mean over the last `span` rows,
    * `<col>_CMA`: expanding mean, available once `span` rows exist,
    * `<col>_EMA`: exponentially weighted mean (`span=span, adjust=False`).

    Every average is shifted down by one row, so a game's row only carries
    values computed from the rows before it. Rows are averaged in file order,
    so the input files are expected to be sorted by date. Rows that still
    contain a missing value (the first `span` rows) are dropped.

    Args:
        span: Window / span of the moving averages, in games.
        teams: Team abbreviations to process.
        team_data_folder: Folder holding the `<team>_data.csv` inputs.
        dest_folder: Output folder; it is emptied first. Files are named
            `<team>_ma.csv`.
    """

    # Columns that identify a game rather than measure it; never averaged.
    id_columns = {'date', 'location', 'away', 'home', 'winner'}

    # Clean directory we will be saving the CSVs to
    clean_dir(dest_folder)

    for team in tqdm(teams, unit='teams'):
        # Load the stats for a given team
        team_stats = pd.read_csv(os.path.join(team_data_folder, f"{team}_data.csv"))

        # Collect the new columns first and attach them in one concat:
        # inserting them one at a time fragments the frame and makes pandas
        # emit a PerformanceWarning.
        averages = {}
        for col in team_stats.columns:
            if col in id_columns:
                continue

            series = team_stats[col]

            # Simple moving average
            averages[f"{col}_SMA"] = series.rolling(window=span).mean().shift(1)

            # Cumulative moving average
            averages[f"{col}_CMA"] = series.expanding(min_periods=span).mean().shift(1)

            # Exponential moving average
            averages[f"{col}_EMA"] = series.ewm(span=span, adjust=False).mean().shift(1)

        team_ma = pd.concat([team_stats, pd.DataFrame(averages, index=team_stats.index)],
                            axis=1)

        # Drop any rows with NULL values and save the CSV
        team_ma = team_ma.dropna().drop_duplicates()
        team_ma.to_csv(os.path.join(dest_folder, f"{team}_ma.csv"), index=False)

In [10]:
# Build the moving-average files with a span of 5 games.
compute_ma(5)

  0%|          | 0/1169 [00:00<?, ?teams/s]

In [11]:
# Spot-check the moving-average file for one team.
pd.read_csv('../assets/data/team_ma/CONNECTICUT_ma.csv').head()

Unnamed: 0,date,location,away,home,winner,assist_percentage,assists,block_percentage,blocks,defensive_rating,defensive_rebound_percentage,defensive_rebounds,effective_field_goal_percentage,field_goal_attempts,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,losses,minutes_played,offensive_rating,offensive_rebound_percentage,offensive_rebounds,personal_fouls,points,steal_percentage,steals,three_point_attempt_rate,three_point_field_goal_attempts,three_point_field_goal_percentage,three_point_field_goals,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins,assist_percentage_SMA,assist_percentage_CMA,assist_percentage_EMA,assists_SMA,assists_CMA,assists_EMA,block_percentage_SMA,block_percentage_CMA,block_percentage_EMA,blocks_SMA,blocks_CMA,blocks_EMA,defensive_rating_SMA,defensive_rating_CMA,defensive_rating_EMA,defensive_rebound_percentage_SMA,defensive_rebound_percentage_CMA,defensive_rebound_percentage_EMA,defensive_rebounds_SMA,defensive_rebounds_CMA,defensive_rebounds_EMA,effective_field_goal_percentage_SMA,effective_field_goal_percentage_CMA,effective_field_goal_percentage_EMA,field_goal_attempts_SMA,field_goal_attempts_CMA,field_goal_attempts_EMA,field_goal_percentage_SMA,field_goal_percentage_CMA,field_goal_percentage_EMA,field_goals_SMA,field_goals_CMA,field_goals_EMA,free_throw_attempt_rate_SMA,free_throw_attempt_rate_CMA,free_throw_attempt_rate_EMA,free_throw_attempts_SMA,free_throw_attempts_CMA,free_throw_attempts_EMA,free_throw_percentage_SMA,free_throw_percentage_CMA,free_throw_percentage_EMA,free_throws_SMA,free_throws_CMA,free_throws_EMA,losses_SMA,losses_CMA,losses_EMA,minutes_played_SMA,minutes_played_CMA,minutes_played_EMA,offensive_rating_SMA,offensive_rating_CMA,offensive_rating_EMA,offensive_rebound_percentage_SMA,offensive_rebound_percentage_
CMA,offensive_rebound_percentage_EMA,offensive_rebounds_SMA,offensive_rebounds_CMA,offensive_rebounds_EMA,personal_fouls_SMA,personal_fouls_CMA,personal_fouls_EMA,points_SMA,points_CMA,points_EMA,steal_percentage_SMA,steal_percentage_CMA,steal_percentage_EMA,steals_SMA,steals_CMA,steals_EMA,three_point_attempt_rate_SMA,three_point_attempt_rate_CMA,three_point_attempt_rate_EMA,three_point_field_goal_attempts_SMA,three_point_field_goal_attempts_CMA,three_point_field_goal_attempts_EMA,three_point_field_goal_percentage_SMA,three_point_field_goal_percentage_CMA,three_point_field_goal_percentage_EMA,three_point_field_goals_SMA,three_point_field_goals_CMA,three_point_field_goals_EMA,total_rebound_percentage_SMA,total_rebound_percentage_CMA,total_rebound_percentage_EMA,total_rebounds_SMA,total_rebounds_CMA,total_rebounds_EMA,true_shooting_percentage_SMA,true_shooting_percentage_CMA,true_shooting_percentage_EMA,turnover_percentage_SMA,turnover_percentage_CMA,turnover_percentage_EMA,turnovers_SMA,turnovers_CMA,turnovers_EMA,two_point_field_goal_attempts_SMA,two_point_field_goal_attempts_CMA,two_point_field_goal_attempts_EMA,two_point_field_goal_percentage_SMA,two_point_field_goal_percentage_CMA,two_point_field_goal_percentage_EMA,two_point_field_goals_SMA,two_point_field_goals_CMA,two_point_field_goals_EMA,win_percentage_SMA,win_percentage_CMA,win_percentage_EMA,wins_SMA,wins_CMA,wins_EMA
0,2009-12-02,"Harry A. Gampel Pavilion, Storrs, Connecticut",BOSTON-UNIVERSITY,CONNECTICUT,Home,62.1,18,14.6,6,84.2,72.7,33,0.517,60,0.483,29,0.583,35,0.857,30,1,200.0,121.1,58.1,17,17,92,9.2,7,0.183,11.0,0.364,4.0,66.7,50,0.6,18.4,17,49.0,0.51,25.0,0.833,5,58.92,58.92,58.917284,16.0,16.0,15.469136,23.58,23.58,22.561728,10.0,10.0,9.987654,93.5,93.5,92.255556,55.22,55.22,55.511111,23.2,23.2,24.493827,0.5022,0.5022,0.476235,57.8,57.8,58.358025,0.4678,0.4678,0.449074,27.0,27.0,26.148148,0.4138,0.4138,0.43142,23.6,23.6,24.91358,0.6272,0.6272,0.618914,15.6,15.6,15.864198,0.2,0.2,0.333333,200.0,200.0,200.0,108.34,108.34,103.371605,43.78,43.78,42.869136,10.4,10.4,10.814815,14.2,14.2,15.666667,73.6,73.6,71.345679,11.3,11.3,10.698765,7.6,7.6,7.283951,0.1994,0.1994,0.170123,11.4,11.4,9.777778,0.2968,0.2968,0.251556,4.0,4.0,3.185185,50.6,50.6,50.191358,33.6,33.6,35.308642,0.5366,0.5366,0.511136,15.14,15.14,15.453086,12.2,12.2,12.740741,46.4,46.4,48.580247,0.5092,0.5092,0.485802,23.0,23.0,22.962963,0.96,0.96,0.933333,2.8,2.8,3.061728
1,2009-12-06,"Harry A. Gampel Pavilion, Storrs, Connecticut",HARVARD,CONNECTICUT,Home,64.0,16,28.3,13,96.1,69.2,28,0.441,59,0.424,25,0.695,41,0.659,27,1,200.0,103.9,43.2,15,18,79,9.2,7,0.102,6.0,0.333,2.0,56.6,43,0.503,13.5,12,53.0,0.434,23.0,0.857,6,60.3,59.45,59.978189,16.4,16.333333,16.312757,21.12,22.083333,19.907819,9.8,9.333333,8.658436,90.04,91.95,89.57037,57.88,58.133333,61.240741,25.8,24.833333,27.329218,0.489,0.504667,0.489823,59.0,58.166667,58.90535,0.457,0.470333,0.460383,27.0,27.333333,27.098765,0.4526,0.442,0.481947,26.4,25.5,28.27572,0.6844,0.6655,0.698276,19.2,18.0,20.576132,0.4,0.333333,0.555556,200.0,200.0,200.0,109.48,110.466667,109.28107,47.4,46.166667,47.946091,12.0,11.5,12.876543,15.6,14.666667,16.111111,77.0,76.666667,78.230453,9.44,10.95,10.199177,6.6,7.5,7.1893,0.1768,0.196667,0.174416,10.4,11.333333,10.185185,0.307,0.308,0.289037,3.8,4.0,3.45679,53.76,53.283333,55.694239,37.8,36.333333,40.205761,0.5394,0.547167,0.540757,16.08,15.683333,16.435391,13.6,13.0,14.160494,48.6,46.833333,48.720165,0.4848,0.509333,0.493868,23.2,23.333333,23.641975,0.9266,0.938833,0.899889,3.6,3.166667,3.707819
2,2009-12-09,"Madison Square Garden (IV), New York, New York",KENTUCKY,CONNECTICUT,Away,47.8,11,15.4,8,92.8,60.5,25,0.461,51,0.451,23,0.471,24,0.583,14,2,200.0,88.4,48.4,13,18,61,4.3,3,0.118,6.0,0.167,1.0,55.1,38,0.489,24.5,19,45.0,0.489,22.0,0.75,6,60.6,60.1,61.318793,15.6,16.285714,16.208505,22.46,22.971429,22.705213,10.8,9.857143,10.105624,89.26,92.542857,91.746914,63.14,59.714286,63.893827,28.8,25.285714,27.552812,0.453,0.495571,0.473549,59.2,58.285714,58.9369,0.4314,0.463714,0.448255,25.6,27.0,26.399177,0.5572,0.478143,0.552964,32.6,27.714286,32.517147,0.7162,0.664571,0.685184,23.6,19.285714,22.717421,0.6,0.428571,0.703704,200.0,200.0,200.0,105.82,109.528571,107.48738,45.2,45.742857,46.36406,13.2,12.0,13.584362,17.2,15.142857,16.740741,77.4,77.0,78.486968,9.06,10.7,9.866118,6.6,7.428571,7.1262,0.1316,0.183143,0.150277,7.8,10.571429,8.790123,0.2894,0.311571,0.303691,2.6,3.714286,2.971193,55.3,53.757143,55.996159,42.0,37.285714,41.137174,0.5172,0.540857,0.528171,15.34,15.371429,15.456927,13.4,12.857143,13.440329,51.4,47.714286,50.146776,0.4486,0.498571,0.473912,23.0,23.285714,23.427984,0.898,0.927143,0.885593,4.4,3.571429,4.471879
3,2009-12-20,"Harry A. Gampel Pavilion, Storrs, Connecticut",CENTRAL-FLORIDA,CONNECTICUT,Home,83.3,15,22.4,11,81.0,57.9,24,0.524,41,0.439,18,0.683,28,0.607,17,2,200.0,95.2,34.6,7,13,60,11.1,7,0.317,13.0,0.538,7.0,48.4,31,0.552,24.2,17,28.0,0.393,11.0,0.778,7,59.26,58.5625,56.812529,15.4,15.625,14.472337,19.88,22.025,20.270142,9.8,9.625,9.403749,88.94,92.575,92.097942,63.68,59.8125,62.762551,28.2,25.25,26.701875,0.4584,0.49125,0.469366,58.8,57.375,56.291267,0.4386,0.462125,0.44917,25.8,26.5,25.266118,0.5156,0.47725,0.525643,30.2,27.25,29.678098,0.6662,0.654375,0.651123,20.4,18.625,19.811614,1.0,0.625,1.135802,200.0,200.0,200.0,102.1,106.8875,101.12492,48.68,46.075,47.042707,14.4,12.125,13.389575,18.2,15.5,17.160494,74.4,75.0,72.657979,8.22,9.9,8.010745,6.0,6.875,5.7508,0.125,0.175,0.139518,7.4,10.0,7.860082,0.2728,0.2935,0.258128,2.4,3.375,2.314129,56.86,53.925,55.697439,42.6,37.375,40.091449,0.5066,0.534375,0.515114,17.7,16.5125,18.471285,15.2,13.625,15.293553,51.4,47.375,48.431184,0.4576,0.497375,0.478941,23.4,23.125,22.951989,0.848,0.905,0.840395,5.0,3.875,4.981253
4,2009-12-22,"Harry A. Gampel Pavilion, Storrs, Connecticut",MAINE,CONNECTICUT,Home,66.7,16,44.4,16,79.4,75.0,33,0.491,57,0.421,24,0.439,25,0.6,15,2,200.0,104.4,42.4,14,13,71,10.3,7,0.386,22.0,0.364,8.0,61.0,47,0.515,15.0,12,35.0,0.457,16.0,0.8,8,63.26,61.311111,65.641686,14.6,15.555556,14.648224,19.36,22.066667,20.980094,9.4,9.777778,9.935833,89.2,91.288889,88.398628,61.42,59.6,61.141701,27.2,25.111111,25.80125,0.4632,0.494889,0.487577,54.0,55.555556,51.194178,0.434,0.459556,0.44578,23.4,25.555556,22.844079,0.5814,0.500111,0.578095,31.2,27.333333,29.118732,0.6484,0.649111,0.636415,20.6,18.444444,18.874409,1.4,0.777778,1.423868,200.0,200.0,200.0,97.66,105.588889,99.149947,44.42,44.8,42.895138,12.4,11.555556,11.259717,17.4,15.222222,15.773663,70.2,73.333333,68.438653,7.84,10.033333,9.040497,5.6,6.888889,6.1672,0.1576,0.190778,0.198679,8.0,10.333333,9.573388,0.2804,0.320667,0.351418,2.8,3.777778,3.876086,53.94,53.311111,53.26496,39.6,36.666667,37.060966,0.5104,0.536333,0.52741,19.78,17.366667,20.380857,16.2,14.0,15.862369,46.0,45.222222,41.62079,0.4452,0.485778,0.450294,20.6,21.777778,18.967993,0.8036,0.890889,0.819597,5.6,4.222222,5.654169


# Getting the Data Ready for Testing
Now that we have computed the moving averages (simple, cumulative and exponential) for each team, we need to join them back into one table so that it is easier to train models on.

In [12]:
# We identify a game by its date, location, the two teams and the winner.
merge_keys = ["date", "location", "home", "away", "winner"]

num_rows = original_data.shape[0]
# Keep the result of drop_duplicates(): calling it without assigning is a
# no-op and would make the check below vacuous.
games = original_data.filter(merge_keys).drop_duplicates()

# Just to make sure that there is no lost data
assert games.shape[0] == num_rows, "the key columns do not uniquely identify a game"

# Arrays to hold the home and away dataframes
homes=[]
aways=[]

def _merge_side(team_avgs, team, side):
    """Attach `team`'s pre-game averages to the games it played as `side` ('home' or 'away')."""
    # An inner join keeps exactly the rows that a left join followed by
    # dropna() would keep, without first building a frame of all games.
    merged = pd.merge(games,
                      team_avgs.loc[team_avgs[side] == team],
                      on=merge_keys,
                      how="inner")

    # Drop any rows with NULL values and any repeated rows
    merged = merged.dropna().drop_duplicates()

    # Prefix every non-key column with the side it describes
    merged.columns = (list(merged.columns[:5])
                      + [f"{side}_" + col for col in merged.columns[5:]])
    return merged

# Add the moving averages
for team in tqdm(TEAMS, unit='teams'):
    # Load the file and keep only the key columns and the moving averages;
    # the raw per-game statistics describe the game's own outcome.
    team_avgs = pd.read_csv(f'../assets/data/team_ma/{team}_ma.csv')
    team_avgs = team_avgs.drop(columns=new_labels[5:])

    # Merge in visiting team stats
    away = _merge_side(team_avgs, team, "away")
    aways.append(away)

    # Merge in home team stats
    home = _merge_side(team_avgs, team, "home")
    homes.append(home)

    # Sanity check, make sure that the column sizes match for both home and away dataframes
    assert home.shape[1] == away.shape[1]

  0%|          | 0/1169 [00:00<?, ?teams/s]

In [13]:
# Pair each game's home-team averages with its away-team averages via the
# shared key columns, order the games chronologically, then discard repeated
# and incomplete rows.
data = (
    pd.concat(homes)
    .merge(pd.concat(aways), on=["date", "location", "home", "away", "winner"])
    .sort_values("date")
    .reset_index(drop=True)
    .drop_duplicates()
    .dropna()
)
data.head(10)

Unnamed: 0,date,location,home,away,winner,home_assist_percentage_SMA,home_assist_percentage_CMA,home_assist_percentage_EMA,home_assists_SMA,home_assists_CMA,home_assists_EMA,home_block_percentage_SMA,home_block_percentage_CMA,home_block_percentage_EMA,home_blocks_SMA,home_blocks_CMA,home_blocks_EMA,home_defensive_rating_SMA,home_defensive_rating_CMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_SMA,home_defensive_rebound_percentage_CMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_SMA,home_defensive_rebounds_CMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_SMA,home_effective_field_goal_percentage_CMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_SMA,home_field_goal_attempts_CMA,home_field_goal_attempts_EMA,home_field_goal_percentage_SMA,home_field_goal_percentage_CMA,home_field_goal_percentage_EMA,home_field_goals_SMA,home_field_goals_CMA,home_field_goals_EMA,home_free_throw_attempt_rate_SMA,home_free_throw_attempt_rate_CMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_SMA,home_free_throw_attempts_CMA,home_free_throw_attempts_EMA,home_free_throw_percentage_SMA,home_free_throw_percentage_CMA,home_free_throw_percentage_EMA,home_free_throws_SMA,home_free_throws_CMA,home_free_throws_EMA,home_losses_SMA,home_losses_CMA,home_losses_EMA,home_minutes_played_SMA,home_minutes_played_CMA,home_minutes_played_EMA,home_offensive_rating_SMA,home_offensive_rating_CMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_SMA,home_offensive_rebound_percentage_CMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_SMA,home_offensive_rebounds_CMA,home_offensive_rebounds_EMA,home_personal_fouls_SMA,home_personal_fouls_CMA,home_personal_fouls_EMA,home_points_SMA,home_points_CMA,home_points_EMA,home_steal_percentage_SMA,home_steal_percentage_CMA,home_steal_percentage_EMA,home_steals_SMA,home_steals_CMA,home_steals_EMA,home_three_point_attempt_rate_SMA,home_three_point_attempt_rate_CMA,home_t
hree_point_attempt_rate_EMA,home_three_point_field_goal_attempts_SMA,home_three_point_field_goal_attempts_CMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_SMA,home_three_point_field_goal_percentage_CMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_SMA,home_three_point_field_goals_CMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_SMA,home_total_rebound_percentage_CMA,home_total_rebound_percentage_EMA,home_total_rebounds_SMA,home_total_rebounds_CMA,home_total_rebounds_EMA,home_true_shooting_percentage_SMA,home_true_shooting_percentage_CMA,home_true_shooting_percentage_EMA,home_turnover_percentage_SMA,home_turnover_percentage_CMA,home_turnover_percentage_EMA,home_turnovers_SMA,home_turnovers_CMA,home_turnovers_EMA,home_two_point_field_goal_attempts_SMA,home_two_point_field_goal_attempts_CMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_SMA,home_two_point_field_goal_percentage_CMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_SMA,home_two_point_field_goals_CMA,home_two_point_field_goals_EMA,home_win_percentage_SMA,home_win_percentage_CMA,home_win_percentage_EMA,home_wins_SMA,home_wins_CMA,home_wins_EMA,away_assist_percentage_SMA,away_assist_percentage_CMA,away_assist_percentage_EMA,away_assists_SMA,away_assists_CMA,away_assists_EMA,away_block_percentage_SMA,away_block_percentage_CMA,away_block_percentage_EMA,away_blocks_SMA,away_blocks_CMA,away_blocks_EMA,away_defensive_rating_SMA,away_defensive_rating_CMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_SMA,away_defensive_rebound_percentage_CMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_SMA,away_defensive_rebounds_CMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_SMA,away_effective_field_goal_percentage_CMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_SMA,away_field_goal_attempts_CMA,away_field_goal_attempts_EMA,away_field
_goal_percentage_SMA,away_field_goal_percentage_CMA,away_field_goal_percentage_EMA,away_field_goals_SMA,away_field_goals_CMA,away_field_goals_EMA,away_free_throw_attempt_rate_SMA,away_free_throw_attempt_rate_CMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_SMA,away_free_throw_attempts_CMA,away_free_throw_attempts_EMA,away_free_throw_percentage_SMA,away_free_throw_percentage_CMA,away_free_throw_percentage_EMA,away_free_throws_SMA,away_free_throws_CMA,away_free_throws_EMA,away_losses_SMA,away_losses_CMA,away_losses_EMA,away_minutes_played_SMA,away_minutes_played_CMA,away_minutes_played_EMA,away_offensive_rating_SMA,away_offensive_rating_CMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_SMA,away_offensive_rebound_percentage_CMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_SMA,away_offensive_rebounds_CMA,away_offensive_rebounds_EMA,away_personal_fouls_SMA,away_personal_fouls_CMA,away_personal_fouls_EMA,away_points_SMA,away_points_CMA,away_points_EMA,away_steal_percentage_SMA,away_steal_percentage_CMA,away_steal_percentage_EMA,away_steals_SMA,away_steals_CMA,away_steals_EMA,away_three_point_attempt_rate_SMA,away_three_point_attempt_rate_CMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_SMA,away_three_point_field_goal_attempts_CMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_SMA,away_three_point_field_goal_percentage_CMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_SMA,away_three_point_field_goals_CMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_SMA,away_total_rebound_percentage_CMA,away_total_rebound_percentage_EMA,away_total_rebounds_SMA,away_total_rebounds_CMA,away_total_rebounds_EMA,away_true_shooting_percentage_SMA,away_true_shooting_percentage_CMA,away_true_shooting_percentage_EMA,away_turnover_percentage_SMA,away_turnover_percentage_CMA,away_turnover_percentage_EMA,away_turnovers_SMA,away_turnovers_CMA,away_turnovers_E
MA,away_two_point_field_goal_attempts_SMA,away_two_point_field_goal_attempts_CMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_SMA,away_two_point_field_goal_percentage_CMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_SMA,away_two_point_field_goals_CMA,away_two_point_field_goals_EMA,away_win_percentage_SMA,away_win_percentage_CMA,away_win_percentage_EMA,away_wins_SMA,away_wins_CMA,away_wins_EMA
0,2009-11-28,"Bartow Arena, Birmingham, Alabama",ALABAMA-BIRMINGHAM,FLORIDA-AM,Home,48.84,47.366667,46.636626,11.8,11.5,11.777778,8.14,8.5,9.297531,2.8,3.0,3.358025,96.14,95.416667,92.79465,63.88,63.783333,63.360905,21.0,21.5,22.156379,0.5106,0.507333,0.526992,54.2,54.666667,55.069959,0.4484,0.446833,0.463366,24.4,24.5,25.580247,0.4114,0.398333,0.394679,22.4,21.833333,21.786008,0.6944,0.684,0.688214,15.8,15.166667,15.135802,1.0,0.833333,0.868313,200.0,200.0,200.0,113.42,113.1,115.725514,43.12,45.45,46.506173,10.8,10.833333,11.736626,16.8,16.666667,16.604938,71.2,70.666667,73.222222,10.3,9.683333,9.858436,6.4,6.0,6.1893,0.3218,0.309167,0.323486,17.2,16.666667,17.683128,0.379,0.387333,0.395609,6.6,6.5,6.925926,52.96,54.183333,54.906996,31.8,32.333333,33.893004,0.5468,0.5415,0.558185,14.72,14.483333,15.52963,11.2,11.0,12.098765,37.0,38.0,37.386831,0.4798,0.4735,0.50028,17.8,18.0,18.654321,0.71,0.758333,0.797041,3.0,2.666667,3.395062,51.9,51.9,54.960494,11.8,11.8,12.444444,5.14,5.14,5.058025,2.0,2.0,1.987654,111.08,111.08,110.341975,57.72,57.72,56.430864,21.0,21.0,20.320988,0.4154,0.4154,0.408111,61.2,61.2,63.17284,0.3682,0.3682,0.360469,22.4,22.4,22.518519,0.2732,0.2732,0.254852,16.4,16.4,15.62963,0.626,0.626,0.56921,10.4,10.4,9.234568,3.6,3.6,4.098765,200.0,200.0,200.0,77.86,77.86,78.053086,32.0,32.0,31.72963,10.0,10.0,10.407407,19.2,19.2,19.382716,61.0,61.0,60.333333,7.94,7.94,8.424691,6.2,6.2,6.481481,0.3366,0.3366,0.338222,20.6,20.6,21.382716,0.283,0.283,0.284198,5.8,5.8,6.061728,44.28,44.28,43.192593,31.0,31.0,30.728395,0.4436,0.4436,0.430062,23.36,23.36,21.008642,20.4,20.4,18.358025,40.6,40.6,41.790123,0.4122,0.4122,0.400951,16.6,16.6,16.45679,0.0,0.0,0.0,0.0,0.0,0.0
1,2009-11-28,"Hytche Athletic Center, Princess Anne, Maryland",MARYLAND-EASTERN-SHORE,AMERICAN,Home,54.46,54.46,52.204938,10.4,10.4,9.925926,4.4,4.4,3.781481,1.6,1.6,1.320988,119.42,119.42,121.093827,61.14,61.14,63.783951,19.6,19.6,19.82716,0.414,0.414,0.406852,52.2,52.2,52.728395,0.3654,0.3654,0.360926,19.0,19.0,18.962963,0.4338,0.4338,0.45142,22.4,22.4,23.530864,0.6358,0.6358,0.634691,13.6,13.6,14.358025,3.0,3.0,3.395062,200.0,200.0,200.0,85.54,85.54,86.409877,30.68,30.68,31.037037,9.0,9.0,9.432099,18.8,18.8,18.592593,56.6,56.6,57.037037,6.3,6.3,5.969136,4.2,4.2,3.975309,0.3244,0.3244,0.328235,17.0,17.0,17.345679,0.3114,0.3114,0.288222,5.0,5.0,4.753086,44.76,44.76,45.819753,28.6,28.6,29.259259,0.4518,0.4518,0.44716,18.64,18.64,17.402469,14.0,14.0,13.123457,35.2,35.2,35.382716,0.3992,0.3992,0.403074,14.0,14.0,14.209877,0.0,0.0,0.0,0.0,0.0,0.0,64.06,64.06,64.554321,14.2,14.2,14.08642,9.14,9.14,9.62716,3.8,3.8,4.234568,103.34,103.34,100.969136,68.84,68.84,70.982716,24.8,24.8,26.123457,0.4594,0.4594,0.466395,53.8,53.8,52.246914,0.4142,0.4142,0.419284,22.2,22.2,21.814815,0.3294,0.3294,0.346432,17.6,17.6,17.91358,0.655,0.655,0.650889,10.6,10.6,10.851852,4.0,4.0,4.395062,200.0,200.0,200.0,89.4,89.4,87.774074,33.66,33.66,33.150617,9.0,9.0,8.530864,17.6,17.6,16.938272,59.8,59.8,59.333333,5.74,5.74,4.992593,3.8,3.8,3.296296,0.2618,0.2618,0.267222,14.0,14.0,13.888889,0.3418,0.3418,0.348457,4.8,4.8,4.851852,51.8,51.8,53.04321,33.8,33.8,34.654321,0.4816,0.4816,0.488914,20.36,20.36,22.316049,15.8,15.8,17.395062,39.8,39.8,38.358025,0.438,0.438,0.442778,17.4,17.4,16.962963,0.0,0.0,0.0,0.0,0.0,0.0
2,2009-11-28,"South Padre Island Convention Centre, South Pa...",OLD-DOMINION,MISSISSIPPI-STATE,Away,63.76,63.76,65.154321,17.4,17.4,16.197531,7.84,7.84,7.446914,2.8,2.8,2.641975,82.18,82.18,85.990123,68.18,68.18,65.302469,24.0,24.0,22.839506,0.541,0.541,0.531852,55.4,55.4,51.580247,0.4894,0.4894,0.480321,27.4,27.4,25.024691,0.3766,0.3766,0.431667,19.4,19.4,20.987654,0.672,0.672,0.678247,13.6,13.6,14.703704,0.2,0.2,0.333333,200.0,200.0,200.0,114.88,114.88,109.819753,51.6,51.6,51.834568,12.6,12.6,11.851852,16.6,16.6,16.975309,74.2,74.2,70.012346,10.78,10.78,10.235802,7.0,7.0,6.567901,0.239,0.239,0.250444,13.0,13.0,12.654321,0.434,0.434,0.405012,5.8,5.8,5.259259,60.12,60.12,58.825926,36.6,36.6,34.691358,0.572,0.572,0.567667,19.5,19.5,21.909877,15.2,15.2,16.728395,42.4,42.4,38.925926,0.5072,0.5072,0.502222,21.6,21.6,19.765432,0.96,0.96,0.933333,2.8,2.8,3.061728,48.08,48.08,48.864198,11.8,11.8,11.407407,20.18,20.18,18.004938,8.6,8.6,7.54321,94.88,94.88,97.330864,76.5,76.5,76.409877,27.2,27.2,25.901235,0.5108,0.5108,0.498827,58.2,58.2,57.333333,0.4318,0.4318,0.418852,25.2,25.2,24.123457,0.3064,0.3064,0.318519,17.8,17.8,18.160494,0.6822,0.6822,0.676136,12.4,12.4,12.580247,1.2,1.2,1.333333,200.0,200.0,200.0,106.68,106.68,106.477778,38.46,38.46,41.604938,11.2,11.2,12.481481,12.8,12.8,13.691358,72.0,72.0,69.975309,7.76,7.76,6.162963,5.4,5.4,4.246914,0.4088,0.4088,0.418728,23.8,23.8,23.987654,0.3878,0.3878,0.382741,9.2,9.2,9.148148,57.88,57.88,58.996296,38.4,38.4,38.382716,0.5404,0.5404,0.530877,16.94,16.94,17.164198,13.2,13.2,13.37037,34.4,34.4,33.345679,0.4594,0.4594,0.440716,16.0,16.0,14.975309,0.5034,0.5034,0.514864,1.8,1.8,2.061728
3,2009-11-28,"The Palestra, Philadelphia, Pennsylvania",BROWN,SIENA,Away,62.98,62.98,62.004938,14.8,14.8,15.037037,8.92,8.92,9.353086,4.2,4.2,4.37037,106.24,106.24,105.667901,55.4,55.4,53.988889,21.4,21.4,21.580247,0.5278,0.5278,0.542457,51.6,51.6,52.012346,0.4588,0.4588,0.46884,23.6,23.6,24.308642,0.326,0.326,0.336951,16.4,16.4,17.111111,0.6874,0.6874,0.716889,12.0,12.0,13.012346,1.8,1.8,2.061728,200.0,200.0,200.0,100.66,100.66,106.697531,31.38,31.38,32.645679,7.6,7.6,7.728395,15.6,15.6,15.197531,66.2,66.2,69.197531,6.26,6.26,5.653086,4.2,4.2,3.790123,0.3854,0.3854,0.404148,20.2,20.2,21.271605,0.3514,0.3514,0.360296,7.0,7.0,7.567901,44.6,44.6,44.724691,29.0,29.0,29.308642,0.559,0.559,0.576741,20.9,20.9,19.002469,15.6,15.6,14.160494,31.4,31.4,30.740741,0.5302,0.5302,0.546012,16.6,16.6,16.740741,0.4966,0.4966,0.485136,1.2,1.2,1.333333,58.98,58.98,59.146914,16.0,16.0,16.419753,9.12,9.12,8.658025,3.6,3.6,3.382716,89.5,89.5,91.364198,69.74,69.74,71.012346,24.8,24.8,24.604938,0.5096,0.5096,0.511,57.6,57.6,59.296296,0.4794,0.4794,0.478296,27.2,27.2,27.82716,0.46,0.46,0.439531,25.8,25.8,25.296296,0.6358,0.6358,0.635605,16.4,16.4,16.074074,0.8,0.8,1.037037,200.0,200.0,200.0,100.62,100.62,101.174074,41.2,41.2,41.734568,12.0,12.0,12.259259,16.0,16.0,16.851852,74.2,74.2,75.506173,13.06,13.06,12.179012,9.6,9.6,9.08642,0.2328,0.2328,0.242407,13.6,13.6,14.617284,0.2612,0.2612,0.273926,3.4,3.4,3.777778,55.5,55.5,55.933333,36.8,36.8,36.864198,0.5348,0.5348,0.534086,19.52,19.52,19.382716,16.4,16.4,16.62963,44.0,44.0,44.679012,0.544,0.544,0.542716,23.8,23.8,24.049383,0.8034,0.8034,0.761778,2.2,2.2,2.358025
4,2009-11-28,"Daniel-Meyer Coliseum, Fort Worth, Texas",TEXAS-CHRISTIAN,LOUISIANA-TECH,Away,63.62,63.62,64.004938,17.0,17.0,18.666667,5.94,5.94,5.471605,2.2,2.2,2.037037,103.26,103.26,102.792593,60.44,60.44,63.616049,23.0,23.0,25.308642,0.5412,0.5412,0.552753,58.2,58.2,62.580247,0.4566,0.4566,0.469864,26.8,26.8,29.54321,0.3816,0.3816,0.395481,22.6,22.6,25.061728,0.6976,0.6976,0.671951,15.4,15.4,16.530864,1.4,1.4,1.506173,215.0,215.0,225.0,109.1,109.1,111.87037,36.42,36.42,36.350617,11.2,11.2,12.049383,22.4,22.4,21.975309,78.6,78.6,85.530864,7.46,7.46,6.960494,5.2,5.2,5.111111,0.4508,0.4508,0.427506,25.2,25.2,25.345679,0.3786,0.3786,0.389247,9.6,9.6,9.91358,49.38,49.38,50.98642,34.2,34.2,37.358025,0.5686,0.5686,0.576346,17.68,17.68,16.335802,14.4,14.4,13.864198,33.0,33.0,37.234568,0.5274,0.5274,0.538284,17.2,17.2,19.62963,0.6868,0.6868,0.693148,2.6,2.6,2.888889,49.34,49.34,50.7,12.4,12.4,13.246914,8.72,8.72,7.969136,2.6,2.6,2.395062,94.18,94.18,92.806173,72.08,72.08,72.930864,23.0,23.0,22.283951,0.532,0.532,0.55437,54.4,54.4,53.716049,0.4726,0.4726,0.494074,25.2,25.2,26.123457,0.4336,0.4336,0.457753,23.0,23.0,24.123457,0.7578,0.7578,0.759642,17.0,17.0,17.91358,0.6,0.6,0.703704,200.0,200.0,200.0,110.12,110.12,114.12963,39.66,39.66,40.934568,10.0,10.0,10.45679,17.8,17.8,17.222222,73.8,73.8,76.592593,11.0,11.0,11.745679,7.4,7.4,7.91358,0.3264,0.3264,0.314679,18.0,18.0,17.098765,0.3764,0.3764,0.394531,6.4,6.4,6.432099,56.42,56.42,57.434568,33.0,33.0,32.740741,0.569,0.569,0.590617,16.98,16.98,17.664198,12.8,12.8,13.382716,36.4,36.4,36.617284,0.5182,0.5182,0.539395,18.8,18.8,19.691358,0.8434,0.8434,0.828444,2.4,2.4,2.691358
5,2009-11-28,"UIC Pavilion, Chicago, Illinois",IOWA-STATE,NORTHWESTERN,Away,59.64,60.65,57.632099,18.0,18.833333,17.37037,11.92,11.5,9.925514,5.0,5.0,4.26749,80.52,83.066667,81.50535,76.38,73.333333,72.783539,27.6,26.666667,26.176955,0.5788,0.582667,0.564086,59.0,59.833333,59.119342,0.5032,0.5105,0.499214,29.8,30.666667,29.539095,0.3574,0.336833,0.340782,20.4,19.5,19.641975,0.6248,0.642833,0.666638,12.8,12.5,12.983539,0.0,0.0,0.0,200.0,200.0,200.0,115.2,116.65,113.055144,37.72,40.95,38.20823,10.8,11.0,10.296296,16.2,15.833333,15.687243,81.4,82.5,79.773663,9.56,9.616667,8.278601,6.8,6.833333,5.91358,0.289,0.279833,0.265947,17.2,16.833333,15.81893,0.5126,0.505,0.478457,9.0,8.666667,7.711934,57.64,57.633333,56.039095,38.4,37.666667,36.473251,0.5924,0.596833,0.582523,16.84,17.266667,16.885597,13.8,14.333333,13.823045,41.8,43.0,43.300412,0.4968,0.509167,0.505342,20.8,22.0,21.82716,1.0,1.0,1.0,4.0,3.5,4.263374,77.06,77.06,75.711111,17.0,17.0,17.271605,12.76,12.76,14.239506,3.8,3.8,4.320988,93.92,93.92,91.638272,59.78,59.78,59.535802,20.2,20.2,21.160494,0.533,0.533,0.531827,49.6,49.6,51.283951,0.4434,0.4434,0.44342,22.0,22.0,22.740741,0.4482,0.4482,0.434938,21.6,21.6,21.481481,0.703,0.703,0.717741,15.4,15.4,15.604938,0.8,0.8,0.802469,200.0,200.0,200.0,107.78,107.78,110.622222,31.3,31.3,30.783951,7.0,7.0,6.851852,18.2,18.2,18.419753,68.2,68.2,70.098765,11.26,11.26,11.935802,7.2,7.2,7.62963,0.5052,0.5052,0.490123,24.8,24.8,24.901235,0.3482,0.3482,0.356148,8.8,8.8,9.012346,46.6,46.6,46.661728,27.2,27.2,28.012346,0.5708,0.5708,0.571457,16.32,16.32,14.166667,11.2,11.2,9.654321,24.8,24.8,26.382716,0.54,0.54,0.528519,13.2,13.2,13.728395,0.7434,0.7434,0.779062,2.2,2.2,2.592593
6,2009-11-28,"Winfield Dunn Center, Clarksville, Tennessee",AUSTIN-PEAY,DRAKE,Away,55.28,55.28,56.783951,13.0,13.0,13.716049,5.02,5.02,6.462963,1.8,1.8,2.160494,107.42,107.42,105.4,62.24,62.24,65.585185,21.4,21.4,21.555556,0.4744,0.4744,0.48079,55.2,55.2,54.753086,0.4322,0.4322,0.443049,23.6,23.6,24.049383,0.3682,0.3682,0.335568,20.2,20.2,18.283951,0.6422,0.6422,0.63784,13.0,13.0,11.654321,2.0,2.0,2.259259,200.0,200.0,200.0,95.04,95.04,93.859259,35.24,35.24,34.855556,9.4,9.4,9.333333,17.4,17.4,17.493827,65.0,65.0,64.012346,6.2,6.2,5.759259,4.2,4.2,3.888889,0.2644,0.2644,0.24684,15.0,15.0,13.864198,0.3142,0.3142,0.299988,4.8,4.8,4.259259,48.22,48.22,49.959259,30.8,30.8,30.888889,0.5022,0.5022,0.505519,19.64,19.64,20.4,15.6,15.6,16.049383,40.2,40.2,40.888889,0.4606,0.4606,0.47684,18.8,18.8,19.790123,0.2966,0.2966,0.287605,1.0,1.0,1.135802,51.36,51.36,51.782716,11.6,11.6,11.320988,4.5,4.5,4.22963,1.8,1.8,1.567901,112.26,112.26,108.185185,60.86,60.86,64.091358,17.2,17.2,17.481481,0.545,0.545,0.511728,50.2,50.2,51.580247,0.4568,0.4568,0.429432,22.4,22.4,21.604938,0.3056,0.3056,0.27921,14.6,14.6,13.62963,0.8204,0.8204,0.841457,11.6,11.6,10.938272,2.4,2.4,2.691358,200.0,200.0,200.0,102.48,102.48,98.804938,24.82,24.82,26.822222,5.0,5.0,5.407407,16.4,16.4,17.320988,65.2,65.2,62.518519,12.98,12.98,14.297531,8.2,8.2,9.0,0.4548,0.4548,0.458198,22.8,22.8,23.604938,0.3892,0.3892,0.359074,8.8,8.8,8.37037,42.3,42.3,43.865432,22.2,22.2,22.888889,0.578,0.578,0.545667,17.7,17.7,17.308642,11.8,11.8,11.666667,27.4,27.4,27.975309,0.5148,0.5148,0.489543,13.6,13.6,13.234568,0.1566,0.1566,0.171556,0.6,0.6,0.703704
7,2009-11-28,"Dunkin' Donuts Center, Providence, Rhode Island",PROVIDENCE,BOSTON-COLLEGE,Away,44.18,44.18,45.950617,14.0,14.0,15.222222,13.52,13.52,12.611111,5.6,5.6,5.320988,93.26,93.26,90.390123,63.76,63.76,63.209877,26.2,26.2,26.111111,0.4846,0.4846,0.499568,72.6,72.6,73.654321,0.42,0.42,0.431481,30.6,30.6,31.91358,0.3316,0.3316,0.331259,24.0,24.0,24.296296,0.6604,0.6604,0.66842,15.8,15.8,16.234568,0.4,0.4,0.555556,200.0,200.0,200.0,116.12,116.12,117.651852,51.64,51.64,51.825926,19.4,19.4,18.975309,19.8,19.8,20.728395,86.4,86.4,90.17284,12.52,12.52,14.17037,9.6,9.6,11.197531,0.398,0.398,0.379111,28.8,28.8,27.888889,0.3278,0.3278,0.363407,9.4,9.4,10.111111,57.36,57.36,57.196296,45.6,45.6,45.08642,0.5142,0.5142,0.528358,12.1,12.1,13.095062,11.6,11.6,12.950617,43.8,43.8,45.765432,0.487,0.487,0.480951,21.2,21.2,21.802469,0.91,0.91,0.877778,2.6,2.6,2.839506,56.12,56.12,53.314815,15.6,15.6,14.641975,8.54,8.54,7.074074,3.6,3.6,3.012346,98.92,98.92,106.771605,70.14,70.14,70.474074,23.6,23.6,21.691358,0.5412,0.5412,0.550568,57.6,57.6,55.382716,0.4864,0.4864,0.495926,27.4,27.4,26.987654,0.3898,0.3898,0.40516,22.0,22.0,22.061728,0.7048,0.7048,0.711864,16.0,16.0,16.222222,0.8,0.8,1.037037,200.0,200.0,200.0,115.62,115.62,117.633333,47.12,47.12,48.576543,11.8,11.8,11.777778,16.8,16.8,17.197531,77.0,77.0,76.209877,5.22,5.22,4.750617,3.6,3.6,3.209877,0.2786,0.2786,0.276148,16.2,16.2,15.45679,0.3862,0.3862,0.384568,6.2,6.2,6.012346,58.9,58.9,59.367901,35.4,35.4,33.469136,0.5734,0.5734,0.584654,15.96,15.96,16.402469,12.4,12.4,12.407407,41.4,41.4,39.925926,0.52,0.52,0.531914,21.2,21.2,20.975309,0.8034,0.8034,0.761778,2.2,2.2,2.358025
8,2009-11-28,"Sprint Center, Kansas City, Missouri",IUPUI,KANSAS-STATE,Away,60.72,60.72,60.281481,15.2,15.2,14.679012,9.42,9.42,8.298765,3.2,3.2,2.753086,104.9,104.9,103.624691,64.52,64.52,62.517284,19.8,19.8,19.037037,0.5594,0.5594,0.54258,50.0,50.0,50.111111,0.5048,0.5048,0.490296,25.2,25.2,24.518519,0.4382,0.4382,0.455691,22.0,22.0,22.925926,0.7842,0.7842,0.786827,17.4,17.4,18.160494,0.4,0.4,0.555556,205.0,205.0,208.333333,110.14,110.14,108.987654,34.82,34.82,36.376543,8.2,8.2,8.716049,15.6,15.6,14.962963,73.2,73.2,72.382716,12.9,12.9,13.109877,8.6,8.6,8.740741,0.3178,0.3178,0.310506,15.8,15.8,15.481481,0.3292,0.3292,0.325012,5.4,5.4,5.185185,50.54,50.54,50.080247,28.0,28.0,27.753086,0.6086,0.6086,0.597025,19.46,19.46,19.577778,14.4,14.4,14.493827,34.2,34.2,34.62963,0.5842,0.5842,0.564716,19.8,19.8,19.333333,0.91,0.91,0.877778,2.6,2.6,2.839506,54.24,54.24,52.992593,14.8,14.8,14.308642,14.86,14.86,13.4,5.8,5.8,5.08642,93.92,93.92,97.928395,62.22,62.22,61.709877,24.8,24.8,24.506173,0.5244,0.5244,0.521951,57.4,57.4,56.91358,0.4738,0.4738,0.471568,27.2,27.2,26.802469,0.6536,0.6536,0.680988,36.4,36.4,37.654321,0.5994,0.5994,0.603852,22.2,22.2,23.074074,0.4,0.4,0.555556,200.0,200.0,200.0,114.74,114.74,114.230864,45.52,45.52,45.241975,14.4,14.4,14.197531,23.0,23.0,23.950617,82.2,82.2,82.234568,8.54,8.54,8.285185,6.2,6.2,6.049383,0.2996,0.2996,0.292185,17.0,17.0,16.493827,0.3314,0.3314,0.341543,5.6,5.6,5.555556,53.78,53.78,53.275309,39.2,39.2,38.703704,0.5508,0.5508,0.550235,15.86,15.86,16.028395,13.8,13.8,14.0,40.4,40.4,40.419753,0.5306,0.5306,0.522864,21.6,21.6,21.246914,0.91,0.91,0.877778,2.6,2.6,2.839506
9,2009-11-28,"Orleans Arena, Paradise, Nevada",UTAH,OKLAHOMA-STATE,Away,45.18,45.18,41.751852,12.2,12.2,11.049383,11.72,11.72,10.777778,4.8,4.8,4.530864,95.42,95.42,95.18642,66.96,66.96,67.091358,24.6,24.6,24.518519,0.5012,0.5012,0.489506,59.0,59.0,58.049383,0.4414,0.4414,0.43758,26.2,26.2,25.54321,0.3576,0.3576,0.372951,21.0,21.0,21.567901,0.7484,0.7484,0.738049,15.8,15.8,16.074074,1.4,1.4,1.555556,200.0,200.0,200.0,105.12,105.12,102.091358,33.14,33.14,32.758025,9.6,9.6,9.580247,19.2,19.2,19.740741,75.4,75.4,73.382716,10.9,10.9,10.679012,7.8,7.8,7.666667,0.3962,0.3962,0.369173,23.6,23.6,21.753086,0.29,0.29,0.26421,7.2,7.2,6.222222,51.4,51.4,51.582716,34.2,34.2,34.098765,0.5448,0.5448,0.534926,16.02,16.02,16.951852,13.0,13.0,13.716049,35.4,35.4,36.296296,0.5342,0.5342,0.530037,19.0,19.0,19.320988,0.4534,0.4534,0.459309,1.6,1.6,1.839506,47.22,47.22,49.397531,13.0,13.0,12.777778,8.96,8.96,7.258025,3.2,3.2,2.654321,85.82,85.82,85.574074,74.82,74.82,74.174074,29.8,29.8,29.407407,0.523,0.523,0.519914,60.6,60.6,57.728395,0.4646,0.4646,0.456346,28.2,28.2,26.407407,0.4546,0.4546,0.473235,27.4,27.4,27.358025,0.687,0.687,0.703123,18.6,18.6,18.950617,0.0,0.0,0.0,200.0,200.0,200.0,113.54,113.54,110.640741,33.92,33.92,33.103704,10.8,10.8,9.765432,19.8,19.8,20.074074,81.8,81.8,78.765432,7.46,7.46,7.350617,5.4,5.4,5.246914,0.3794,0.3794,0.395926,22.8,22.8,22.419753,0.2956,0.2956,0.305444,6.8,6.8,7.0,55.42,55.42,54.780247,40.6,40.6,39.17284,0.5592,0.5592,0.560642,14.74,14.74,16.308642,12.2,12.2,13.17284,37.8,37.8,35.308642,0.5596,0.5596,0.541753,21.4,21.4,19.407407,1.0,1.0,1.0,3.0,3.0,3.395062


In [14]:
# Save the dataframe that we just generated.
# clean_dir creates the output directory when it is missing and otherwise
# empties it, so output from an earlier run never lingers next to the new file.

CLEANED_DATA_DIR = '../assets/data/cleaned_data'

clean_dir(CLEANED_DATA_DIR)
# index=False is the documented way to leave the row index out of the CSV
# (the previous index=None only worked because None is falsy).
data.to_csv(f'{CLEANED_DATA_DIR}/cleaned_data.csv', index=False)