# Generating Team Data
In this notebook, file we will be generating statistics on a team basis. This will be better for training a model as it will contain avergaes for teams **going into** a match up.

# Imports

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

from tqdm import tqdm
from sportsreference.ncaab.teams import Teams

pd.set_option("display.max_rows", None, "display.max_columns", None)

# Utils

In [2]:
def clean_dir(path):
    """Makes a clean directory, removes all files and folders in the specified path"""
    
    if not os.path.exists(path):
        os.mkdir(path)
    
    for filename in os.listdir(path):
        file_path = os.path.join(path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

# Generating Moving Averages

Next, we want to create a file for each team that contains all of their stats for each game.

In [3]:
TEAMS = ['Abilene Christian', 'Air Force', 'Akron', 'Alabama', 'Alabama A&M', 'Alabama State', 'Alabama-Birmingham', 'Albany (NY)', 'Alcorn State', 'American', 'Appalachian State', 'Arizona', 'Arizona State', 'Arkansas', 'Arkansas State', 'Arkansas-Pine Bluff', 'Army', 'Auburn', 'Austin Peay', 'Ball State', 'Baylor', 'Bellarmine', 'Belmont', 'Binghamton', 'Boise State', 'Boston College', 'Boston University', 'Bowling Green State', 'Bradley', 'Brigham Young', 'Brown', 'Bryant', 'Bucknell', 'Buffalo', 'Butler', 'Cal Poly', 'Cal State Bakersfield', 'Cal State Fullerton', 'Cal State Long Beach', 'Cal State Northridge', 'California Baptist', 'Campbell', 'Canisius', 'Central Arkansas', 'Central Connecticut State', 'Central Florida', 'Central Michigan', 'Charleston Southern', 'Charlotte', 'Chattanooga', 'Chicago State', 'Cincinnati', 'Citadel', 'Clemson', 'Cleveland State', 'Coastal Carolina', 'Colgate', 'College of Charleston', 'Colorado', 'Colorado State', 'Columbia', 'Connecticut', 'Coppin State', 'Cornell', 'Creighton', 'Dartmouth', 'Davidson', 'Dayton', 'DePaul', 'Delaware', 'Delaware State', 'Denver', 'Detroit Mercy', 'Dixie State', 'Drake', 'Drexel', 'Duke', 'Duquesne', 'East Carolina', 'East Tennessee State', 'Eastern Illinois', 'Eastern Kentucky', 'Eastern Michigan', 'Eastern Washington', 'Elon', 'Evansville', 'Fairfield', 'Fairleigh Dickinson', 'Florida', 'Florida A&M', 'Florida Atlantic', 'Florida Gulf Coast', 'Florida International', 'Florida State', 'Fordham', 'Fresno State', 'Furman', 'Gardner-Webb', 'George Mason', 'George Washington', 'Georgetown', 'Georgia', 'Georgia Southern', 'Georgia State', 'Georgia Tech', 'Gonzaga', 'Grambling', 'Grand Canyon', 'Green Bay', 'Hampton', 'Hartford', 'Harvard', 'Hawaii', 'High Point', 'Hofstra', 'Holy Cross', 'Houston', 'Houston Baptist', 'Howard', 'IUPUI', 'Idaho', 'Idaho State', 'Illinois', 'Illinois State', 'Illinois-Chicago', 'Incarnate Word', 'Indiana', 'Indiana State', 'Iona', 'Iowa', 'Iowa State', 'Jackson State', 'Jacksonville', 'Jacksonville State', 'James Madison', 'Kansas', 'Kansas State', 'Kennesaw State', 'Kent State', 'Kentucky', 'La Salle', 'Lafayette', 'Lamar', 'Lehigh', 'Liberty', 'Lipscomb', 'Little Rock', 'Long Island University', 'Longwood', 'Louisiana', 'Louisiana State', 'Louisiana Tech', 'Louisiana-Monroe', 'Louisville', 'Loyola (IL)', 'Loyola (MD)', 'Loyola Marymount', 'Maine', 'Manhattan', 'Marist', 'Marquette', 'Marshall', 'Maryland', 'Maryland-Baltimore County', 'Maryland-Eastern Shore', 'Massachusetts', 'Massachusetts-Lowell', 'McNeese State', 'Memphis', 'Mercer', 'Merrimack', 'Miami (FL)', 'Miami (OH)', 'Michigan', 'Michigan State', 'Middle Tennessee', 'Milwaukee', 'Minnesota', 'Mississippi', 'Mississippi State', 'Mississippi Valley State', 'Missouri', 'Missouri State', 'Missouri-Kansas City', 'Monmouth', 'Montana', 'Montana State', 'Morehead State', 'Morgan State', "Mount St. Mary's", 'Murray State', 'NJIT', 'Navy', 'Nebraska', 'Nevada', 'Nevada-Las Vegas', 'New Hampshire', 'New Mexico', 'New Mexico State', 'New Orleans', 'Niagara', 'Nicholls State', 'Norfolk State', 'North Alabama', 'North Carolina', 'North Carolina A&T', 'North Carolina Central', 'North Carolina State', 'North Carolina-Asheville', 'North Carolina-Greensboro', 'North Carolina-Wilmington', 'North Dakota', 'North Dakota State', 'North Florida', 'North Texas', 'Northeastern', 'Northern Arizona', 'Northern Colorado', 'Northern Illinois', 'Northern Iowa', 'Northern Kentucky', 'Northwestern', 'Northwestern State', 'Notre Dame', 'Oakland', 'Ohio', 'Ohio State', 'Oklahoma', 'Oklahoma State', 'Old Dominion', 'Omaha', 'Oral Roberts', 'Oregon', 'Oregon State', 'Pacific', 'Penn State', 'Pennsylvania', 'Pepperdine', 'Pittsburgh', 'Portland', 'Portland State', 'Prairie View', 'Presbyterian', 'Princeton', 'Providence', 'Purdue', 'Purdue-Fort Wayne', 'Quinnipiac', 'Radford', 'Rhode Island', 'Rice', 'Richmond', 'Rider', 'Robert Morris', 'Rutgers', 'SIU Edwardsville', 'Sacramento State', 'Sacred Heart', 'Saint Francis (PA)', "Saint Joseph's", 'Saint Louis', "Saint Mary's (CA)", "Saint Peter's", 'Sam Houston State', 'Samford', 'San Diego', 'San Diego State', 'San Francisco', 'San Jose State', 'Santa Clara', 'Seattle', 'Seton Hall', 'Siena', 'South Alabama', 'South Carolina', 'South Carolina State', 'South Carolina Upstate', 'South Dakota', 'South Dakota State', 'South Florida', 'Southeast Missouri State', 'Southeastern Louisiana', 'Southern', 'Southern California', 'Southern Illinois', 'Southern Methodist', 'Southern Mississippi', 'Southern Utah', 'St. Bonaventure', 'St. Francis (NY)', "St. John's (NY)", 'Stanford', 'Stephen F. Austin', 'Stetson', 'Stony Brook', 'Syracuse', 'Tarleton State', 'Temple', 'Tennessee', 'Tennessee State', 'Tennessee Tech', 'Tennessee-Martin', 'Texas', 'Texas A&M', 'Texas A&M-Corpus Christi', 'Texas Christian', 'Texas Southern', 'Texas State', 'Texas Tech', 'Texas-Arlington', 'Texas-El Paso', 'Texas-Rio Grande Valley', 'Texas-San Antonio', 'Toledo', 'Towson', 'Troy', 'Tulane', 'Tulsa', 'UC-Davis', 'UC-Irvine', 'UC-Riverside', 'UC-San Diego', 'UC-Santa Barbara', 'UCLA', 'University of California', 'Utah', 'Utah State', 'Utah Valley', 'VMI', 'Valparaiso', 'Vanderbilt', 'Vermont', 'Villanova', 'Virginia', 'Virginia Commonwealth', 'Virginia Tech', 'Wagner', 'Wake Forest', 'Washington', 'Washington State', 'Weber State', 'West Virginia', 'Western Carolina', 'Western Illinois', 'Western Kentucky', 'Western Michigan', 'Wichita State', 'William & Mary', 'Winthrop', 'Wisconsin', 'Wofford', 'Wright State', 'Wyoming', 'Xavier', 'Yale', 'Youngstown State']

In [4]:
original_data = pd.read_csv('../assets/data/data.csv')
original_data["away"] = np.where(original_data["winner"]=="Away",
                                 original_data["winning_name"],
                                 original_data["losing_name"])

original_data["home"] = np.where(original_data["winner"]=="Home",
                                 original_data["winning_name"],
                                 original_data["losing_name"])

# This will help later
reordered_labels = ['date',
                    'location',
                    'losing_abbr',
                    'losing_name',
                    'pace',
                    'winning_abbr',
                    'winning_name',
                    'away',
                    'home',
                    'winner',
                    'away_assist_percentage',
                    'away_assists', 
                    'away_block_percentage', 
                    'away_blocks', 
                    'away_defensive_rating', 
                    'away_defensive_rebound_percentage', 
                    'away_defensive_rebounds',
                    'away_effective_field_goal_percentage',
                    'away_field_goal_attempts',
                    'away_field_goal_percentage',
                    'away_field_goals',
                    'away_free_throw_attempt_rate',
                    'away_free_throw_attempts',
                    'away_free_throw_percentage',
                    'away_free_throws',
                    'away_losses',
                    'away_minutes_played',
                    'away_offensive_rating',
                    'away_offensive_rebound_percentage',
                    'away_offensive_rebounds',
                    'away_personal_fouls',
                    'away_points',
                    'away_steal_percentage',
                    'away_steals',
                    'away_three_point_attempt_rate',
                    'away_three_point_field_goal_attempts',
                    'away_three_point_field_goal_percentage',
                    'away_three_point_field_goals',
                    'away_total_rebound_percentage',
                    'away_total_rebounds',
                    'away_true_shooting_percentage',
                    'away_turnover_percentage',
                    'away_turnovers',
                    'away_two_point_field_goal_attempts',
                    'away_two_point_field_goal_percentage',
                    'away_two_point_field_goals',
                    'away_win_percentage',
                    'away_wins',
                    'home_assist_percentage',
                    'home_assists',
                    'home_block_percentage',
                    'home_blocks',
                    'home_defensive_rating',
                    'home_defensive_rebound_percentage',
                    'home_defensive_rebounds',
                    'home_effective_field_goal_percentage',
                    'home_field_goal_attempts',
                    'home_field_goal_percentage',
                    'home_field_goals',
                    'home_free_throw_attempt_rate',
                    'home_free_throw_attempts',
                    'home_free_throw_percentage',
                    'home_free_throws',
                    'home_losses',
                    'home_minutes_played',
                    'home_offensive_rating',
                    'home_offensive_rebound_percentage',
                    'home_offensive_rebounds',
                    'home_personal_fouls',
                    'home_points',
                    'home_steal_percentage',
                    'home_steals',
                    'home_three_point_attempt_rate',
                    'home_three_point_field_goal_attempts',
                    'home_three_point_field_goal_percentage',
                    'home_three_point_field_goals',
                    'home_total_rebound_percentage',
                    'home_total_rebounds',
                    'home_true_shooting_percentage',
                    'home_turnover_percentage',
                    'home_turnovers',
                    'home_two_point_field_goal_attempts',
                    'home_two_point_field_goal_percentage',
                    'home_two_point_field_goals',
                    'home_win_percentage',
                    'home_wins']

original_data.drop(columns=['away_ranking', 'home_ranking'], inplace=True)
original_data['date'] = pd.to_datetime(original_data['date'])
original_data['date'] = original_data['date'].dt.strftime('%Y-%m-%d')
original_data = original_data[reordered_labels]
original_data.dropna(inplace=True)
original_data.drop_duplicates(inplace=True)

print(f"There are {original_data.shape[0]} examples in the set")

There are 5747 examples in the set


In [5]:
new_labels = ['date',
              'location',
              'away',
              'home',
              'winner',
              'assist_percentage',
              'assists', 
              'block_percentage', 
              'blocks', 
              'defensive_rating', 
              'defensive_rebound_percentage', 
              'defensive_rebounds',
              'effective_field_goal_percentage',
              'field_goal_attempts',
              'field_goal_percentage',
              'field_goals',
              'free_throw_attempt_rate',
              'free_throw_attempts',
              'free_throw_percentage',
              'free_throws',
              'losses',
              'minutes_played',
              'offensive_rating',
              'offensive_rebound_percentage',
              'offensive_rebounds',
              'personal_fouls',
              'points',
              'steal_percentage',
              'steals',
              'three_point_attempt_rate',
              'three_point_field_goal_attempts',
              'three_point_field_goal_percentage',
              'three_point_field_goals',
              'total_rebound_percentage',
              'total_rebounds',
              'true_shooting_percentage',
              'turnover_percentage',
              'turnovers',
              'two_point_field_goal_attempts',
              'two_point_field_goal_percentage',
              'two_point_field_goals',
              'win_percentage',
              'wins']

def generateTeamSats(dataframe, teams=TEAMS, folder='../assets/data/team_data/'):
    """For each team, generate the games that they have played"""
    
    # Clean the folder
    clean_dir(folder)
    
    # Loop through all of the teams
    for team in tqdm(teams, unit="teams"):
        file_name = f'{team}_data.csv'
        path = os.path.join(folder, file_name)
        
        # Get home games and away games
        away_games = dataframe.loc[(dataframe["away"] == team)].copy()
        home_games = dataframe.loc[(dataframe["home"] == team)].copy()
        
        home_games.drop(home_games.filter(regex="away_").columns, axis=1, inplace=True)
        home_games.drop(columns=['losing_abbr',
                                 'losing_name',
                                 'pace',
                                 'winning_abbr',
                                 'winning_name'],
                        inplace=True)
        home_games.columns = new_labels
        
        away_games.drop(away_games.filter(regex="home_").columns, axis=1, inplace=True)
        away_games.drop(columns=['losing_abbr',
                                 'losing_name',
                                 'pace',
                                 'winning_abbr',
                                 'winning_name'],
                        inplace=True)
        away_games.columns = new_labels
        
        # Shaoe for both must match same columns
        assert away_games.shape[1] == home_games.shape[1]
        
        # Join the home games and away games, sort by date
        team_stats = pd.concat([home_games, away_games])
        team_stats.drop_duplicates(inplace=True)
        team_stats.dropna(inplace=True)
        team_stats.sort_values(by=["date"], inplace=True)
        
        # Save the stats
        team_stats.to_csv(path, index=None)

In [6]:
generateTeamSats(original_data)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 356/356 [00:07<00:00, 47.66teams/s]


In [7]:
pd.read_csv('../assets/data/team_data/Yale_data.csv').head()

Unnamed: 0,date,location,away,home,winner,assist_percentage,assists,block_percentage,blocks,defensive_rating,defensive_rebound_percentage,defensive_rebounds,effective_field_goal_percentage,field_goal_attempts,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,losses,minutes_played,offensive_rating,offensive_rebound_percentage,offensive_rebounds,personal_fouls,points,steal_percentage,steals,three_point_attempt_rate,three_point_field_goal_attempts,three_point_field_goal_percentage,three_point_field_goals,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins
0,2019-11-05,"Island Federal Credit Union, Stony Brook, New ...",Yale,Stony Brook,Away,59.3,16,15.6,7,84.1,71.1,32,0.45,70,0.386,27,0.271,19,0.579,11,0,225,90.2,25.0,11,18,74,9.8,8,0.371,26,0.346,9,48.3,43,0.468,13.3,12,44,0.409,18,1.0,1
1,2019-11-08,"John J. Lee Ampitheater, New Haven, Connecticut",Oberlin\r\n\t\t\t,Yale,Home,73.3,22,19.2,5,50.7,87.9,29,0.632,53,0.566,30,0.811,43,0.628,27,0,200,128.8,50.0,13,12,94,15.1,11,0.302,16,0.438,7,71.2,42,0.64,17.3,15,37,0.622,23,1.0,2
2,2019-11-11,"War Memorial Gymnasium, San Francisco, California",Yale,San Francisco,Home,41.7,10,9.8,4,106.3,71.8,28,0.46,63,0.381,24,0.444,28,0.75,21,1,225,100.0,12.8,5,20,79,6.3,5,0.429,27,0.37,10,42.3,33,0.518,8.5,7,36,0.389,14,0.667,2
3,2019-11-17,"Gallagher-Iba Arena, Stillwater, Oklahoma",Yale,Oklahoma State,Home,61.9,13,8.7,4,88.9,75.0,30,0.464,55,0.382,21,0.164,9,0.667,6,2,200,79.2,26.5,9,16,57,6.9,5,0.564,31,0.29,9,52.7,39,0.481,25.3,20,24,0.5,12,0.5,2
4,2019-11-20,"John J. Lee Ampitheater, New Haven, Connecticut",Siena,Yale,Home,74.3,26,16.7,10,95.7,72.2,39,0.603,68,0.515,35,0.412,28,0.643,18,2,275,107.5,12.8,5,23,100,3.2,3,0.426,29,0.414,12,47.3,44,0.615,16.6,16,39,0.59,23,0.6,3


In [8]:
def compute_ma(span, teams=TEAMS, team_data_folder='../assets/data/team_data/', dest_folder='../assets/data/team_ma/'):
    """Computes various moving averages for the stats"""
    
    # Clean directory we will be saving the CSVs to
    clean_dir(dest_folder)
    
    for team in tqdm(teams, unit='teams'):
        averages = pd.DataFrame()

        # Load the stats for a given team
        team_stats = pd.read_csv(os.path.join(team_data_folder, f"{team}_data.csv"))

        # Compute the moving averages for the appropriate columns
        for col in team_stats.columns:
            if col in {'date', 'location', 'away', 'home', 'winner'}:
                continue
                
            # Simple moving average
            team_stats[f"{col}_SMA"] = team_stats.loc[:, col].rolling(window=span).mean()
            team_stats[f"{col}_SMA"] = team_stats[f"{col}_SMA"].shift(1)

            # Cumulative moving average
            team_stats[f"{col}_CMA"] = team_stats.loc[:, col].expanding(min_periods=span).mean()
            team_stats[f"{col}_CMA"] = team_stats[f"{col}_CMA"].shift(1)

            # Exponential moving average
            team_stats[f"{col}_EMA"] = team_stats.loc[:, col].ewm(span=span, adjust=False).mean()
            team_stats[f"{col}_EMA"] = team_stats[f"{col}_EMA"].shift(1)

        # Drop any rows with NULL values and save the CSV
        team_stats.dropna(inplace=True)
        team_stats.drop_duplicates(inplace=True)
        team_stats.to_csv(os.path.join(dest_folder, f"{team}_ma.csv"), index=None)

In [9]:
compute_ma(5)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 356/356 [00:37<00:00,  9.43teams/s]


In [10]:
pd.read_csv('../assets/data/team_ma/Yale_ma.csv').head()

Unnamed: 0,date,location,away,home,winner,assist_percentage,assists,block_percentage,blocks,defensive_rating,defensive_rebound_percentage,defensive_rebounds,effective_field_goal_percentage,field_goal_attempts,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,losses,minutes_played,offensive_rating,offensive_rebound_percentage,offensive_rebounds,personal_fouls,points,steal_percentage,steals,three_point_attempt_rate,three_point_field_goal_attempts,three_point_field_goal_percentage,three_point_field_goals,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_field_goal_attempts,two_point_field_goal_percentage,two_point_field_goals,win_percentage,wins,assist_percentage_SMA,assist_percentage_CMA,assist_percentage_EMA,assists_SMA,assists_CMA,assists_EMA,block_percentage_SMA,block_percentage_CMA,block_percentage_EMA,blocks_SMA,blocks_CMA,blocks_EMA,defensive_rating_SMA,defensive_rating_CMA,defensive_rating_EMA,defensive_rebound_percentage_SMA,defensive_rebound_percentage_CMA,defensive_rebound_percentage_EMA,defensive_rebounds_SMA,defensive_rebounds_CMA,defensive_rebounds_EMA,effective_field_goal_percentage_SMA,effective_field_goal_percentage_CMA,effective_field_goal_percentage_EMA,field_goal_attempts_SMA,field_goal_attempts_CMA,field_goal_attempts_EMA,field_goal_percentage_SMA,field_goal_percentage_CMA,field_goal_percentage_EMA,field_goals_SMA,field_goals_CMA,field_goals_EMA,free_throw_attempt_rate_SMA,free_throw_attempt_rate_CMA,free_throw_attempt_rate_EMA,free_throw_attempts_SMA,free_throw_attempts_CMA,free_throw_attempts_EMA,free_throw_percentage_SMA,free_throw_percentage_CMA,free_throw_percentage_EMA,free_throws_SMA,free_throws_CMA,free_throws_EMA,losses_SMA,losses_CMA,losses_EMA,minutes_played_SMA,minutes_played_CMA,minutes_played_EMA,offensive_rating_SMA,offensive_rating_CMA,offensive_rating_EMA,offensive_rebound_percentage_SMA,offensive_rebound_percentage_CMA,offensive_rebound_percentage_EMA,offensive_rebounds_SMA,offensive_rebounds_CMA,offensive_rebounds_EMA,personal_fouls_SMA,personal_fouls_CMA,personal_fouls_EMA,points_SMA,points_CMA,points_EMA,steal_percentage_SMA,steal_percentage_CMA,steal_percentage_EMA,steals_SMA,steals_CMA,steals_EMA,three_point_attempt_rate_SMA,three_point_attempt_rate_CMA,three_point_attempt_rate_EMA,three_point_field_goal_attempts_SMA,three_point_field_goal_attempts_CMA,three_point_field_goal_attempts_EMA,three_point_field_goal_percentage_SMA,three_point_field_goal_percentage_CMA,three_point_field_goal_percentage_EMA,three_point_field_goals_SMA,three_point_field_goals_CMA,three_point_field_goals_EMA,total_rebound_percentage_SMA,total_rebound_percentage_CMA,total_rebound_percentage_EMA,total_rebounds_SMA,total_rebounds_CMA,total_rebounds_EMA,true_shooting_percentage_SMA,true_shooting_percentage_CMA,true_shooting_percentage_EMA,turnover_percentage_SMA,turnover_percentage_CMA,turnover_percentage_EMA,turnovers_SMA,turnovers_CMA,turnovers_EMA,two_point_field_goal_attempts_SMA,two_point_field_goal_attempts_CMA,two_point_field_goal_attempts_EMA,two_point_field_goal_percentage_SMA,two_point_field_goal_percentage_CMA,two_point_field_goal_percentage_EMA,two_point_field_goals_SMA,two_point_field_goals_CMA,two_point_field_goals_EMA,win_percentage_SMA,win_percentage_CMA,win_percentage_EMA,wins_SMA,wins_CMA,wins_EMA
0,2019-11-23,"Bryce Jordan Center, University Park, Pennsylv...",Yale,Penn State,Home,45.5,10,9.5,4,86.6,68.9,31,0.52,51,0.431,22,0.216,11,0.273,3,3,200,83.6,9.4,3,15,56,3.0,2,0.451,23,0.391,9,44.2,34,0.498,21.2,15,28,0.464,13,0.5,3,62.1,62.1,63.653086,17.4,17.4,18.37037,14.0,14.0,13.92963,6.0,6.0,6.691358,85.14,85.14,89.023457,75.6,75.6,74.096296,31.6,31.6,33.0,0.5218,0.5218,0.523568,61.8,61.8,63.283951,0.446,0.446,0.445148,27.4,27.4,28.185185,0.4204,0.4204,0.373185,25.4,25.4,23.481481,0.6534,0.6534,0.650062,16.6,16.6,15.283951,1.0,1.0,1.259259,225.0,225.0,233.641975,101.14,101.14,98.78642,25.42,25.42,21.928395,8.6,8.6,7.864198,17.8,17.8,18.925926,80.8,80.8,81.604938,8.26,8.26,6.960494,6.4,6.4,5.518519,0.4184,0.4184,0.434,25.8,25.8,27.271605,0.3716,0.3716,0.368864,9.4,9.4,9.950617,52.36,52.36,50.317284,40.2,40.2,40.864198,0.5444,0.5444,0.544284,16.2,16.2,16.750617,14.0,14.0,14.666667,36.0,36.0,36.012346,0.502,0.502,0.50763,18.0,18.0,18.234568,0.7534,0.7534,0.706222,2.0,2.0,2.135802
1,2019-11-25,"HP Field House, Orlando, Florida",Yale,Western Michigan,Away,58.3,14,9.4,3,79.7,86.5,32,0.575,53,0.453,24,0.358,19,0.632,12,3,200,114.1,36.4,12,12,73,1.6,1,0.491,26,0.5,13,62.9,44,0.588,17.5,13,27,0.407,11,0.571,4,59.34,59.333333,57.602058,16.2,16.166667,15.580247,12.78,13.25,12.453086,5.4,5.666667,5.794239,85.64,85.383333,88.215638,75.16,74.483333,72.364198,31.4,31.5,32.333333,0.5358,0.5215,0.522379,58.0,60.0,59.1893,0.455,0.4435,0.440432,26.4,26.5,26.123457,0.4094,0.386333,0.32079,23.8,23.0,19.320988,0.5922,0.59,0.524374,15.0,14.333333,11.1893,1.6,1.333333,1.839506,220.0,220.833333,222.427984,99.82,98.216667,93.72428,22.3,22.75,17.752263,7.0,7.666667,6.242798,17.2,17.333333,17.617284,77.2,76.666667,73.069959,6.9,7.383333,5.640329,5.2,5.666667,4.345679,0.4344,0.423833,0.439667,25.2,25.333333,25.847737,0.3806,0.374833,0.376243,9.4,9.333333,9.633745,51.54,51.0,48.278189,38.4,39.166667,38.576132,0.5504,0.536667,0.528856,17.78,17.033333,18.233745,14.6,14.166667,14.777778,32.8,34.666667,33.341564,0.513,0.495667,0.493086,17.0,17.166667,16.489712,0.6534,0.711167,0.637481,2.4,2.166667,2.423868
2,2019-11-26,"HP Field House, Orlando, Florida",Yale,Bucknell,Away,41.9,13,16.0,4,87.1,86.1,31,0.623,53,0.585,31,0.321,17,0.882,15,3,200,115.7,23.8,5,8,81,14.3,10,0.302,16,0.25,4,63.2,36,0.663,17.7,13,37,0.73,27,0.625,5,56.34,59.185714,57.834705,14.6,15.857143,15.053498,10.82,12.7,11.435391,5.0,5.285714,4.862826,91.44,84.571429,85.377092,74.88,76.2,77.076132,32.0,31.571429,32.222222,0.5244,0.529143,0.539919,58.0,59.0,57.1262,0.4324,0.444857,0.444621,25.2,26.142857,25.415638,0.3188,0.382286,0.333193,19.0,22.428571,19.213992,0.593,0.596,0.56025,12.0,14.0,11.459534,2.2,1.571429,2.226337,220.0,217.857143,214.951989,96.88,100.485714,100.516187,19.58,24.7,23.968176,6.8,8.285714,8.161866,17.2,16.571429,15.744856,73.0,76.142857,73.046639,4.2,6.557143,4.293553,3.2,5.0,3.230453,0.4722,0.433429,0.456778,27.2,25.428571,25.898491,0.393,0.392714,0.417495,10.6,9.857143,10.75583,49.88,52.7,53.152126,38.8,39.857143,40.384088,0.54,0.544,0.548571,17.82,17.1,17.989163,14.2,14.0,14.185185,30.8,33.571429,31.227709,0.47,0.483,0.464391,14.6,16.285714,14.659808,0.5676,0.691143,0.615321,2.8,2.428571,2.949246
3,2019-12-01,"John J. Lee Ampitheater, New Haven, Connecticut",Vermont,Yale,Home,68.2,15,3.2,1,77.6,79.5,31,0.481,52,0.423,22,0.327,17,0.882,15,3,200,97.0,20.7,6,13,65,11.9,8,0.365,19,0.316,6,54.4,37,0.541,17.9,13,33,0.485,16,0.667,6,56.38,57.025,52.523137,15.2,15.5,14.368999,12.06,13.1125,12.956927,5.0,5.125,4.575217,87.6,84.8875,85.951395,77.74,77.4375,80.084088,32.6,31.5,31.814815,0.557,0.540875,0.567613,56.0,58.25,55.7508,0.4732,0.462375,0.491414,26.6,26.75,27.277092,0.2942,0.374625,0.329129,16.8,21.75,18.475995,0.6194,0.63175,0.6675,10.8,14.125,12.639689,2.6,1.75,2.484225,215.0,215.625,209.967993,100.02,102.3875,105.577458,21.78,24.5875,23.912117,6.8,7.875,7.10791,14.8,15.5,13.163237,73.4,76.75,75.697759,5.8,7.525,7.629035,4.2,5.625,5.486968,0.4468,0.417,0.405185,25.0,24.25,22.598994,0.369,0.374875,0.361663,9.4,9.125,8.503887,54.06,54.0125,56.501417,39.4,39.375,38.922725,0.569,0.558875,0.586714,19.66,17.175,17.892775,15.4,13.875,13.790123,31.0,34.0,33.151806,0.5382,0.513875,0.552927,17.2,17.625,18.773205,0.5592,0.682875,0.618547,3.4,2.75,3.63283
4,2019-12-04,"SEFCU Arena, Albany, New York",Yale,Albany (NY),Away,56.5,13,3.7,1,83.9,84.4,27,0.571,49,0.469,23,0.163,8,0.625,5,3,200,98.4,15.4,4,9,61,9.7,6,0.469,23,0.435,10,53.4,31,0.578,19.8,13,26,0.5,13,0.7,7,57.64,58.266667,57.748758,15.6,15.444444,14.579332,10.96,12.011111,9.704618,4.4,4.666667,3.383478,85.34,84.077778,83.167596,78.64,77.666667,79.889392,32.8,31.444444,31.54321,0.5604,0.534222,0.538742,55.4,57.555556,54.500533,0.4814,0.458,0.46861,26.8,26.222222,25.518061,0.3268,0.369333,0.328419,18.4,21.222222,17.983996,0.6624,0.659556,0.739,12.6,14.222222,13.426459,2.8,1.888889,2.65615,215.0,213.888889,206.645328,103.58,101.788889,102.718305,20.62,24.155556,22.841411,6.2,7.666667,6.738607,14.2,15.222222,13.108825,75.0,75.444444,72.13184,6.8,8.011111,9.05269,4.8,5.888889,6.324646,0.407,0.411222,0.39179,22.6,23.666667,21.399329,0.3742,0.368333,0.346442,8.8,8.777778,7.669258,54.4,54.055556,55.800945,39.0,39.111111,38.281817,0.581,0.556889,0.571476,18.18,17.255556,17.895184,14.0,13.777778,13.526749,32.8,33.888889,33.101204,0.5352,0.510667,0.530285,18.0,17.444444,17.848804,0.5926,0.681111,0.634698,4.2,3.111111,4.421887


# Getting the Data Ready for Testing
Now that we have computed the exponential moving average for each team, we need to joing them back into one table so that it is easier to train models on.

In [11]:
games = original_data.copy()
num_rows = games.shape[0]

# We can uniquely identify a game given the teams, the date, and the game number.
games = games.filter(["date", "location", "home", "away", "winner"])
games.drop_duplicates()

# Just to make sure that there is no lost data
assert games.shape[0] == num_rows

# Arrays to hold the home and away dataframes
homes=[]
aways=[]

# Add the moving averages
for team in tqdm(TEAMS, unit='teams'):
    # load in the  file
    team_avgs = pd.read_csv(f'../assets/data/team_ma/{team}_ma.csv')
    team_avgs.drop(new_labels[5:], axis=1, inplace=True)
    
    # Merge in visiting team stats
    away = pd.merge(games,
                    team_avgs.loc[team_avgs["away"] == team],
                    on=["date", "location", "home", "away", 'winner'],
                    how="left")
    
    # Drop any rows with NULL values and drop redudant columns
    away.dropna(inplace=True)
    away.drop_duplicates(inplace=True)
    
    # Rename the columns and append to the array
    new_away_cols = list(away.columns)[:5] + ["away_" + col for col in list(away.columns)[5:]]
    away.columns = new_away_cols
    aways.append(away)
    
    # Merge in home team stats
    home = pd.merge(games,
                    team_avgs.loc[team_avgs["home"] == team],
                    on=["date", "location", "home", "away", 'winner'],
                    how="left")
    
    # Drop any rows with NULL values and drop redudant columns
    home.dropna(inplace=True)
    home.drop_duplicates(inplace=True)
    
    # Rename the columns and append to the array
    new_home_cols = list(home.columns)[:5] + ["home_" + col for col in list(home.columns)[5:]]
    home.columns = new_home_cols
    homes.append(home)
    
    # Sanity check, make sure that the column sizes match for both home and away dataframes
    assert home.shape[1] == away.shape[1]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 356/356 [00:27<00:00, 12.88teams/s]


In [12]:
# Join the tables on the common attributes and sort by the Date and the Number of game
data = pd.merge(pd.concat(homes),
                pd.concat(aways),
                on=["date", "location", "home", "away", "winner"]).sort_values(["date"]).reset_index(drop=True)

data.drop_duplicates(inplace=True)
data.dropna(inplace=True)
data.head(10)

Unnamed: 0,date,location,home,away,winner,home_assist_percentage_SMA,home_assist_percentage_CMA,home_assist_percentage_EMA,home_assists_SMA,home_assists_CMA,home_assists_EMA,home_block_percentage_SMA,home_block_percentage_CMA,home_block_percentage_EMA,home_blocks_SMA,home_blocks_CMA,home_blocks_EMA,home_defensive_rating_SMA,home_defensive_rating_CMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_SMA,home_defensive_rebound_percentage_CMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_SMA,home_defensive_rebounds_CMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_SMA,home_effective_field_goal_percentage_CMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_SMA,home_field_goal_attempts_CMA,home_field_goal_attempts_EMA,home_field_goal_percentage_SMA,home_field_goal_percentage_CMA,home_field_goal_percentage_EMA,home_field_goals_SMA,home_field_goals_CMA,home_field_goals_EMA,home_free_throw_attempt_rate_SMA,home_free_throw_attempt_rate_CMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_SMA,home_free_throw_attempts_CMA,home_free_throw_attempts_EMA,home_free_throw_percentage_SMA,home_free_throw_percentage_CMA,home_free_throw_percentage_EMA,home_free_throws_SMA,home_free_throws_CMA,home_free_throws_EMA,home_losses_SMA,home_losses_CMA,home_losses_EMA,home_minutes_played_SMA,home_minutes_played_CMA,home_minutes_played_EMA,home_offensive_rating_SMA,home_offensive_rating_CMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_SMA,home_offensive_rebound_percentage_CMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_SMA,home_offensive_rebounds_CMA,home_offensive_rebounds_EMA,home_personal_fouls_SMA,home_personal_fouls_CMA,home_personal_fouls_EMA,home_points_SMA,home_points_CMA,home_points_EMA,home_steal_percentage_SMA,home_steal_percentage_CMA,home_steal_percentage_EMA,home_steals_SMA,home_steals_CMA,home_steals_EMA,home_three_point_attempt_rate_SMA,home_three_point_attempt_rate_CMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_SMA,home_three_point_field_goal_attempts_CMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_SMA,home_three_point_field_goal_percentage_CMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_SMA,home_three_point_field_goals_CMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_SMA,home_total_rebound_percentage_CMA,home_total_rebound_percentage_EMA,home_total_rebounds_SMA,home_total_rebounds_CMA,home_total_rebounds_EMA,home_true_shooting_percentage_SMA,home_true_shooting_percentage_CMA,home_true_shooting_percentage_EMA,home_turnover_percentage_SMA,home_turnover_percentage_CMA,home_turnover_percentage_EMA,home_turnovers_SMA,home_turnovers_CMA,home_turnovers_EMA,home_two_point_field_goal_attempts_SMA,home_two_point_field_goal_attempts_CMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_SMA,home_two_point_field_goal_percentage_CMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_SMA,home_two_point_field_goals_CMA,home_two_point_field_goals_EMA,home_win_percentage_SMA,home_win_percentage_CMA,home_win_percentage_EMA,home_wins_SMA,home_wins_CMA,home_wins_EMA,away_assist_percentage_SMA,away_assist_percentage_CMA,away_assist_percentage_EMA,away_assists_SMA,away_assists_CMA,away_assists_EMA,away_block_percentage_SMA,away_block_percentage_CMA,away_block_percentage_EMA,away_blocks_SMA,away_blocks_CMA,away_blocks_EMA,away_defensive_rating_SMA,away_defensive_rating_CMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_SMA,away_defensive_rebound_percentage_CMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_SMA,away_defensive_rebounds_CMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_SMA,away_effective_field_goal_percentage_CMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_SMA,away_field_goal_attempts_CMA,away_field_goal_attempts_EMA,away_field_goal_percentage_SMA,away_field_goal_percentage_CMA,away_field_goal_percentage_EMA,away_field_goals_SMA,away_field_goals_CMA,away_field_goals_EMA,away_free_throw_attempt_rate_SMA,away_free_throw_attempt_rate_CMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_SMA,away_free_throw_attempts_CMA,away_free_throw_attempts_EMA,away_free_throw_percentage_SMA,away_free_throw_percentage_CMA,away_free_throw_percentage_EMA,away_free_throws_SMA,away_free_throws_CMA,away_free_throws_EMA,away_losses_SMA,away_losses_CMA,away_losses_EMA,away_minutes_played_SMA,away_minutes_played_CMA,away_minutes_played_EMA,away_offensive_rating_SMA,away_offensive_rating_CMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_SMA,away_offensive_rebound_percentage_CMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_SMA,away_offensive_rebounds_CMA,away_offensive_rebounds_EMA,away_personal_fouls_SMA,away_personal_fouls_CMA,away_personal_fouls_EMA,away_points_SMA,away_points_CMA,away_points_EMA,away_steal_percentage_SMA,away_steal_percentage_CMA,away_steal_percentage_EMA,away_steals_SMA,away_steals_CMA,away_steals_EMA,away_three_point_attempt_rate_SMA,away_three_point_attempt_rate_CMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_SMA,away_three_point_field_goal_attempts_CMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_SMA,away_three_point_field_goal_percentage_CMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_SMA,away_three_point_field_goals_CMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_SMA,away_total_rebound_percentage_CMA,away_total_rebound_percentage_EMA,away_total_rebounds_SMA,away_total_rebounds_CMA,away_total_rebounds_EMA,away_true_shooting_percentage_SMA,away_true_shooting_percentage_CMA,away_true_shooting_percentage_EMA,away_turnover_percentage_SMA,away_turnover_percentage_CMA,away_turnover_percentage_EMA,away_turnovers_SMA,away_turnovers_CMA,away_turnovers_EMA,away_two_point_field_goal_attempts_SMA,away_two_point_field_goal_attempts_CMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_SMA,away_two_point_field_goal_percentage_CMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_SMA,away_two_point_field_goals_CMA,away_two_point_field_goals_EMA,away_win_percentage_SMA,away_win_percentage_CMA,away_win_percentage_EMA,away_wins_SMA,away_wins_CMA,away_wins_EMA
0,2019-11-17,"American Bank Center, Corpus Christi, Texas",North Dakota State,Stony Brook,Away,59.06,59.06,60.748148,13.6,13.6,13.012346,5.08,5.08,4.704938,1.8,1.8,1.580247,91.44,91.44,90.790123,83.66,83.66,82.006173,27.2,27.2,26.987654,0.4888,0.4888,0.477049,55.2,55.2,53.641975,0.4142,0.4142,0.398296,23.2,23.2,21.555556,0.3932,0.3932,0.370296,21.0,21.0,19.555556,0.7884,0.7884,0.800123,16.2,16.2,15.148148,1.0,1.0,1.0,199.8,199.8,199.802469,106.62,106.62,102.702469,26.12,26.12,23.65679,8.4,8.4,7.518519,13.2,13.2,13.407407,70.8,70.8,66.703704,7.38,7.38,7.507407,4.8,4.8,4.740741,0.471,0.471,0.493753,25.6,25.6,26.271605,0.3174,0.3174,0.318815,8.2,8.2,8.444444,55.1,55.1,53.501235,35.6,35.6,34.506173,0.5398,0.5398,0.527469,12.42,12.42,12.735802,9.2,9.2,9.185185,29.6,29.6,27.37037,0.4842,0.4842,0.463333,15.0,15.0,13.111111,0.5434,0.5434,0.581531,2.0,2.0,2.395062,49.0,49.0,49.120988,11.8,11.8,11.617284,15.36,15.36,13.42963,5.8,5.8,5.074074,87.66,87.66,86.544444,74.36,74.36,74.550617,25.8,25.8,25.592593,0.4646,0.4646,0.485864,60.8,60.8,58.802469,0.3926,0.3926,0.406642,23.6,23.6,23.37037,0.3672,0.3672,0.397914,21.0,21.0,21.740741,0.7182,0.7182,0.699975,15.2,15.2,15.37037,1.8,1.8,1.802469,205.0,205.0,204.938272,98.82,98.82,99.222222,31.38,31.38,28.197531,12.0,12.0,10.679012,14.0,14.0,13.481481,70.8,70.8,70.851852,8.86,8.86,8.940741,6.4,6.4,6.444444,0.4518,0.4518,0.451827,27.2,27.2,26.234568,0.3146,0.3146,0.343704,8.4,8.4,8.740741,53.1,53.1,52.322222,37.8,37.8,36.271605,0.506,0.506,0.522778,16.32,16.32,17.501235,13.4,13.4,14.148148,33.6,33.6,32.567901,0.4606,0.4606,0.460296,15.2,15.2,14.62963,0.2866,0.2866,0.360444,1.2,1.2,1.592593
1,2019-11-19,"A.G. Spanos Center, Stockton, California",Pacific,Coppin State,Home,46.56,45.466667,47.316461,12.0,11.666667,11.90535,8.22,8.183333,9.581481,2.2,2.166667,2.600823,94.42,90.55,86.714403,84.0,85.0,87.358436,24.4,24.833333,26.843621,0.5322,0.531667,0.524333,54.8,54.166667,54.08642,0.4756,0.478,0.472276,26.2,26.0,25.654321,0.3178,0.3465,0.355288,17.6,18.833333,19.345679,0.6628,0.652333,0.64284,11.2,11.833333,12.074074,1.6,1.333333,1.572016,200.0,200.0,200.0,109.78,108.9,108.503292,35.24,35.316667,32.730864,9.4,9.5,9.333333,15.2,16.666667,15.477366,69.8,69.666667,69.045267,11.28,11.166667,10.812757,7.2,7.166667,6.884774,0.3006,0.302833,0.286979,16.6,16.5,15.687243,0.3764,0.355333,0.364667,6.2,5.833333,5.662551,60.5,61.05,60.92428,33.8,34.333333,36.176955,0.551,0.550667,0.54351,12.6,13.016667,11.871193,9.0,9.333333,8.460905,38.2,37.666667,38.399177,0.5232,0.536,0.522975,20.0,20.166667,19.99177,0.5868,0.655667,0.660226,2.4,2.166667,2.691358,39.36,39.36,38.283951,9.0,9.0,9.259259,7.54,7.54,7.653086,2.6,2.6,2.617284,98.74,98.74,100.492593,72.74,72.74,69.771605,25.8,25.8,24.814815,0.407,0.407,0.425185,66.2,66.2,66.0,0.3474,0.3474,0.366716,22.8,22.8,24.098765,0.3176,0.3176,0.323642,20.6,20.6,21.08642,0.7126,0.7126,0.697506,14.8,14.8,14.82716,2.4,2.4,2.691358,200.0,200.0,200.0,84.1,84.1,87.290123,21.54,21.54,21.64321,9.2,9.2,8.950617,22.8,22.8,22.987654,68.2,68.2,70.666667,9.28,9.28,9.046914,7.6,7.6,7.407407,0.4754,0.4754,0.468247,31.8,31.8,31.111111,0.2632,0.2632,0.255556,7.8,7.8,7.641975,44.62,44.62,43.81358,35.0,35.0,33.765432,0.4496,0.4496,0.465407,16.48,16.48,16.412346,14.8,14.8,14.716049,34.4,34.4,34.888889,0.4368,0.4368,0.472333,15.0,15.0,16.45679,0.1566,0.1566,0.171556,0.6,0.6,0.703704
2,2019-11-20,"Daskalakis Athletic Center, Philadelphia, Penn...",Drexel,Bryant,Home,59.88,59.88,58.08642,14.4,14.4,13.345679,8.72,8.72,8.462963,3.6,3.6,3.469136,95.92,95.92,96.674074,80.28,80.28,76.877778,25.8,25.8,24.938272,0.515,0.515,0.502827,55.0,55.0,53.950617,0.4482,0.4482,0.438494,23.8,23.8,22.740741,0.407,0.407,0.438173,20.0,20.0,20.567901,0.6972,0.6972,0.717667,14.0,14.0,14.790123,1.6,1.6,1.888889,205.0,205.0,203.703704,91.6,91.6,88.416049,26.94,26.94,24.940741,8.8,8.8,8.395062,17.0,17.0,17.901235,68.8,68.8,67.098765,8.92,8.92,8.839506,6.6,6.6,6.617284,0.3904,0.3904,0.388099,21.8,21.8,21.333333,0.3468,0.3468,0.335617,7.2,7.2,6.82716,54.3,54.3,51.477778,34.6,34.6,33.333333,0.542,0.542,0.536617,23.14,23.14,24.403704,19.2,19.2,20.345679,33.2,33.2,32.617284,0.5124,0.5124,0.502358,16.6,16.6,15.91358,0.4134,0.4134,0.392642,1.4,1.4,1.506173,57.62,57.35,55.643621,17.8,17.166667,17.403292,11.3,9.416667,8.805761,4.4,3.666667,3.419753,82.64,86.0,83.51893,79.62,76.833333,78.460905,31.4,29.833333,31.119342,0.521,0.512167,0.505222,68.0,67.0,68.880658,0.4334,0.428333,0.427494,29.8,29.0,29.855967,0.2386,0.255333,0.249572,15.8,16.666667,16.740741,0.6914,0.679333,0.728457,10.8,11.166667,12.057613,2.0,1.833333,1.868313,200.0,200.0,200.0,103.66,103.05,105.719753,35.84,34.5,36.811934,13.4,12.833333,13.954733,19.0,19.833333,18.152263,82.2,80.333333,82.45679,11.32,11.55,11.466255,9.0,9.0,8.958848,0.5036,0.495,0.500111,34.0,33.0,34.201646,0.3486,0.338167,0.313041,11.8,11.166667,10.687243,58.46,56.233333,58.314815,44.8,42.666667,45.074074,0.54,0.532167,0.53163,17.82,17.083333,15.970782,16.0,15.166667,14.288066,34.0,34.0,34.679012,0.5162,0.5135,0.542395,18.0,17.833333,19.168724,0.42,0.35,0.46263,2.0,1.666667,2.395062
3,2019-11-21,"Winthrop Coliseum, Rock Hill, South Carolina",Winthrop,Tennessee Tech,Away,52.3,52.3,56.101235,15.0,15.0,16.728395,6.52,6.52,6.618519,2.2,2.2,2.333333,90.84,90.84,86.535802,74.1,74.1,74.764198,24.2,24.2,26.654321,0.527,0.527,0.541531,61.8,61.8,62.209877,0.455,0.455,0.467111,28.0,28.0,28.975309,0.2258,0.2258,0.260531,14.2,14.2,16.530864,0.4862,0.4862,0.509358,7.6,7.6,9.469136,1.2,1.2,1.358025,200.0,200.0,200.0,103.36,103.36,106.550617,33.0,33.0,36.482716,11.4,11.4,12.271605,18.2,18.2,18.222222,72.6,72.6,76.802469,7.4,7.4,8.318519,5.2,5.2,5.975309,0.4252,0.4252,0.430716,26.6,26.6,27.148148,0.325,0.325,0.331852,9.0,9.0,9.382716,52.44,52.44,55.488889,35.6,35.6,38.925926,0.5276,0.5276,0.544173,15.68,15.68,16.495062,12.6,12.6,13.666667,35.2,35.2,35.061728,0.549,0.549,0.571531,19.0,19.0,19.592593,0.6534,0.6534,0.65684,1.8,1.8,2.037037,43.14,43.65,37.239095,10.4,10.666667,8.0,6.32,6.166667,6.974074,2.2,2.166667,2.209877,98.64,100.033333,97.430041,71.52,70.933333,75.27284,23.0,22.0,21.288066,0.4242,0.439667,0.398642,56.8,57.0,55.374486,0.367,0.3805,0.343132,21.4,22.166667,19.222222,0.1558,0.147,0.127189,9.2,8.666667,7.271605,0.6868,0.6835,0.689782,6.6,6.166667,5.234568,3.0,2.666667,3.395062,205.0,204.166667,202.469136,79.94,81.633333,73.690123,27.74,27.95,28.074486,9.6,9.5,9.872428,13.2,14.5,13.329218,55.8,57.166667,49.683128,7.74,8.333333,9.016872,5.2,5.666667,5.921811,0.3856,0.396,0.42316,21.2,22.0,23.012346,0.315,0.313833,0.266218,6.4,6.666667,6.004115,48.5,48.15,49.236626,32.6,31.5,31.160494,0.4456,0.459,0.419004,22.36,22.616667,23.020988,17.2,17.5,17.320988,35.6,35.0,32.36214,0.3996,0.426833,0.396086,15.0,15.5,13.218107,0.29,0.241667,0.202959,1.0,0.833333,0.868313
4,2019-11-21,"Maples Pavilion, Stanford, California",Stanford,William & Mary,Home,52.62,52.62,54.062963,15.0,15.0,15.185185,8.92,8.92,8.966667,3.0,3.0,2.975309,82.8,82.8,83.458025,76.68,76.68,77.058025,24.8,24.8,24.382716,0.5796,0.5796,0.586593,54.2,54.2,53.45679,0.5156,0.5156,0.518963,28.0,28.0,27.765432,0.3804,0.3804,0.384519,19.8,19.8,19.950617,0.7644,0.7644,0.772049,14.4,14.4,14.851852,0.0,0.0,0.0,200.0,200.0,200.0,109.34,109.34,110.328395,21.32,21.32,23.088889,6.2,6.2,6.407407,14.6,14.6,14.271605,77.4,77.4,77.62963,12.76,12.76,12.88642,9.0,9.0,9.037037,0.3082,0.3082,0.304247,16.8,16.8,16.333333,0.414,0.414,0.440086,7.0,7.0,7.246914,50.3,50.3,51.52716,31.0,31.0,30.790123,0.6086,0.6086,0.617494,17.28,17.28,18.171605,13.2,13.2,13.888889,37.4,37.4,37.123457,0.5622,0.5622,0.552975,21.0,21.0,20.518519,1.0,1.0,1.0,3.0,3.0,3.395062,53.48,53.48,52.595062,15.0,15.0,14.703704,8.4,8.4,7.064198,4.0,4.0,3.506173,96.78,96.78,96.116049,77.36,77.36,78.709877,30.4,30.4,31.395062,0.5734,0.5734,0.572198,56.8,56.8,57.160494,0.4976,0.4976,0.492778,28.2,28.2,28.098765,0.2544,0.2544,0.222556,14.4,14.4,12.580247,0.7424,0.7424,0.735235,10.4,10.4,8.864198,0.2,0.2,0.333333,200.0,200.0,200.0,105.62,105.62,103.12716,21.88,21.88,19.211111,5.6,5.6,4.987654,15.2,15.2,16.160494,75.4,75.4,74.148148,5.58,5.58,4.751852,4.0,4.0,3.407407,0.3942,0.3942,0.420272,22.4,22.4,24.098765,0.382,0.382,0.380975,8.6,8.6,9.08642,54.42,54.42,53.922222,36.0,36.0,36.382716,0.5946,0.5946,0.588753,17.04,17.04,17.720988,13.0,13.0,13.530864,34.4,34.4,33.061728,0.5742,0.5742,0.578,19.6,19.6,19.012346,0.96,0.96,0.933333,2.8,2.8,3.061728
5,2019-11-22,"Baha Mar Convention Center, Nassau",Morgan State,Liberty,Away,48.92,48.92,49.995062,13.0,13.0,14.209877,19.26,19.26,20.241975,6.2,6.2,6.703704,82.58,82.58,77.65679,78.6,78.6,80.22716,26.2,26.2,26.54321,0.4506,0.4506,0.464235,63.2,63.2,65.45679,0.4112,0.4112,0.423506,26.2,26.2,27.91358,0.3176,0.3176,0.297753,19.6,19.6,19.024691,0.6158,0.6158,0.63984,12.4,12.4,12.481481,1.4,1.4,1.506173,202.0,202.0,200.987654,93.26,93.26,97.204938,38.8,38.8,42.976543,14.0,14.0,15.111111,24.2,24.2,24.567901,69.8,69.8,73.728395,9.02,9.02,9.991358,6.8,6.8,7.62963,0.3256,0.3256,0.322506,20.6,20.6,21.246914,0.2236,0.2236,0.234333,5.0,5.0,5.419753,57.36,57.36,60.453086,40.2,40.2,41.654321,0.479,0.479,0.492358,17.06,17.06,16.659259,14.8,14.8,14.82716,42.6,42.6,44.209877,0.4954,0.4954,0.508951,21.2,21.2,22.493827,0.5866,0.5866,0.607358,1.6,1.6,1.888889,56.64,56.64,55.593827,13.2,13.2,12.345679,11.24,11.24,13.039506,3.4,3.4,3.62963,84.42,84.42,84.898765,77.42,77.42,75.950617,24.4,24.4,23.864198,0.5308,0.5308,0.51042,51.6,51.6,51.024691,0.4496,0.4496,0.429617,23.2,23.2,21.962963,0.3328,0.3328,0.359741,16.2,16.2,17.493827,0.6298,0.6298,0.67642,11.0,11.0,12.419753,0.0,0.0,0.0,200.0,200.0,200.0,106.96,106.96,105.754321,25.34,25.34,22.209877,7.8,7.8,6.950617,14.6,14.6,15.308642,65.8,65.8,64.62963,9.86,9.86,9.702469,6.0,6.0,5.839506,0.444,0.444,0.44458,22.6,22.6,22.419753,0.3712,0.3712,0.368519,8.4,8.4,8.283951,51.94,51.94,49.364198,32.2,32.2,30.814815,0.5552,0.5552,0.543951,13.38,13.38,11.933333,9.2,9.2,8.148148,29.0,29.0,28.604938,0.5224,0.5224,0.486617,14.8,14.8,13.679012,1.0,1.0,1.0,3.0,3.0,3.395062
6,2019-11-22,"Madison Square Garden (IV), New York, New York",Duke,Georgetown,Home,48.84,48.84,51.722222,14.6,14.6,14.641975,14.22,14.22,13.598765,5.6,5.6,5.555556,76.48,76.48,78.02716,80.56,80.56,79.735802,27.6,27.6,26.876543,0.4994,0.4994,0.482864,69.0,69.0,68.308642,0.444,0.444,0.422333,30.8,30.8,28.975309,0.3366,0.3366,0.371914,23.0,23.0,25.17284,0.681,0.681,0.68516,15.4,15.4,17.074074,0.0,0.0,0.0,200.0,200.0,200.0,111.8,111.8,113.054321,37.22,37.22,41.307407,15.2,15.2,17.222222,17.0,17.0,16.345679,84.6,84.6,83.222222,13.48,13.48,12.296296,10.4,10.4,9.320988,0.3472,0.3472,0.365247,23.8,23.8,24.802469,0.327,0.327,0.336716,7.6,7.6,8.197531,58.06,58.06,59.220988,42.8,42.8,44.098765,0.5302,0.5302,0.519481,13.3,13.3,13.223457,12.2,12.2,12.160494,45.2,45.2,43.506173,0.5026,0.5026,0.468556,23.2,23.2,20.777778,1.0,1.0,1.0,3.0,3.0,3.395062,55.18,55.18,53.760494,15.0,15.0,14.407407,11.66,11.66,12.096296,4.2,4.2,4.271605,98.44,98.44,98.924691,76.0,76.0,73.176543,25.6,25.6,23.617284,0.4986,0.4986,0.502654,60.2,60.2,58.876543,0.4522,0.4522,0.457617,27.2,27.2,26.864198,0.4724,0.4724,0.509765,28.2,28.2,29.901235,0.7712,0.7712,0.771185,21.8,21.8,23.098765,0.6,0.6,0.703704,200.0,200.0,200.0,107.78,107.78,111.214815,36.74,36.74,37.848148,12.8,12.8,12.703704,16.2,16.2,15.975309,81.8,81.8,82.123457,8.94,8.94,8.750617,6.8,6.8,6.469136,0.2804,0.2804,0.268099,17.0,17.0,15.91358,0.335,0.335,0.345617,5.6,5.6,5.296296,56.32,56.32,55.454321,38.4,38.4,36.320988,0.5562,0.5562,0.563012,17.14,17.14,15.897531,15.2,15.2,13.802469,43.2,43.2,42.962963,0.5026,0.5026,0.504111,21.6,21.6,21.567901,0.8434,0.8434,0.828444,2.4,2.4,2.691358
7,2019-11-22,"Montego Bay Convention Centre, Montego Bay",Nicholls State,North Carolina A&T,Away,45.06,45.06,42.190123,12.2,12.2,11.358025,7.0,7.0,5.42963,2.8,2.8,2.17284,92.7,92.7,93.055556,61.9,61.9,62.838272,20.6,20.6,20.91358,0.4672,0.4672,0.461037,66.6,66.6,66.382716,0.408,0.408,0.408,27.2,27.2,27.111111,0.17,0.17,0.155136,11.4,11.4,10.358025,0.791,0.791,0.784667,9.0,9.0,8.160494,1.6,1.6,1.888889,205.0,205.0,204.938272,91.9,91.9,89.453086,27.04,27.04,25.406173,10.0,10.0,9.17284,20.2,20.2,20.641975,71.2,71.2,69.345679,14.42,14.42,13.869136,11.2,11.2,10.777778,0.3544,0.3544,0.342889,23.6,23.6,22.728395,0.3306,0.3306,0.303914,7.8,7.8,6.962963,43.78,43.78,43.565432,30.6,30.6,30.08642,0.494,0.494,0.485827,18.5,18.5,18.62716,16.2,16.2,16.209877,43.0,43.0,43.654321,0.4494,0.4494,0.459506,19.4,19.4,20.148148,0.4134,0.4134,0.392642,1.4,1.4,1.506173,47.36,47.36,46.940741,10.2,10.2,9.777778,11.48,11.48,12.917284,3.6,3.6,3.888889,106.14,106.14,111.449383,66.78,66.78,68.566667,20.2,20.2,19.123457,0.428,0.428,0.418543,55.0,55.0,54.283951,0.395,0.395,0.387605,21.8,21.8,21.098765,0.3692,0.3692,0.382605,19.4,19.4,19.876543,0.548,0.548,0.579926,11.0,11.0,12.012346,2.2,2.2,2.592593,200.0,200.0,200.0,80.12,80.12,80.723457,24.56,24.56,24.202469,9.0,9.0,8.641975,21.2,21.2,21.777778,58.2,58.2,57.580247,11.76,11.76,11.088889,8.6,8.6,7.975309,0.26,0.26,0.244346,14.2,14.2,13.259259,0.2362,0.2362,0.229296,3.6,3.6,3.37037,43.22,43.22,43.335802,29.2,29.2,27.765432,0.4534,0.4534,0.452037,21.32,21.32,20.587654,17.2,17.2,16.308642,40.8,40.8,41.024691,0.4402,0.4402,0.42637,18.2,18.2,17.728395,0.2566,0.2566,0.220938,0.8,0.8,0.802469
8,2019-11-22,"TD Arena, Charleston, South Carolina",Florida,Miami (FL),Home,45.9,45.9,44.408642,10.4,10.4,10.481481,11.66,11.66,13.675309,4.2,4.2,5.135802,90.96,90.96,89.946914,73.46,73.46,72.891358,25.4,25.4,26.493827,0.4378,0.4378,0.455877,56.8,56.8,57.271605,0.3908,0.3908,0.406765,22.4,22.4,23.407407,0.3518,0.3518,0.344667,19.6,19.6,19.444444,0.717,0.717,0.705358,14.0,14.0,13.592593,1.2,1.2,1.358025,200.0,200.0,200.0,94.82,94.82,96.520988,27.72,27.72,28.235802,9.6,9.6,9.716049,18.0,18.0,18.62963,64.0,64.0,65.876543,6.18,6.18,6.338272,4.2,4.2,4.345679,0.3642,0.3642,0.359802,20.2,20.2,20.17284,0.2536,0.2536,0.266148,5.2,5.2,5.469136,50.92,50.92,51.546914,35.0,35.0,36.209877,0.4834,0.4834,0.495173,14.0,14.0,13.997531,10.6,10.6,10.666667,36.6,36.6,37.098765,0.4648,0.4648,0.48179,17.2,17.2,17.938272,0.6534,0.6534,0.65684,1.8,1.8,2.037037,41.3,41.3,38.577778,12.0,12.0,11.024691,8.88,8.88,9.298765,3.6,3.6,3.82716,95.36,95.36,96.355556,62.84,62.84,62.061728,22.0,22.0,21.654321,0.505,0.505,0.507778,65.2,65.2,64.333333,0.4428,0.4428,0.439198,28.8,28.8,28.185185,0.2298,0.2298,0.232049,14.6,14.6,14.481481,0.701,0.701,0.734667,10.4,10.4,10.82716,1.0,1.0,1.0,200.0,200.0,200.0,107.02,107.02,106.990123,24.12,24.12,23.091358,8.6,8.6,8.185185,13.8,13.8,14.592593,76.2,76.2,76.074074,9.28,9.28,8.932099,6.6,6.6,6.345679,0.3382,0.3382,0.352444,22.0,22.0,22.592593,0.3632,0.3632,0.385864,8.2,8.2,8.876543,43.3,43.3,42.479012,30.6,30.6,29.839506,0.529,0.529,0.535494,10.26,10.26,10.82963,8.2,8.2,8.580247,43.2,43.2,41.740741,0.4768,0.4768,0.462975,20.6,20.6,19.308642,0.5434,0.5434,0.581531,2.0,2.0,2.395062
9,2019-11-22,"Baha Mar Convention Center, Nassau",Milwaukee,Rice,Away,53.34,53.34,54.718519,16.0,16.0,15.617284,10.0,10.0,9.966667,3.6,3.6,3.481481,86.24,86.24,85.897531,78.24,78.24,76.309877,28.0,28.0,26.888889,0.5216,0.5216,0.515605,67.2,67.2,64.444444,0.4542,0.4542,0.449309,30.4,30.4,28.91358,0.3368,0.3368,0.331012,23.4,23.4,21.604938,0.6784,0.6784,0.667938,15.2,15.2,13.888889,0.8,0.8,0.802469,215.0,215.0,207.407407,106.96,106.96,105.349383,33.32,33.32,32.796296,12.4,12.4,11.925926,23.0,23.0,21.54321,85.0,85.0,80.234568,7.98,7.98,7.538272,6.8,6.8,6.049383,0.3804,0.3804,0.374444,25.4,25.4,23.987654,0.3554,0.3554,0.35658,9.0,9.0,8.518519,55.56,55.56,54.448148,40.4,40.4,38.814815,0.5482,0.5482,0.540506,15.48,15.48,15.746914,14.0,14.0,13.567901,41.8,41.8,40.45679,0.5162,0.5162,0.506556,21.4,21.4,20.395062,0.7434,0.7434,0.779062,2.2,2.2,2.592593,58.28,55.866667,56.728395,15.6,14.166667,14.670782,9.66,8.85,5.787654,4.0,3.666667,2.345679,95.38,97.333333,106.604938,79.0,78.183333,74.335391,27.8,26.5,24.337449,0.5466,0.506167,0.531407,60.2,59.5,58.777778,0.4504,0.423,0.438004,27.0,25.166667,25.695473,0.3152,0.307333,0.361004,18.8,18.166667,21.242798,0.7492,0.724333,0.703403,13.8,13.0,14.790123,1.2,1.166667,1.333333,200.0,200.0,200.0,107.72,98.2,102.569136,24.94,22.5,20.27037,8.6,7.833333,6.987654,15.8,15.833333,16.950617,79.2,73.166667,77.090535,9.0,9.266667,9.120165,6.6,7.0,6.942387,0.4764,0.471333,0.487045,28.6,28.0,28.646091,0.3986,0.3455,0.376934,11.4,9.833333,10.909465,51.78,49.216667,46.065844,36.4,34.333333,31.325103,0.5738,0.535,0.558399,15.6,17.616667,16.792181,12.6,14.5,13.765432,31.6,31.5,30.131687,0.4902,0.483833,0.488774,15.6,15.333333,14.786008,0.6768,0.564,0.610021,2.8,2.333333,2.930041


In [13]:
# Save the dataframe that we just generated

clean_dir('../assets/data/cleaned_data')
data.to_csv('../assets/data/cleaned_data/cleaned_data.csv', index=None)