In [None]:
import os
import numpy as np
import pandas as pd

# Combine Team ID's with Game ID's for Master List

- Open Team ID's File
- Open Game ID's File
- Create Master List (DataFrame) of Game ID's per Team

### Open Team ID's File to be used for both 2016 and 2017 Game ID's

In [None]:
# location of the team id lookup table
team_ids_path = 'data/id_files/team_ids.csv'

# load only the 'id', 'name' and 'abbr' columns, keyed by team id
df_team_ids = pd.read_csv(
    team_ids_path,
    index_col='id',
    usecols=['id', 'name', 'abbr'],
)

In [None]:
# sanity check: the summary should report 32 entries in total,
# i.e. 30 MLB teams plus 2 All-Star teams (AL, NL)
df_team_ids.info()

### Open Game ID's Files

In [None]:
# locations of the per-season game id files (2016 and 2017)
game_ids_2016_path = 'data/id_files/game_ids_2016.csv'
game_ids_2017_path = 'data/id_files/game_ids_2017.csv'

# load each season's game ids, keyed by game id, ready to be combined with the team ids
df_game_ids_2016 = pd.read_csv(
    game_ids_2016_path,
    index_col='id',
)
df_game_ids_2017 = pd.read_csv(
    game_ids_2017_path,
    index_col='id',
)

In [None]:
# sanity check on the number of 2016 game id's
# baseline: (162 games * 15 team pairs) = 2,430 regular-season games,
#           + 1 all-star game = 2,431 game id's
# 26 games have been rescheduled for various reasons
# NOTE(review): confirm whether rescheduled games appear as extra rows beyond 2,431
df_game_ids_2016.info()

In [None]:
# sanity check on the number of 2017 game id's
# baseline: (162 games * 15 team pairs) = 2,430 regular-season games,
#           + 1 all-star game = 2,431 game id's
# 39 games have been rescheduled for various reasons
# NOTE(review): confirm whether rescheduled games appear as extra rows beyond 2,431
df_game_ids_2017.info()

### Function to associate Game ID's to Team ID's

In [None]:
# check_id function to return dataframe of combined team and game id's
def check_id(team_frame, game_frame):
    """
    Create new DataFrame with combined Team and Game ID's.

    Every game contributes one row for its home team (at_home=1, at_away=0)
    and one row for its away team (at_away=1, at_home=0), as long as that
    team id is present in team_frame's index. Rows are ordered by team
    (team_frame order) and, within a team, by game (game_frame order).

    Keyword arguments:
    team_frame -- pd.DataFrame indexed by unique Team ID's, with 'name' and
                  'abbr' columns
    game_frame -- pd.DataFrame indexed by unique Game ID's, with 'home_team',
                  'away_team', 'scheduled' and 'rescheduled' columns

    Returns:
    pd.DataFrame with columns team_id, team_name, team_abbr, game_id,
    at_away, at_home, scheduled, rescheduled (empty if either input has no rows).
    """
    # list of column names for new dataframe
    columns = ['team_id', 'team_name', 'team_abbr', 'game_id',
               'at_away', 'at_home', 'scheduled', 'rescheduled']

    # nothing to combine: return an empty frame without touching any column
    if len(team_frame) == 0 or len(game_frame) == 0:
        return pd.DataFrame([], columns=columns)

    # Single pass over the games: bucket every game under its home team and
    # its away team. Looking a team up is then one dict access instead of a
    # rescan of every game row for every team.
    games_by_team = {}
    game_fields = zip(game_frame.index, game_frame['home_team'], game_frame['away_team'],
                      game_frame['scheduled'], game_frame['rescheduled'])
    for game_id, home_id, away_id, scheduled, rescheduled in game_fields:
        games_by_team.setdefault(home_id, []).append([game_id, 0, 1, scheduled, rescheduled])
        # a game whose home and away ids coincide is recorded once, as a home game
        if away_id != home_id:
            games_by_team.setdefault(away_id, []).append([game_id, 1, 0, scheduled, rescheduled])

    # emit rows team by team so the output keeps team_frame order, then game_frame order
    records = []
    for team_id, team_name, team_abbr in zip(team_frame.index, team_frame['name'], team_frame['abbr']):
        for game_part in games_by_team.get(team_id, ()):
            records.append([team_id, team_name, team_abbr] + game_part)

    # create and return new DataFrame with combined team and game id's
    return pd.DataFrame(records, columns=columns)

# test check_id function        
#result = check_id(df_team_ids, df_game_ids)

In [None]:
# build the per-team list of games for each season;
# both seasons are combined with the same team id table
df_combined_ids_2016 = check_id(df_team_ids, df_game_ids_2016)
df_combined_ids_2017 = check_id(df_team_ids, df_game_ids_2017)

## Save Combined Team and Game ID's to File

In [None]:
# write each season's combined table to CSV; index=False leaves the row index out of the file
df_combined_ids_2016.to_csv(
    path_or_buf='data/id_files/combined_ids_2016.csv',
    index=False,
)
df_combined_ids_2017.to_csv(
    path_or_buf='data/id_files/combined_ids_2017.csv',
    index=False,
)

In [None]:
# paths to the combined team/game id files for 2016 and 2017
path_2016 = 'data/id_files/combined_ids_2016.csv'
path_2017 = 'data/id_files/combined_ids_2017.csv'

# read the combined id files back in with the default integer index
combined_ids_2016 = pd.read_csv(filepath_or_buffer=path_2016)
combined_ids_2017 = pd.read_csv(filepath_or_buffer=path_2017)

In [None]:
# summarize the reloaded 2016 combined ids (row count, columns, dtypes)
combined_ids_2016.info()

In [None]:
# summarize the reloaded 2017 combined ids (row count, columns, dtypes)
combined_ids_2017.info()