In [26]:
import pandas as pd
import json

In [27]:
# Load the tournament results table; the cells below filter it on the
# 'Year', 'Round' and 'Region Name' columns.
df = pd.read_csv('ncaa_results.csv')

In [28]:
# Names of the real bracket regions, in order of first appearance in the data.
# "Final Four" and "Championship" share the 'Region Name' column but are
# rounds rather than regions, so they are excluded here.
regions = [
    region_name
    for region_name in df["Region Name"].unique()
    if region_name not in ("Final Four", "Championship")
]

In [29]:
# Show the rows belonging to the 1985 tournament.
df.loc[df['Year'] == 1985]

Unnamed: 0,Year,Round,Region Number,Region Name,Seed,Score,Team,Team.1,Score.1,Seed.1
0,1985,1,1,West,1,83,St Johns,Southern,59,16
1,1985,1,1,West,2,81,VCU,Marshall,65,15
2,1985,1,1,West,3,65,NC State,Nevada,56,14
3,1985,1,1,West,4,85,UNLV,San Diego St,80,13
4,1985,1,1,West,5,58,Washington,Kentucky,65,12
5,1985,1,1,West,6,75,Tulsa,UTEP,79,11
6,1985,1,1,West,7,50,Alabama,Arizona,41,10
7,1985,1,1,West,8,54,Iowa,Arkansas,63,9
8,1985,1,2,East,1,68,Georgetown,Lehigh,43,16
9,1985,1,2,East,2,65,Georgia Tech,Mercer,58,15


In [30]:
def empty_tournament():
    ''' Build a blank bracket.

    Returns a list with one entry per round (32, 16, 8, 4, 2 and 1 games);
    each entry is a list of independent [None, None] matchup placeholders.
    '''
    games_per_round = (32, 16, 8, 4, 2, 1)
    # A fresh [None, None] is created for every slot so that filling in one
    # matchup never aliases another.
    return [
        [[None, None] for _ in range(game_count)]
        for game_count in games_per_round
    ]

In [31]:
def empty_tournament_by_region():
    ''' Build a blank bracket keyed by region.

    Every name in the module-level `regions` list maps to four rounds of
    8, 4, 2 and 1 [None, None] placeholders.  'Final Four' (one round of two
    games) and 'Championship' (one round of one game) are added afterwards.
    '''
    def blank_round(game_count):
        # Each slot is a distinct list object.
        return [[None, None] for _ in range(game_count)]

    bracket = {
        region: [blank_round(game_count) for game_count in (8, 4, 2, 1)]
        for region in regions
    }
    bracket['Final Four'] = [blank_round(2)]
    bracket['Championship'] = [blank_round(1)]
    return bracket

In [32]:
# Display the blank per-region bracket structure.
empty_tournament_by_region()

{'Championship': [[[None, None]]],
 'East': [[[None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None]],
  [[None, None], [None, None], [None, None], [None, None]],
  [[None, None], [None, None]],
  [[None, None]]],
 'Final Four': [[[None, None], [None, None]]],
 'Midwest': [[[None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None]],
  [[None, None], [None, None], [None, None], [None, None]],
  [[None, None], [None, None]],
  [[None, None]]],
 'South': [[[None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None]],
  [[None, None], [None, None], [None, None], [None, None]],
  [[None, None], [None, None]],
  [[None, None]]],
 'Southeast': [[[None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None],
   [None, None

In [34]:
def process_year(df, year):
    ''' Collect every game of one tournament year, grouped by round.

    Returns a list with one entry per distinct 'Round' value (in order of
    first appearance); each entry is a list of two-element matchups whose
    items are dicts with the team's name, seed and score.
    '''
    season = df.loc[df['Year'] == year]
    bracket = []
    for round_id in season.Round.unique():
        games = season.loc[season["Round"] == round_id]
        bracket.append([
            [
                # Left-hand team of the row.
                {
                    "name": game["Team"],
                    "seed": game["Seed"],
                    "score": game["Score"],
                },
                # Right-hand team of the row (the '.1' columns).
                {
                    "name": game["Team.1"],
                    "score": game["Score.1"],
                    "seed": game["Seed.1"],
                },
            ]
            for _, game in games.iterrows()
        ])
    return bracket

In [35]:
def process_year_by_region(df, year):
    ''' Collect one tournament year's games, split first by region then by round.

    Returns a dict keyed by each distinct 'Region Name' value found for the
    year; every value is a list of rounds, and every round is a list of
    two-element matchups holding name/seed/score dicts.
    '''
    season = df.loc[df['Year'] == year]
    brackets = {}
    for region_name in season["Region Name"].unique():
        region_games = season.loc[season['Region Name'] == region_name]
        brackets[region_name] = [
            [
                [
                    # Left-hand team of the row.
                    {
                        "name": game["Team"],
                        "seed": game["Seed"],
                        "score": game["Score"],
                    },
                    # Right-hand team of the row (the '.1' columns).
                    {
                        "name": game["Team.1"],
                        "score": game["Score.1"],
                        "seed": game["Seed.1"],
                    },
                ]
                for _, game in region_games.loc[region_games["Round"] == round_id].iterrows()
            ]
            for round_id in region_games.Round.unique()
        ]
    return brackets

In [36]:
def process_teams(df, year):
    ''' List the teams of each region for one year, ordered by seed.

    Only round-1 rows are read, since both teams of every first-round game
    are taken from them.  Returns a dict mapping each name in the
    module-level `regions` list to a list of {"name", "seed"} dicts sorted by
    ascending seed; a region with no matching rows maps to an empty list.
    '''
    opening_round = df.loc[(df['Year'] == year) & (df['Round'] == 1)]
    teams_by_region = {}
    for region in regions:
        games = opening_round.loc[opening_round['Region Name'] == region]
        entrants = []
        for _, game in games.iterrows():
            # Each row contributes its left-hand team, then its right-hand one.
            for name_col, seed_col in (('Team', 'Seed'), ('Team.1', 'Seed.1')):
                entrants.append({
                    "name": game[name_col],
                    "seed": game[seed_col]
                })
        # sorted() is stable, so equal seeds keep their row order.
        teams_by_region[region] = sorted(entrants, key=lambda entrant: entrant["seed"])
    return teams_by_region

In [37]:
# Display the 1985 bracket split by region.
process_year_by_region(df, 1985)

{'Championship': [[[{'name': 'Georgetown', 'score': 64, 'seed': 1},
    {'name': 'Villanova', 'score': 66, 'seed': 8}]]],
 'East': [[[{'name': 'Georgetown', 'score': 68, 'seed': 1},
    {'name': 'Lehigh', 'score': 43, 'seed': 16}],
   [{'name': 'Georgia Tech', 'score': 65, 'seed': 2},
    {'name': 'Mercer', 'score': 58, 'seed': 15}],
   [{'name': 'Illinois', 'score': 76, 'seed': 3},
    {'name': 'Northeastern', 'score': 57, 'seed': 14}],
   [{'name': 'Loyola Illinois', 'score': 59, 'seed': 4},
    {'name': 'Iona', 'score': 58, 'seed': 13}],
   [{'name': 'SMU', 'score': 85, 'seed': 5},
    {'name': 'Old Dominion', 'score': 68, 'seed': 12}],
   [{'name': 'Georgia', 'score': 67, 'seed': 6},
    {'name': 'Wichita St', 'score': 59, 'seed': 11}],
   [{'name': 'Syracuse', 'score': 70, 'seed': 7},
    {'name': 'DePaul', 'score': 65, 'seed': 10}],
   [{'name': 'Temple', 'score': 60, 'seed': 8},
    {'name': 'Virginia Tech', 'score': 57, 'seed': 9}]],
  [[{'name': 'Georgetown', 'score': 63, 'see

In [38]:
def process_all_years(df, by_region=False, first_year=1985, last_year=2017):
    ''' Build a {year: bracket} mapping for a range of tournament years.

    df         -- results table handed unchanged to the per-year processor.
    by_region  -- when true each year is built with process_year_by_region,
                  otherwise with process_year.
    first_year -- first year to include (default 1985).
    last_year  -- last year to include, inclusive (default 2017).

    The defaults cover 1985 through 2017, the range this function used
    before the bounds became parameters.
    '''
    # Pick the per-year builder once instead of re-testing the flag per year.
    build_year = process_year_by_region if by_region else process_year
    return {
        year: build_year(df, year)
        for year in range(first_year, last_year + 1)
    }

In [39]:
# Build both views of the data before touching the output file: opening with
# mode 'w' truncates master.json immediately, so computing inside the `with`
# block would leave an empty file behind if processing raised.
tournaments = process_all_years(df)
tournaments_by_region = process_all_years(df, True)
data = {
    "tournaments": tournaments,
    "tournaments_by_region": tournaments_by_region
}

with open('master.json', 'w') as outfile:
    json.dump(data, outfile, indent=4)

In [85]:
# Write the blank bracket templates (flat and per-region) to their JSON files.
for filename, builder in (
    ('empty.json', empty_tournament),
    ('empty_by_region.json', empty_tournament_by_region),
):
    with open(filename, 'w') as outfile:
        json.dump(builder(), outfile, indent=4, sort_keys=True)