# Goal
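In this notebook, we will reformat the data contained in `MNCAATourneyCompactResults.csv` to better match the required submission format. Each game is rewritten as one row with the columns `Season`, `LowID`, `HighID` and `Win`, where `LowID` and `HighID` are the two team IDs in ascending order and `Win` is 1 if the lower-ID team won the game and 0 otherwise.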

## Imports

In [1]:
import pandas as pd

## Read in the data

In [2]:
# Load the raw compact tournament results; the path is relative to the
# directory this notebook is run from. Then preview the first few rows.
df = pd.read_csv('../../data/raw/MNCAATourneyCompactResults.csv')
df.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,136,1116,63,1234,54,N,0
1,1985,136,1120,59,1345,58,N,0
2,1985,136,1207,68,1250,43,N,0
3,1985,136,1229,58,1425,55,N,0
4,1985,136,1242,49,1325,38,N,0


## Making a sample row 

In [3]:
# Take the first row of the frame as a sample input for the row parser below.
row = df.iloc[0]
row

Season     1985
DayNum      136
WTeamID    1116
WScore       63
LTeamID    1234
LScore       54
WLoc          N
NumOT         0
Name: 0, dtype: object

## Row parser

In [4]:
def parse_row(row):
    """Convert one game result into the ordered-ID outcome format.

    The two team IDs are put in ascending order and the result is expressed
    from the point of view of the lower-ID team.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by the keys 'Season', 'WTeamID' and 'LTeamID'
        (for example a pandas Series, a NumPy record or a dict).

    Returns
    -------
    dict
        A record with the keys 'Season', 'LowID', 'HighID' and 'Win'.
        'Win' is 1 when the lower-ID team won the game and 0 otherwise.

    Raises
    ------
    ValueError
        If neither ID is strictly smaller than the other (identical IDs or
        values that cannot be ordered, such as NaN).
    """
    season = row['Season']
    winning_team_id = row['WTeamID']
    losing_team_id = row['LTeamID']
    if winning_team_id < losing_team_id:
        # The winner has the lower ID, so the lower-ID team won.
        small_id, big_id, outcome = winning_team_id, losing_team_id, 1
    elif losing_team_id < winning_team_id:
        # The loser has the lower ID, so the lower-ID team lost.
        small_id, big_id, outcome = losing_team_id, winning_team_id, 0
    else:
        # Without this branch the locals below would be unbound and the
        # function would fail with an opaque UnboundLocalError.
        raise ValueError(
            'WTeamID and LTeamID must be two distinct, orderable IDs; got '
            f'{winning_team_id!r} and {losing_team_id!r}'
        )
    return {
        'Season': season,
        'LowID': small_id,
        'HighID': big_id,
        'Win': outcome,
    }

# Try the parser on the sample row taken from the first line of the data.
parse_row(row)

{'Season': 1985, 'LowID': 1116, 'HighID': 1234, 'Win': 1}

## Data Frame Parser

In [5]:
def parse_dataframe(df):
    """Convert a frame of game results into the ordered-ID outcome format.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Season', 'WTeamID' and 'LTeamID'. Any
        other columns are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per input row, in the same order, with a fresh 0..n-1 index
        and the columns 'Season', 'LowID', 'HighID' and 'Win'. 'LowID' and
        'HighID' are the two team IDs in ascending order; 'Win' is 1 when
        the lower-ID team won and 0 otherwise. An empty input yields an
        empty frame that still has these four columns.

    Raises
    ------
    ValueError
        If any row has IDs where neither is strictly smaller than the other
        (identical IDs or unorderable values such as NaN).
    """
    winner_ids = df['WTeamID'].to_numpy()
    loser_ids = df['LTeamID'].to_numpy()

    # Every game needs exactly one strictly smaller ID; anything else cannot
    # be expressed as a LowID/HighID pair.
    low_team_won = winner_ids < loser_ids
    low_team_lost = loser_ids < winner_ids
    if not (low_team_won | low_team_lost).all():
        raise ValueError(
            'every row must have two distinct, orderable team IDs in '
            'WTeamID and LTeamID'
        )

    team_ids = df[['WTeamID', 'LTeamID']]
    # Building from plain arrays drops the input index, so the result always
    # gets a fresh RangeIndex regardless of how `df` was sliced or filtered.
    out_df = pd.DataFrame({
        'Season': df['Season'].to_numpy(),
        'LowID': team_ids.min(axis=1).to_numpy(),
        'HighID': team_ids.max(axis=1).to_numpy(),
        'Win': low_team_won.astype('int64'),
    })
    return out_df
    
# Try the frame parser on the first five rows only, to keep the output short.
parse_dataframe(df[:5])

Unnamed: 0,Season,LowID,HighID,Win
0,1985,1116,1234,1
1,1985,1120,1345,1
2,1985,1207,1250,1
3,1985,1229,1425,1
4,1985,1242,1325,1


## Make and Write Tournament Outcomes

In [6]:
# Convert every game in the raw data and write the result to the processed
# data folder (path is relative to the directory this notebook is run from).
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if downstream readers do not expect it - confirm
# against the consumers of this file before changing.
outcomes = parse_dataframe(df)
outcomes.to_csv('../../data/processed/tournament_outcomes.csv')