In [None]:
import itertools, gc

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
import xgboost
import lightgbm
import catboost

import warnings
# Suppress every warning raised from here on. The filter is process-wide, so
# it also hides deprecation notices coming from the libraries imported above.
warnings.simplefilter('ignore')

# Directory the CSV inputs are read from (relative to the working directory).
INPUT_PATH = '../input/ncaam-march-mania-2021'

In [None]:
def make_key(seed, col):
    """Build '<Season>_<value>' string keys from a frame.

    Parameters
    ----------
    seed : pandas.DataFrame
        Frame holding a ``Season`` column and the column named by ``col``.
    col : str
        Name of the column whose values form the second half of the key.

    Returns
    -------
    pandas.Series
        Both columns cast to ``str`` and joined with an underscore,
        element by element.
    """
    season_text = seed['Season'].astype(str)
    value_text = seed[col].astype(str)
    return season_text + '_' + value_text

def make_slot(input_path=None):
    """Find the earliest bracket slot in which each pair of seeds can meet.

    Reads ``MNCAATourneySlots.csv`` and follows every first-round slot
    forward through the later rounds, so each first-round seed gets the slot
    it would occupy in rounds 1-6 if it kept advancing. Two seeds can meet
    wherever those paths share a slot.

    No intermediate frame is displayed; the caller decides what to show.

    Parameters
    ----------
    input_path : str, optional
        Directory holding ``MNCAATourneySlots.csv``. When omitted, the
        notebook-level ``INPUT_PATH`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per ordered pair of distinct seeds whose paths share at
        least one slot, sorted by the pair, with columns ``T_Seed``,
        ``O_Seed``, ``Slot`` and ``Round``. ``Slot`` and ``Round`` are the
        column-wise minimum over all slots the two seeds share.
    """
    if input_path is None:
        input_path = INPUT_PATH

    slot = pd.read_csv('{}/MNCAATourneySlots.csv'.format(input_path))
    # The round number is taken from the second character of the slot label.
    # NOTE(review): a label that is not shaped like 'R<digit>...' is classified
    # by whatever digit sits in that position (or fails the int cast) --
    # confirm against the actual file contents.
    slot['Round'] = slot['Slot'].str[1].astype(int)

    first_round = slot[slot['Round'] == 1]
    later_rounds = slot[slot['Round'] > 1]

    # feeder slot -> slot its winner advances to, for either side of a game.
    # Series.map needs a mapper with a unique index; if the file repeats the
    # same feeder label (e.g. one bracket listed for several seasons) keep
    # the first occurrence instead of letting the lookup raise.
    strong_map = later_rounds.drop_duplicates('StrongSeed').set_index('StrongSeed')['Slot']
    weak_map = later_rounds.drop_duplicates('WeakSeed').set_index('WeakSeed')['Slot']

    # Walk each first-round game forward and record, for both of its seeds,
    # the slot reached in every round (long format: Seed / Round / Slot).
    pieces = []
    current_slot = first_round['Slot']
    for round_no in range(1, 7):
        if round_no > 1:
            # A slot feeds the next round as either its strong or weak side.
            current_slot = current_slot.map(strong_map).fillna(current_slot.map(weak_map))
        for seed_col in ('StrongSeed', 'WeakSeed'):
            pieces.append(pd.DataFrame({
                'Seed': first_round[seed_col],
                'Round': round_no,
                'Slot': current_slot,
            }))

    stack = pd.concat(pieces, ignore_index=True)
    # Rounds a path never reaches have no slot; drop them explicitly so a
    # missing slot can never act as a join key below. Exact repeats add
    # nothing to the minimum taken at the end.
    stack = stack.dropna(subset=['Slot']).drop_duplicates()

    # Two seeds can meet exactly where their paths share a slot, so a
    # self-join on Slot yields every candidate meeting directly (no need to
    # enumerate the full seed x seed product first).
    team_side = stack[['Seed', 'Slot']].rename(columns={'Seed': 'T_Seed'})
    opponent_side = stack.rename(columns={'Seed': 'O_Seed'})
    pairs = team_side.merge(opponent_side, on='Slot')
    pairs = pairs.loc[
        pairs['T_Seed'] != pairs['O_Seed'],
        ['T_Seed', 'O_Seed', 'Slot', 'Round'],
    ]
    return pairs.groupby(['T_Seed', 'O_Seed'], as_index=False).min()

# Seasons table: DayZero parsed as a date, rows indexed by Season.
season = pd.read_csv(
    '{}/MSeasons.csv'.format(INPUT_PATH),
    parse_dates=['DayZero'],
).set_index('Season')
print('season:', season.shape)
display(season.head())

# Tournament seeds, indexed by the '<Season>_<TeamID>' key.
#   SeedNo - characters 1-2 of the seed label, parsed as an integer
#   SeedPi - the fourth character of the label, or '' when there is none
seed = pd.read_csv('{}/MNCAATourneySeeds.csv'.format(INPUT_PATH))
seed = seed.assign(
    Key=make_key(seed, 'TeamID'),
    SeedNo=seed['Seed'].str[1:3].astype(int),
    SeedPi=seed['Seed'].str[3].fillna(''),
).set_index('Key')
print('seed:', seed.shape)
display(seed.head())

# Pairwise seed -> earliest shared slot lookup.
slot = make_slot()
print('slot:', slot.shape)
display(slot.head())