In [None]:
import cupy as cp
import cuml, cudf
from sklearn.model_selection import train_test_split 
from cuml.linear_model import Ridge
from cuml.neighbors import KNeighborsRegressor
from cuml.svm import SVC
from cuml.ensemble import RandomForestRegressor
from cuml.preprocessing.TargetEncoder import TargetEncoder
from sklearn.model_selection import GroupKFold, KFold
from cuml.metrics import mean_squared_error
from tqdm.notebook import tqdm

In [None]:
# All three competition files live in the same input directory; keep that
# directory in one place so pointing the notebook at another copy of the data
# is a single-line change.
DATA_DIR = '../input/ncaam-march-mania-2021-spread'

# Tournament game results, tournament seeds and the sample submission,
# each read straight into a cuDF DataFrame.
train_cr = cudf.read_csv(f'{DATA_DIR}/MNCAATourneyCompactResults.csv')
train_seeds = cudf.read_csv(f'{DATA_DIR}/MNCAATourneySeeds.csv')
submission = cudf.read_csv(f'{DATA_DIR}/MSampleSubmissionStage1.csv')

In [None]:
# Preview the first rows of the compact tournament results.
train_cr.head(n=5)

In [None]:
# Preview the first rows of the tournament seeds table.
train_seeds.head(n=5)

In [None]:
# Quick sanity check of the metric call used throughout the notebook:
# one target of 2 against one prediction of 0, with squared=False.
check_truth = cp.array([2])
check_pred = cp.array([0])
mean_squared_error(check_truth, check_pred, squared=False)

In [None]:
# Numeric seed: characters 1-2 of the `Seed` string (same [1:3] slice as
# before), parsed as an integer. Done as a single vectorized string operation
# on the GPU instead of indexing the column one row at a time from Python,
# which forces a device round-trip per row.
train_seeds['seed_int'] = train_seeds['Seed'].str.slice(1, 3).astype('int64')

In [None]:
# Check the seeds table again now that `seed_int` has been added.
train_seeds.head(n=5)

In [None]:
# Results table shown once more for reference ahead of the merge.
train_cr.head(n=5)

In [None]:
# Seeds table shown once more for reference ahead of the merge.
train_seeds.head(n=5)

In [None]:
# Column renames that let the seeds table be joined onto the results twice:
# once keyed on the winning team (seed -> WS), once on the losing team (seed -> LS).
ren1 = dict(TeamID='WTeamID', seed_int='WS')
ren2 = dict(TeamID='LTeamID', seed_int='LS')

In [None]:
# Seeds table relabelled for the winner side and for the loser side.
winner_seeds = train_seeds.rename(columns=ren1)
loser_seeds = train_seeds.rename(columns=ren2)

# df1: results + winner seed (left join on Season/WTeamID).
# df2: df1 + loser seed (join type left at the merge default, on Season/LTeamID).
df1 = train_cr.merge(winner_seeds, how='left', on=['Season', 'WTeamID'])
df2 = df1.merge(loser_seeds, on=['Season', 'LTeamID'])



In [None]:
# Preview the results after the winner-seed merge (df1; the loser seed is only in df2).
df1.head(n=5)

In [None]:
# Per-game differences taken from the winner's side: seed gap and score gap.
seed_gap = df2['WS'] - df2['LS']
score_gap = df2['WScore'] - df2['LScore']

# Winner-perspective rows: rsl = 1.
df_w = cudf.DataFrame()
df_w['dff'] = seed_gap
df_w['rsl'] = 1
df_w['dff_points'] = score_gap

# Loser-perspective rows: the same games with both differences negated, rsl = 0.
df_l = cudf.DataFrame()
df_l['dff'] = -seed_gap
df_l['rsl'] = 0
df_l['dff_points'] = -score_gap

# Stack both perspectives into one frame (original row labels are kept, so
# every label appears twice).
df_prd = cudf.concat([df_w, df_l])
df_prd.head()

In [None]:
# Feature matrix: the seed difference as a single float32 column, shape (n, 1).
X = df_prd['dff'].values.astype(cp.float32).reshape(-1, 1)
# Regression target: the point difference, float32.
y = df_prd['dff_points'].values.astype(cp.float32)

In [None]:
# Build the test feature (seed of first team minus seed of second team) for
# every submission row.
#
# The seeds are pulled to the host once and indexed by (Season, TeamID), so each
# matchup costs two dict lookups instead of two boolean-mask filters over the
# whole seeds table plus a scalar write to device memory per row.
seed_table = train_seeds[['Season', 'TeamID', 'seed_int']].to_pandas()
seed_lookup = {
    (int(season), int(team)): int(seed)
    for season, team, seed in zip(
        seed_table['Season'], seed_table['TeamID'], seed_table['seed_int']
    )
}

seed_diffs = []
for matchup_id in submission['ID'].to_pandas():
    # ID is split on '_' into three integers: season, first team, second team.
    yr, o, t = [int(x) for x in matchup_id.split('_')]
    # A missing (season, team) pair raises KeyError naming the pair.
    seed_diffs.append(seed_lookup[(yr, o)] - seed_lookup[(yr, t)])

# Single upload to the GPU; float64 and shape (n_rows, 1) match what the
# original cp.zeros(shape=(len(submission), 1)) buffer produced.
X_test = cp.asarray(seed_diffs, dtype='float64').reshape(-1, 1)

In [None]:
# Ridge accumulators: one out-of-fold slot per training row, and a running
# test prediction that starts as the scalar 0.
n_train_rows = X.shape[0]
rr_train_oof = cp.zeros(n_train_rows)
rr_test_preds = 0
rr_train_oof.shape

In [None]:
# 10-fold cross-validation of a Ridge regressor on the seed-difference feature.
# Produces out-of-fold predictions for every training row (rr_train_oof) and
# the fold-averaged test prediction (rr_test_preds).
NUM_FOLDS = 10
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# Reset the accumulators in this cell: rr_test_preds is built up with `+=`, so
# re-running the loop without resetting it would stack a second round of fold
# predictions on top of the first.
rr_train_oof = cp.zeros((X.shape[0],))
rr_test_preds = 0

# tqdm wraps the split generator directly and is told the fold count, since a
# generator has no length for the progress bar to pick up.
for f, (train_ind, val_ind) in enumerate(tqdm(kf.split(X, y), total=NUM_FOLDS)):
    train, val = X[train_ind], X[val_ind]
    train_target, val_target = y[train_ind], y[val_ind]

    model = Ridge()
    model.fit(train, train_target)
    temp_oof = model.predict(val)
    temp_test = model.predict(X_test)

    # Each training row is in exactly one validation fold, so every OOF slot is
    # written once; the test prediction is the mean over folds.
    rr_train_oof[val_ind] = temp_oof
    rr_test_preds += temp_test / NUM_FOLDS

    # Per-fold validation score (squared=False).
    print(mean_squared_error(val_target, temp_oof, squared=False))

In [None]:
# Inspect the dtype of the Ridge out-of-fold buffer; it was allocated with
# cp.zeros without an explicit dtype, while X and y were cast to float32.
rr_train_oof.dtype

In [None]:
# Overall Ridge out-of-fold score: cast the OOF buffer to float32 and score it
# against the full target with squared=False.
rr_oof_f32 = rr_train_oof.astype('float32')
rr_oof_score = mean_squared_error(y, rr_oof_f32, squared=False)
print(rr_oof_score)

In [None]:
# KNN accumulators: one out-of-fold slot per training row, and a running
# test prediction that starts as the scalar 0.
n_train_rows = X.shape[0]
knn_train_oof = cp.zeros(n_train_rows)
knn_test_preds = 0
knn_train_oof.shape

In [None]:
# 10-fold cross-validation of a 250-neighbour KNN regressor on the
# seed-difference feature. Produces out-of-fold predictions for every training
# row (knn_train_oof) and the fold-averaged test prediction (knn_test_preds).
NUM_FOLDS = 10
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# Reset the accumulators in this cell: knn_test_preds is built up with `+=`, so
# re-running the loop without resetting it would stack a second round of fold
# predictions on top of the first.
knn_train_oof = cp.zeros((X.shape[0],))
knn_test_preds = 0

# tqdm wraps the split generator directly and is told the fold count, since a
# generator has no length for the progress bar to pick up.
for f, (train_ind, val_ind) in enumerate(tqdm(kf.split(X, y), total=NUM_FOLDS)):
    train, val = X[train_ind], X[val_ind]
    train_target, val_target = y[train_ind], y[val_ind]

    model = KNeighborsRegressor(n_neighbors=250)
    model.fit(train, train_target)
    temp_oof = model.predict(val)
    temp_test = model.predict(X_test)

    # Each training row is in exactly one validation fold, so every OOF slot is
    # written once; the test prediction is the mean over folds.
    knn_train_oof[val_ind] = temp_oof
    knn_test_preds += temp_test / NUM_FOLDS

    # Per-fold validation score (squared=False).
    print(mean_squared_error(val_target, temp_oof, squared=False))

In [None]:
# Overall KNN out-of-fold score: cast the OOF buffer to float32 and score it
# against the full target with squared=False.
knn_oof_f32 = knn_train_oof.astype('float32')
knn_oof_score = mean_squared_error(y, knn_oof_f32, squared=False)
print(knn_oof_score)

In [None]:
# NOTE(review): bare numeric literal with no surrounding context — presumably a
# score copied by hand from an earlier run; confirm, and if so move it into a
# markdown cell that says which model it belongs to.
11.512119

In [None]:
# Out-of-fold score of an equal-weight blend of the Ridge and KNN predictions.
blend_oof = 0.5*rr_train_oof.astype('float32') + 0.5*knn_train_oof.astype('float32')
blend_oof_score = mean_squared_error(y, blend_oof, squared=False)
print(blend_oof_score)

In [None]:
# Preview the submission template before filling in predictions.
submission.head(n=5)

In [None]:
# Write the Ridge test predictions. Item assignment always targets the `Pred`
# column; attribute assignment (`submission.Pred = ...`) only does so when the
# column already exists and otherwise sets a plain Python attribute, which
# would leave the CSV without predictions.
submission['Pred'] = rr_test_preds
submission.to_csv('rr_submission.csv', index=False)

In [None]:
# Write the KNN test predictions. Item assignment always targets the `Pred`
# column; attribute assignment (`submission.Pred = ...`) only does so when the
# column already exists and otherwise sets a plain Python attribute, which
# would leave the CSV without predictions.
submission['Pred'] = knn_test_preds
submission.to_csv('knn_submission.csv', index=False)

In [None]:
# Write the equal-weight blend of the KNN and Ridge test predictions. Item
# assignment always targets the `Pred` column; attribute assignment
# (`submission.Pred = ...`) only does so when the column already exists and
# otherwise sets a plain Python attribute, which would leave the CSV without
# predictions.
submission['Pred'] = 0.5*knn_test_preds + 0.5*rr_test_preds
submission.to_csv('blend_submission.csv', index=False)