In [None]:
import os, random, gc, pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import datetime
from kaggle.competitions import nflrush
import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.model_selection import KFold, GroupKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.spatial import distance_matrix
from scipy.stats import cumfreq
from scipy.optimize import nnls
import lightgbm as lgb
from joblib import Parallel, delayed
import multiprocessing
import keras
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from keras.engine.saving import load_model
from keras.layers import Dense, Activation, BatchNormalization, Dropout
from keras.layers import Input, Concatenate
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Embedding, Concatenate, Flatten, BatchNormalization, Dropout, Activation, PReLU, Add
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
from keras import metrics
from collections import Counter
from scipy.spatial import Voronoi, voronoi_plot_2d, KDTree, ConvexHull 
from shapely.ops import polygonize, unary_union
from shapely.geometry import LineString, MultiPolygon, MultiPoint, Point, Polygon
from shapely.geometry import shape, mapping
import math
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Notebook-level run configuration.
# Model name; in the visible cells it is only referenced by commented-out output filenames.
mname = 'zoo_keras'
# path = '/kaggle/input/nfl-big-data-bowl-2020/'
# Directory prefix that is string-concatenated with the training CSV filename.
path = './'
# Only referenced from commented-out code in the visible cells.
build_data = True
# Not referenced in the visible cells.
search = False
# Not read by live code in the visible cells (the training cell defines its own nrepw).
nrep = 10
# EarlyStopping patience (epochs) used by the training loop.
patience = 21
# Gates the hold-out training cell.
w_holdout = True
# When True, the training loop also runs permutation importance after each fit.
perm = False

In [None]:
# Wide, fixed-point (6 decimals) console output for numpy arrays and pandas frames.
np.set_printoptions(precision=6, suppress=True, linewidth=200)
pd.options.display.float_format = lambda x: '%.6f' % x
pd.options.display.width = 200

In [None]:
# standardize coordinates, angles, yardline so offense is always driving to the right
# https://www.kaggle.com/kernels/scriptcontent/21906255/
def std_cols(df):
    """Standardize tracking columns so the offense always drives to the right.

    The frame is modified in place and also returned. Columns read:
    ``PlayDirection``, ``X``, ``Y``, ``Dir``, ``YardLine``, ``FieldPosition``,
    ``PossessionTeam`` and ``Dis``.

    Changes applied:
      * ``PossessionTeam`` / ``FieldPosition``: four alternate team
        abbreviations are mapped to a single spelling.
      * ``X`` / ``Y``: mirrored (``120 - X``, ``53.3 - Y``) for plays whose
        ``PlayDirection`` is not ``'right'``.
      * ``Dir``: remapped to ``180 - ((Dir +/- 90) mod 360)`` (``+90`` for
        right-moving plays, ``-90`` otherwise).
      * ``YardLine``: ``YardLine + 10`` when ``FieldPosition`` equals
        ``PossessionTeam``, otherwise ``110 - YardLine``.
      * ``S``: overwritten with ``10 * Dis``.

    All transformations are vectorized; the results match the previous
    row-by-row ``DataFrame.apply`` implementation but avoid one Python call per
    row per column, and an empty frame is handled without error.
    """
    # fix inconsistent team abbreviations
    ita = {'ARZ': 'ARI', 'BLT': 'BAL', 'CLV': 'CLE', 'HST': 'HOU'}
    for col in ('PossessionTeam', 'FieldPosition'):
        for old, new in ita.items():
            df.loc[df[col] == old, col] = new

    # Anything that is not explicitly 'right' (including missing values) is
    # treated as a left-moving play, exactly as the row-wise version did.
    moving_right = df['PlayDirection'] == 'right'

    df['X'] = np.where(moving_right, df['X'], 120 - df['X'])
    df['Y'] = np.where(moving_right, df['Y'], 53.3 - df['Y'])

#     # adjust 2017 Orientation as it differs by 90 degrees from 2018 and 2019
#     df.loc[df.Season == 2017, 'Orientation'] = np.mod(df.Orientation + 90, 360)
#     # set angles so 0 degrees is directly downfield for rusher and range -180 to 180
#     df['Orientation'] = df.apply(lambda x: 180 - np.mod(x.Orientation + 90 \
#                                      if x.PlayDirection == 'right' \
#                                      else x.Orientation - 90, 360), axis=1)

    rotated = np.where(moving_right, df['Dir'] + 90, df['Dir'] - 90)
    df['Dir'] = 180 - np.mod(rotated, 360)

    # A missing FieldPosition compares unequal and therefore takes the
    # "opponent side" branch, as before.
    own_side = df['FieldPosition'] == df['PossessionTeam']
    df['YardLine'] = np.where(own_side,
                              df['YardLine'] + 10,
                              60 + (50 - df['YardLine']))

    df.loc[:, 'S'] = 10 * df['Dis']

    return df

In [None]:
%%time
# Global plot styling for the notebook.
sns.set_style('darkgrid')
mpl.rcParams['figure.figsize'] = [15,10]

# Load the training data from `path`; WindSpeed is read as object instead of
# letting pandas infer a dtype for it.
train = pd.read_csv(path + 'train1.csv', dtype={'WindSpeed': 'object'})
print(train.shape)

In [None]:
%%time
# Standardize coordinates/angles/yardline via std_cols.
# NOTE(review): std_cols modifies its argument in place and `train` is rebound
# to that same object, so re-running this cell without re-reading `train`
# applies the transformation a second time.
# train0 = train.copy()
train = std_cols(train)
# train_df = train.copy()
# train_df is an alias of train (same object), not a copy.
train_df = train
print(train_df.shape)

In [None]:
# Build per-player features relative to the rusher.
df = train_df
mode = 'train'
verbose = True

# make a copy so as not to alter the original df
df = df.copy()

# A player is on offense when his side (home/away) is the team in possession.
# Vectorized replacement for the former row-wise apply; same labels.
home_has_ball = (df.Team == 'home') & (df.PossessionTeam == df.HomeTeamAbbr)
away_has_ball = (df.Team == 'away') & (df.PossessionTeam == df.VisitorTeamAbbr)
df['OffenseDefense'] = np.where(home_has_ball | away_has_ball, "Offense", "Defense")

df['IsRusher'] = df['NflId'] == df['NflIdRusher']

# The ball carrier gets his own label so he is excluded from the offense group.
df.loc[df.IsRusher, 'OffenseDefense'] = "Rusher"

keep = ['GameId','PlayId','X','Y','Dir','S','IsRusher','OffenseDefense']

# Explicit copy: the columns added below must not be written to a slice of the
# wider frame.
df = df[keep].copy()

if verbose:
    print(df.shape)

# flip defense direction
# df.loc[df.OffenseDefense=='Defense','Dir'] = df.loc[df.OffenseDefense=='Defense','Dir'] + 180

# Velocity components from speed S and direction Dir (degrees -> radians).
newdir = df.Dir
# newdir = 0.99*df.Dir + 0.01*df.Orientation
df['SX'] = df.S * np.cos(newdir/180*np.pi)
df['SY'] = df.S * np.sin(newdir/180*np.pi)

# One row per play holding the rusher's position and velocity.
bdf = df.loc[df.IsRusher,['PlayId','X','Y','SX','SY']]
bdf.columns = ['PlayId','Ball_X','Ball_Y','Ball_SX','Ball_SY']

# bdf = df.loc[df.IsRusher,['PlayId','X','Y','S','Dir']]
# bdf.columns = ['PlayId','Ball_X','Ball_Y','Ball_S','Ball_Dir']

# Left merge keeps the row order of df while attaching the rusher columns.
df = df.merge(bdf, how='left', on='PlayId')

# Position and velocity of every player relative to the rusher.
df['XR'] = df.X - df.Ball_X
df['YR'] = df.Y - df.Ball_Y

df['SXR'] = df.SX - df.Ball_SX
df['SYR'] = df.SY - df.Ball_SY

# df['SXR'] = (df.S - df.Ball_S) * np.cos(df.Dir/180*np.pi)
# df['SYR'] = (df.S - df.Ball_S) * np.sin(df.Dir/180*np.pi)


print(df.head(), df.shape)

# return X

In [None]:
# Split the non-rusher players by side, keeping PlayId plus the rusher-relative
# features; feature columns get an 'O' (offense) or 'D' (defense) suffix.
k = ['PlayId','XR','YR','SX','SY','SXR','SYR']
_offense_rows = df.OffenseDefense == 'Offense'
_defense_rows = df.OffenseDefense == 'Defense'
o = df.loc[_offense_rows, k].rename(columns={c: c + 'O' for c in k[1:]})
d = df.loc[_defense_rows, k].rename(columns={c: c + 'D' for c in k[1:]})
print(o.shape, d.shape)

In [None]:
# Pair every offense row with every defense row of the same play.
# An inner join keeps the plays in the order they appear in `o` (i.e. the order
# of the source frame); an outer join would re-sort the rows by PlayId, which is
# only aligned with the targets taken via [::22] if the file happens to be
# sorted by PlayId. Every play has rows on both sides, so no rows are lost.
od = o.merge(d, how='inner', on='PlayId')
print(od.shape)

In [None]:
# Sanity check: pair rows per play. Divides by the actual number of plays
# instead of a hard-coded count so the check holds for any input file.
od.shape[0] / o['PlayId'].nunique()

In [None]:
# Preview the first offense/defense pair rows produced by the merge.
od.head()

In [None]:
# Offense-minus-defense differences of relative position and of velocity.
od['XOD'] = od.XRO - od.XRD
od['YOD'] = od.YRO - od.YRD
od['SXOD'] = od.SXO - od.SXD
od['SYOD'] = od.SYO - od.SYD
# head() must be called; printing the bare attribute shows the bound method.
print(od.head(), od.shape)

In [None]:
# Remove the key and the offense-only columns; the defense columns and the
# offense-minus-defense differences remain as features.
_offense_only = ['PlayId','XRO','YRO','SXO','SYO','SXRO','SYRO']
od = od.drop(columns=_offense_only)
print(od.shape, od.shape[0]/110)

In [None]:
# Summary statistics of the remaining pairwise feature columns.
od.describe()

In [None]:
# Preview the feature columns that are left after the drop.
od.head()

In [None]:
# cols = od.columns
# scaler = preprocessing.StandardScaler()
# od = scaler.fit_transform(od)
# od = np.nan_to_num(od)
# od = pd.DataFrame(od, columns=cols)
# od.describe()

In [None]:
# View the pair table as a 4-D tensor: (play, offense, defense, feature),
# i.e. 10 x 11 pair rows per play with one trailing axis per feature column.
_n_features = len(od.columns)
x4 = od.values.reshape((-1, 10, 11, _n_features))
print(x4.shape)

In [None]:
# x4 = x4.transpose(0, 2, 1, 3)
# print(x4.shape)

In [None]:
# Peek at the first play: first slot on axis 1, first five slots on axis 2, all features.
x4[0,0,:5]

In [None]:
# %%time
# if build_data:
#     X0 = cruncher0(train_df, mode='train', verbose=True)

In [None]:
# Fraction of NaN entries in the feature tensor before imputation.
np.isnan(x4).mean()

In [None]:
# Replace NaN with 0 (np.nan_to_num also maps +/-inf to large finite values).
x4 = np.nan_to_num(x4)

In [None]:
# Re-check the NaN fraction after imputation; expected to be 0.
np.isnan(x4).mean()

In [None]:
# Independent copy of the feature tensor used as the network input.
x4_train = x4.copy()

In [None]:
# target
# Every 22nd row of train_df is taken as one play.
n = len(train_df) // 22
print(n)

y01_train = train_df["Yards"][::22].values.copy()

# Step-function (CDF) target on the full 199-bin grid: column j is 1 when
# j >= yards + 99, else 0.
_full_grid = np.arange(199)[np.newaxis, :]
y0_train = (_full_grid >= (y01_train[:, np.newaxis] + 99)).astype(float)

# limit target range: clip yards to [MIN, MAX] and shift so the minimum is 0
MIN = -10
MAX = 35
# MIN = -30
# MAX = 50
y = np.clip(y01_train, MIN, MAX) - MIN

num_class = MAX - MIN + 1

# Same step-function encoding on the reduced grid of num_class bins.
_reduced_grid = np.arange(num_class)[np.newaxis, :]
y_train = (_reduced_grid >= y[:, np.newaxis]).astype(float)

y1_train = y

print(y0_train.shape, y01_train.shape)
print(y_train.shape, y1_train.shape)

In [None]:
# y_true is a vector of scalars and y_pred cdfs
def crps0(y_true, y_pred):
    """Mean squared error between predicted CDFs and step-function targets.

    Parameters
    ----------
    y_true : sequence of scalars
        Observed values; each is floored to an integer ``yf``.
    y_pred : array-like, shape (len(y_true), 199)
        Predicted CDF per observation on the 199-bin grid.

    Returns
    -------
    float
        Average over observations of the mean squared difference between the
        prediction and the step function that is 0 before index ``yf + 99``
        and 1 from that index on.

    Raises
    ------
    ValueError
        If ``y_true`` is empty.

    Notes
    -----
    Computed in one vectorized expression rather than one sklearn call per
    row. Observations below -99 now map to an all-ones step function; the
    previous slice-based construction wrapped around via a negative start index.
    """
    yards = np.floor(np.asarray(y_true, dtype=float)).astype(int)
    n = len(yards)
    if n == 0:
        raise ValueError('crps0 requires at least one observation')
    # Rows of y_pred beyond len(y_true) were never read by the loop version.
    pred = np.asarray(y_pred, dtype=float)[:n]
    step = (np.arange(199)[np.newaxis, :] >= (yards[:, np.newaxis] + 99)).astype(float)
    # All rows have equal length, so the overall mean equals the mean of row means.
    return np.mean((step - pred) ** 2)

In [None]:
# enforce monotonicity
def mono(p):
    """Make every row of ``p`` non-decreasing and non-negative, in place.

    Each element is replaced by the largest value seen so far in its row, with
    the running maximum starting at 0 (so leading negative values become 0).
    ``p`` is modified in place and returned.

    NaN-free, non-empty 2-D numeric numpy arrays take a vectorized path. Any
    other input (nested lists, arrays containing NaN, other ranks) falls back
    to the original element-by-element loop so its exact semantics are kept.
    """
    vectorizable = (
        isinstance(p, np.ndarray)
        and p.ndim == 2
        and p.size > 0
        and p.dtype.kind in 'fiu'
        and not (p.dtype.kind == 'f' and np.isnan(p).any())
    )
    if vectorizable:
        # Running maximum along each row, floored at 0, written back into p.
        np.maximum(np.maximum.accumulate(p, axis=1), 0, out=p)
        return p

    for pred in p:
        prev = 0
        for i in range(len(pred)):
            if pred[i] < prev:
                pred[i] = prev
            prev = pred[i]
    return p

In [None]:
# LightGBM parameter set (L1 regression). In the visible cells it is only
# consumed by commented-out code.
# NOTE(review): 'num_leaves' is 64 while 'max_depth' is 4 — confirm which limit
# is meant to bind.
# NOTE(review): 'subsample' is set while 'subsample_freq' is commented out;
# presumably row subsampling is inactive without it — verify against the
# LightGBM parameter documentation.
lgb_params = {
#     'device': 'gpu',
    'objective':'regression_l1',
#     'is_unbalance': True,
    'boosting_type':'gbdt',
    'metric': 'l1',
    'n_jobs': -1,
    'learning_rate': 0.01,
    'num_leaves': 2**6,
    'max_depth': 4,
    'tree_learner':'serial',
    'colsample_bytree': 0.7,
#     'subsample_freq': 1,
    'subsample': 0.7,
    'max_bin': 255,
    'verbose': -1,
    'seed': 123,
} 

# parallelize lgb predictor
# def gb_cox(tr_yc, v):
#     cf = cumfreq(tr_yc + v, numbins=199, defaultreallimits=(-99,100))
#     return cf.cumcount / len(tr_yc) 

def gb_cox(tr_yc, v):
    """Empirical CDF of the sample ``tr_yc`` shifted by ``v``.

    The shifted values are binned into ``num_class`` bins over
    ``(0, num_class)`` and the cumulative counts are divided by the sample
    size, giving one cumulative frequency per bin.
    """
    shifted = tr_yc + v
    cumulative = cumfreq(shifted, numbins=num_class,
                         defaultreallimits=(0, num_class)).cumcount
    return cumulative / len(tr_yc)

In [None]:
# adjust predictions by modified yardline
def adjusty(p, df, y_true=None, reduced=True):
    """Clip predicted CDFs to the yardage that is possible from the yardline.

    Parameters
    ----------
    p : ndarray
        Predicted CDFs, one row per play. With ``reduced=True`` the rows are on
        the reduced grid and are embedded into a 199-bin grid (zeros below
        column ``99 + MIN``, ones from column ``100 + MAX`` on). With
        ``reduced=False`` the rows are already 199 bins wide.
    df : DataFrame
        Frame with 22 consecutive rows per play; row ``i * 22`` supplies the
        standardized ``YardLine`` of play ``i``. ``FieldPosition``,
        ``PossessionTeam`` and ``Yards`` are read only for the diagnostic print.
    y_true : ndarray or None
        Optional step-function targets (n, 199). When given, the adjusted row
        is checked to be no worse (in MSE) than the unadjusted one.
    reduced : bool
        See ``p``.

    Returns
    -------
    ndarray, shape (n, 199)
        Copy of the (embedded) predictions with the leading ``100 - y - 1``
        bins forced to 0 and the trailing ``y - 1`` bins forced to 1, where
        ``y = YardLine - 10``.

    Notes
    -----
    Only the first inconsistency is printed, and every row is still adjusted.
    Previously the diagnostic ``break`` also stopped the clipping loop, leaving
    all later rows unadjusted. The per-row MSE is computed with numpy.
    """
    n = len(p)
#     p = np.cumsum(p, axis=1)
#     p = np.clip(p, 0, 1)
    if reduced:
        pred = np.zeros((n, 199))
        pred[:, (99+MIN):(100+MAX)] = p
        pred[:, 100+MAX:] = 1
    else:
        pred = p
    cdf = pred.copy()
    yardlines = df["YardLine"].values
    reported = False
    for i in range(0, n):
        r = i*22
        y = yardlines[r] - 10

        if y < 99: cdf[i,:(100-y-1)] = 0
        if y > 1: cdf[i,-(y-1):] = 1

        # check for improvement, should never be worse
        if y_true is not None and not reported:
            truth = np.asarray(y_true[i], dtype=float)
            mse_orig = np.mean((truth - pred[i]) ** 2)
            mse_new = np.mean((truth - cdf[i]) ** 2)
            if (mse_new > mse_orig):
                print('adjusty inconsistency', i, df["FieldPosition"].iloc[r],
                      df["PossessionTeam"].iloc[r],
                      df["YardLine"].iloc[r], y, df["Yards"].iloc[r], mse_orig, mse_new)
                print(y_true[i])
                print(pred[i])
                print(cdf[i])
                # Stop reporting (keeps the output short) but keep adjusting.
                reported = True

    return cdf

In [None]:
# adjust predictions by modified yardline
def adjusty2(p, yardline, y_true=None, reduced=True):
    """Clip predicted CDFs to the yardage that is possible from the yardline.

    Same adjustment as ``adjusty`` but takes the per-play yardline directly.

    Parameters
    ----------
    p : ndarray
        Predicted CDFs, one row per play. With ``reduced=True`` the rows are on
        the reduced grid and are embedded into a 199-bin grid (zeros below
        column ``99 + MIN``, ones from column ``100 + MAX`` on). With
        ``reduced=False`` the rows are already 199 bins wide.
    yardline : sequence of int
        One value ``y`` per play; its length sets the number of rows processed.
    y_true : ndarray or None
        Optional step-function targets (n, 199) used for the consistency check.
    reduced : bool
        See ``p``.

    Returns
    -------
    ndarray, shape (n, 199)
        Copy of the (embedded) predictions with the leading ``100 - y - 1``
        bins forced to 0 and the trailing ``y - 1`` bins forced to 1.

    Notes
    -----
    Only the first inconsistency is printed, and every row is still adjusted.
    Previously the diagnostic ``break`` also stopped the clipping loop, leaving
    all later rows unadjusted. The per-row MSE is computed with numpy.
    """
    n = len(yardline)
    if reduced:
#         p = np.cumsum(p, axis=1)
#         p = np.clip(p, 0, 1)
        pred = np.zeros((n, 199))
        pred[:, (99+MIN):(100+MAX)] = p
        pred[:, 100+MAX:] = 1
    else:
        # pred = np.clip(p, 0, 1)
        pred = p
    cdf = pred.copy()
    reported = False
    for i in range(0, n):
        y = yardline[i]

        if y < 99: cdf[i,:(100-y-1)] = 0
        if y > 1: cdf[i,-(y-1):] = 1

        # check for improvement, should never be worse
        if y_true is not None and not reported:
            truth = np.asarray(y_true[i], dtype=float)
            mse_orig = np.mean((truth - pred[i]) ** 2)
            mse_new = np.mean((truth - cdf[i]) ** 2)
            if (mse_new > mse_orig):
                print('adjusty2 inconsistency', i, y, mse_orig, mse_new)
                print(y_true[i])
                print(pred[i])
                print(cdf[i])
                # Stop reporting (keeps the output short) but keep adjusting.
                reported = True

    return cdf

In [None]:
def permutation_importance(X, y, model, func, better='smaller', nrep=5):
    """Permutation importance of every column of ``X`` for a fitted model.

    Parameters
    ----------
    X : DataFrame
        Feature frame. Each column is temporarily shuffled in place and is
        restored (values and dtype) before the next column is processed, even
        if ``model.predict`` or ``func`` raises.
    y : array-like
        Targets passed to ``func`` as its first argument.
    model : object
        Anything exposing ``predict(X)``.
    func : callable
        ``func(y, pred) -> float`` score.
    better : str
        ``'smaller'`` reports ``mean shuffled score - baseline``; any other
        value reports ``baseline - mean shuffled score``.
    nrep : int
        Number of shuffles averaged per column; must be at least 1.

    Returns
    -------
    DataFrame
        Columns ``Feature`` and ``Perm``, one row per column of ``X``.

    Raises
    ------
    ValueError
        If ``nrep`` is smaller than 1.
    """
    if nrep < 1:
        raise ValueError('nrep must be at least 1')

    perm = {}
    pred = model.predict(X)
    baseline = func(y, pred)
    print('\nPermutation Importance Baseline Score', baseline)
    for i, c in enumerate(X.columns):
        values = X[c].values.copy()
        dtype = X[c].dtype.name
        score = 0.0
        try:
            for r in range(nrep):
                X[c] = np.random.permutation(values)
                X[c] = X[c].astype(dtype)
                pred = model.predict(X)
                score = score + func(y, pred)
        finally:
            # Never leave the caller's frame shuffled, even on error.
            X[c] = values.copy()
            X[c] = X[c].astype(dtype)
        if better=='smaller':
            perm[c] = score/nrep - baseline
        else:
            perm[c] = baseline - score/nrep
        print(f'{i} {perm[c]:11.8f} {c}')

    # Built from items so a frame without columns yields an empty, well-formed result.
    df = pd.DataFrame(list(perm.items()), columns=['Feature','Perm'])

    return df

In [None]:
# feature list and X_list assumed to be nested lists of same sizes, X_list contains numpy arrays
def permutation_importance_list(feature_list, X_list, y, yardline, model, func, better='smaller', nrep=5):
    """Permutation importance for a model fed a list of numpy input arrays.

    Parameters
    ----------
    feature_list : list of list of str
        Feature names per input array; entry ``c`` of a name list labels slice
        ``[..., c]`` of the matching array. Inputs with an empty name list are
        skipped.
    X_list : list of ndarray
        Model inputs, parallel to ``feature_list``. Each feature slice is
        shuffled in place along the first axis and restored afterwards, even if
        prediction or scoring raises.
    y : ndarray
        Targets passed to ``adjusty2`` (consistency check) and to ``func``.
    yardline : sequence of int
        Per-play yardline passed to ``adjusty2``.
    model : object
        Anything exposing ``predict(X_list)``.
    func : callable
        ``func(y, pred) -> float`` score.
    better : str
        ``'smaller'`` reports ``mean shuffled score - baseline``; any other
        value reports ``baseline - mean shuffled score``.
    nrep : int
        Number of shuffles averaged per feature; must be at least 1.

    Returns
    -------
    DataFrame
        Columns ``Feature`` and ``Perm``. A feature name that occurs more than
        once keeps only its last result.

    Raises
    ------
    ValueError
        If ``nrep`` is smaller than 1.
    """
    if nrep < 1:
        raise ValueError('nrep must be at least 1')

    perm = {}
    p = model.predict(X_list)
    p = mono(p)
    pred = adjusty2(p, yardline, y_true=y)
    baseline = func(y, pred)
    print('\npermutation importance baseline score', baseline)
    for feat, X in zip(feature_list, X_list):
        if len(feat) == 0: continue
        for c, f in enumerate(feat):
            values = X[...,c].copy()
            score = 0.0
            try:
                for r in range(nrep):
                    # X is an element of X_list, so the model sees the shuffle.
                    X[...,c] = np.random.permutation(values)
                    p = model.predict(X_list)
                    p = mono(p)
                    pred = adjusty2(p, yardline, y_true=y)
                    score = score + func(y, pred)
            finally:
                # Never leave the caller's array shuffled, even on error.
                X[...,c] = values
            if better=='smaller':
                perm[f] = score/nrep - baseline
            else:
                perm[f] = baseline - score/nrep
            print(f'{c} {perm[f]:.7f} {f}')

    # Built from items so an empty feature list yields an empty, well-formed result.
    df = pd.DataFrame(list(perm.items()), columns=['Feature','Perm'])

    return df

In [None]:
keras.backend.clear_session()
import keras.backend as K
def crps(y_true, y_pred):
    """Keras loss: mean squared difference between the cumulative sum of
    ``y_pred`` along axis 1 and ``y_true``."""
    predicted_cdf = K.cumsum(y_pred, axis=1)
    return K.mean(K.square(predicted_cdf - y_true))

In [None]:
# https://www.kaggle.com/c/nfl-big-data-bowl-2020/discussion/119400#latest-683614
keras.backend.clear_session()

def build_model(inp1, inp2, inp3, units=128, print_summary=False):
    """Build and compile the convolutional network over the pair tensor.

    Parameters
    ----------
    inp1, inp2, inp3 : int
        Shape of one sample: ``(inp1, inp2, inp3)``.
    units : int
        Channel count of the last 1x1 Conv2D, which is also the feature width
        expected by the following ``Reshape((inp2, units))``. The two were
        previously decoupled (filters hard-coded to 128), so any other value
        failed at the reshape.
    print_summary : bool
        Print the Keras model summary.

    Returns
    -------
    keras.models.Model
        Compiled with Adam (learning rate 2e-3) and MSE loss; the output layer
        has ``num_class`` sigmoid units.

    Notes
    -----
    The Keras session is cleared on every call.
    NOTE(review): presumably this invalidates models built by earlier calls —
    confirm before reusing stored models.
    """
    keras.backend.clear_session()
    gc.collect()

    # inputs
    inputs = keras.layers.Input(shape=(inp1,inp2,inp3))

    # 4D: 1x1 convolutions act on each (axis-1, axis-2) cell independently
    x = keras.layers.Conv2D(128,(1,1),activation='relu')(inputs)
    x = keras.layers.Conv2D(160,(1,1),activation='relu')(x)
    x = keras.layers.Conv2D(units,(1,1),activation='relu')(x)
    # collapse axis 1 with a 0.7 / 0.3 blend of average and max pooling
    a = keras.layers.AveragePooling2D(pool_size=(inp1,1))(x)
    a = keras.layers.Lambda(lambda x1 : x1*0.7)(a)
    m = keras.layers.MaxPooling2D(pool_size=(inp1,1))(x)
    m = keras.layers.Lambda(lambda x1 : x1*0.3)(m)
    x = keras.layers.Add()([a,m])
    x = keras.layers.Reshape((inp2,units))(x)
    x = keras.layers.BatchNormalization()(x)

    # 3D: per-position 1x1 convolutions, then collapse axis 2
    x = keras.layers.Conv1D(160,(1),activation='relu')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Conv1D(96,(1),activation='relu')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Conv1D(96,(1),activation='relu')(x)
    x = keras.layers.BatchNormalization()(x)
    a = keras.layers.AveragePooling1D(pool_size=inp2)(x)
    m = keras.layers.MaxPooling1D(pool_size=inp2)(x)
    x = keras.layers.Average()([a,m])
    x = keras.layers.Flatten()(x)

    # 2D
    x = keras.layers.Dense(96, activation='relu')(x)
    x = keras.layers.BatchNormalization()(x)
#     x = keras.layers.Dropout(0.05)(x)
    x = keras.layers.Dense(256, activation='relu')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(0.3)(x)

    x = keras.layers.Dense(num_class, activation='sigmoid')(x)
#     x = keras.layers.Dense(num_class, activation='softmax')(x)

    model = keras.models.Model(inputs = [inputs], outputs = [x])

    opt = keras.optimizers.Adam(learning_rate=2e-3, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(optimizer=opt, loss='mse')

#     model.compile(optimizer='adam', loss='mse')
#     model.compile(optimizer='sgd', loss='mse')
#     model.compile(optimizer='adam', loss=crps)

    # summary() prints by itself and returns None, so it is not wrapped in print().
    if print_summary: model.summary()

    return model

In [None]:
# Make the row labels 0..n-1 so positional and label-based indexing agree.
df = df.reset_index(drop=True)
print(df.shape)

In [None]:
# Every 22nd row (taken as one row per play), re-labelled 0..n_plays-1.
df1 = df.iloc[::22].reset_index(drop=True)
print(df1.shape)

In [None]:
%%time
if w_holdout:

#     train_df['week'] = train_df.groupby('PossessionTeam')['GameId'].rank(method='dense')
#     print(train_df['week'].describe())
#     dfw = train_df[['GameId','week']][::22].copy().reset_index(drop=True)
    y_pred = np.zeros((df.shape[0], 199)) 

#     if perm: nrepw = 1
#     else: nrepw = 10
    nrepw = 10
    nepoch = 100
    batch_size = 64
    units = [199] * nrepw
    max_depths = [5] * nrepw

    ncores = multiprocessing.cpu_count()

    if perm:
        os.makedirs('imp',exist_ok=True)

    # collect modeling results in these lists
    models_nn = []
    models_lgb = []
    models_lgb_bi = []
    models_tr_yc = []
    models_w = []
    models_h = []

    ecdfs = []
    escores = []
    bscores = []
    bscorew = []
    bscorea = []
    vscores = []

    n = len(y1_train)
    print('nn train shape', x4_train.shape)
    # print('lgb train shape', X_train1.shape)
    # nn_features = list(X_train0.columns)
    # lgb_features = list(X_train1.columns)
    os.makedirs('imp', exist_ok=True)
    first = True

    for nfold in [1]:

        # kfold = KFold(n_splits=nfold, shuffle=False)

        # kfold = GroupKFold(n_splits=K)
        # groups = train_df['GameId'][::22]

        # groups = 10 * train_df['Season'][::22] + train_df['Week'][::22]

        # kfold = StratifiedKFold(n_splits = K, 
        #                             random_state = 231, 
        #                             shuffle = True)    


        # full_val_preds = np.zeros((n))
        full_val_preds = np.zeros((n,199))

        # test_preds = np.zeros((np.shape(X_test)[0],K))

        # for f, (f_ind, outf_ind) in enumerate(kfold.split(X_train, y_train)):
        # for f, (f_ind, outf_ind) in enumerate(kfold.split(X_train, y_train, groups=groups)):
        for f in range(nfold):
#             f_ind = df[~df.week.between(30, 32)].index
#             outf_ind = df[df.week.between(30, 32)].index
            f_ind = df1[df1.GameId < 2019110000].index
            outf_ind = df1[df1.GameId >= 2019110000].index
            print(len(f_ind), len(outf_ind))

            x4_train_f, x4_val_f = x4_train[f_ind].copy(), x4_train[outf_ind].copy()
            y_train_f, y_val_f = y_train[f_ind], y_train[outf_ind]
            y1_train_f, y1_val_f = y1_train[f_ind], y1_train[outf_ind]
            y0_train_f, y0_val_f = y0_train[f_ind], y0_train[outf_ind]
            y01_train_f, y01_val_f = y01_train[f_ind], y01_train[outf_ind]
    #         sw_f = sw[f_ind] 

            # shuffle data
            idx = np.arange(len(y_train_f))
            np.random.shuffle(idx)
        #     X_train_f = X_train_f[idx]
            y_train_f = y_train_f[idx]
            y1_train_f = y1_train_f[idx]
            y0_train_f = y0_train_f[idx]
            y01_train_f = y01_train_f[idx]
            x4_train_f = x4_train_f[idx]
        #     y_train_f = y_train_f.iloc[idx]

            # track oof prediction for cv scores
            val_preds = 0
            vi = np.array([np.array([v*22 + i for i in range(22)]) for v in outf_ind]).flatten()
            di = train.iloc[vi].copy()
            di = di.reset_index(drop=True)

            # ecdf, to be ensembled with nn prediction, kind of a cox neural net model
            nt = len(y1_train_f)
            nv = len(y1_val_f)
            cf = cumfreq(y1_train_f, numbins=199, defaultreallimits=(-99,100))
            ecdf = cf.cumcount / nt
            ecdfs.append(ecdf)
            ecdfr = ecdf.repeat(nv).reshape(199,nv).transpose()
            escore = mean_squared_error(y0_val_f, ecdfr)

            print('')
            print('*'*10)
            print(f'Fold {f+1}/{nfold}')
            print('*'*10)

            print('')
            print(f'escore {escore:.6f}')
            escores.append(escore)

            for j in range(nrepw):

                print('')
                print(f'Rep {j+1}/{nrepw}')

                model= build_model(x4_train.shape[1], x4_train.shape[2], x4_train.shape[3],
                    print_summary=first)
                if first: first = False

                es = EarlyStopping(monitor='val_loss', 
                   mode='min',
                   restore_best_weights=True, 
                   verbose=2, 
                   patience=patience)
                es.set_model(model)
                
                lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                                       patience=10, verbose=2, mode='min',
                                                       min_delta=0.00001)
                
#                 oc = OneCycleLR(0.0005)

                h = model.fit(x4_train_f, y_train_f, epochs=nepoch,
                          # sample_weight=sw_f,
                          # default batch_size is 32
                          batch_size=batch_size,
                          callbacks=[es, lr],
                          # large batch sizes tend to perform poorly
        #                   batch_size=2**(j+10),
                          validation_data=(x4_val_f, y_val_f),
                          verbose=2)

                models_nn.append(model)
#                 models_w.append(0.5 / len(fold_list) / nfold / nrep)
                models_h.append(h)

                vp = model.predict(x4_val_f)
                vp = mono(vp)
                vp = adjusty(vp, di, y0_val_f)
                vs = mean_squared_error(y0_val_f,vp)
                print(f'nn crps {vs:.6f}')

#                 # lgb
#                 # print('')
#                 tr_data = lgb.Dataset(X_train_f1, label=y1_train_f)
#                 vl_data = lgb.Dataset(X_val_f1, label=y1_val_f) 
#                 # vary max_depth with rep
#                 lgb_params['max_depth'] = max_depths[j]
#                 lgb_params['seed'] = 123 + j
#                 clf = lgb.train(lgb_params, tr_data, valid_sets=[tr_data, vl_data],
#                                 num_boost_round=20000, early_stopping_rounds=100,
#                                 verbose_eval=0)
#                 models_lgb.append(clf)
#                 models_lgb_bi.append(clf.best_iteration)

#                 vpl = clf.predict(X_val_f1, num_iteration=clf.best_iteration)
#                 # lgb cox model, shift ecdf so its median is at lgb point prediction
#                 tr_yc = y1_train_f - np.median(y1_train_f)
#                 models_tr_yc.append(tr_yc)
#                 cl = Parallel(n_jobs=ncores)(delayed(gb_cox)(tr_yc,v) for v in vpl)
#                 c = np.concatenate(cl).reshape(-1,num_class)
#                 c = mono(c)
#                 c = adjusty(c, di, y0_val_f)
#                 print(f'lgb crps {mean_squared_error(y0_val_f,c):.6f}')

#                 # nonnegative least squares to estimate ensemble weights
#                 b = y0_val_f.flatten()
#                 A = np.zeros((len(b),3))
#                 A[:,0] = vp.flatten()
#                 A[:,1] = c.flatten()
#                 A[:,2] = ecdfr.flatten()
#                 bestw = nnls(A,b)[0]
#                 besta = np.matmul(A,bestw).reshape(-1,199)
#                 besta = mono(besta)
#                 besta = adjusty(besta, di, y0_val_f, reduced=False)
#                 bscore = bests = mean_squared_error(y0_val_f, besta)

#                 # print('')        
#                 print(f'bscore {bests:.6f} {bestw}')
#                 bscores.append(bests)
#                 bscorew.append(bestw)
#                 bscorea.append(besta)

#                 val_preds += besta / nrepw

                bscores.append(vs)
                val_preds += vp / nrepw

                # test_preds[:,f] += model.predict(proc_X_test_f)[:,0] / nrep


                if perm:
                    ff = str(nfold) + '_' + str(f+1)
                    
#                     feature_imp = pd.DataFrame(zip(lgb_features, clf.feature_importance(),
#                                                    clf.feature_importance(importance_type='gain')),
#                                                    columns=['Feature','Splits'+ff,'Gain'+ff])
                    
#                     perm_imp = permutation_importance(X_val_f1,
#                                                       y1_val_f, clf,
#                                                       mean_absolute_error)
#                     perm_imp.columns = ['Feature','Perm'+ff]
#                     feature_imp = feature_imp.merge(perm_imp, how='left', on='Feature')

                    yardline = train_df['YardLine'][::22].values - 10
                    perm_imp = permutation_importance_list([list(od.columns)],
                                                           [x4_val_f],
                                                           y0_val_f, yardline[outf_ind],
                                                           model, mean_squared_error)

#                     perm_imp = permutation_importance(X_val_f0,
#                                                       y_val_f, model,
#                                                       mean_squared_error)

                    perm_imp.columns = ['Feature','PermNN'+ff]
                    perm_imp = perm_imp.sort_values(by='PermNN'+ff, ascending=False).reset_index(drop=True)
                    print()
                    print(perm_imp.head(n=50))
#                     print()
#                     print(perm_imp.tail(n=50))
    
                    # feature_imp = feature_imp.merge(perm_imp, how='left', on='Feature')

#                     feature_imp.sort_values(by='Splits'+ff, inplace=True, ascending=False)
#                     print('')
#                     print(feature_imp.head(n=10))

#                     feature_imp.sort_values(by='Gain'+ff, inplace=True, ascending=False)
#                     print('')
#                     print(feature_imp.head(n=10))

#                     feature_imp.sort_values(by='Perm'+ff, inplace=True, ascending=False)
#                     print('')
#                     print(feature_imp.head(n=15))

#                     feature_imp.sort_values(by='PermNN'+ff, inplace=True, ascending=False)
#                     print('')
#                     print(feature_imp.head(n=15))

#                     print(feature_imp.shape)

#                     fname = 'imp/' + mname + '_imp' + ff + '.csv'
#                     perm_imp.to_csv(fname, index=False)
#                     print(fname, feature_imp.shape)

                gc.collect()

            val_preds = mono(val_preds)
            val_preds = adjusty(val_preds, di, y0_val_f, reduced=False)
            full_val_preds[outf_ind] += val_preds
            vscore = mean_squared_error(y0_val_f, val_preds)
            print(f'\nvscore {vscore:.6f}')
            vscores.append(vscore)

        #     if f == 0: break

        nfh = int(np.ceil(nfold / 2))
        nfq = int(np.ceil(nfold / 4))

        print('')
        print(f'\nAll bscores {np.array(bscores)}')
        print('Mean bscores: %.6f' % np.mean(bscores))
        print('Mean vscores: %.6f' % np.mean(vscores))
    #         print('Mean vscores last half: %.6f' % np.mean(vscores[-nfh:]))
    #         print('Mean vscores last quar: %.6f' % np.mean(vscores[-nfq:]))
    #     print('Mean ecdf weights last half: %.6f' % np.mean(bscorew[-nfh*nrep:]))
    #     print('Mean ecdf weights last quar: %.6f' % np.mean(bscorew[-nfq*nrep:]))
#         print(f'\nAll bscores {np.array(bscores)}')
        # print(f'\nAll vscores {np.array(vscores)}')
#         print(f'\nAll lgb iters {np.array(models_lgb_bi)}')
#         print(f'\nAll ecdf weights {bscorew}')