In [None]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import RepeatedKFold
def getSplitsStratKFold(X,
                        y,
                        n_splits,
                        n_repeats = 2,
                        stratified = False,
                        seed = 0):
    """Build repeated K-fold train/test index splits for ``X`` and ``y``.

    Args:
        X: Feature data handed to the splitter's ``split`` method.
        y: Targets handed to the splitter's ``split`` method.
        n_splits: Number of folds per repetition.
        n_repeats: Number of times the K-fold procedure is repeated.
        stratified: When true, ``RepeatedStratifiedKFold`` is used;
            otherwise ``RepeatedKFold``.
        seed: Value passed as ``random_state`` to the splitter.

    Returns:
        A list with one dict per (train, test) pair yielded by the
        splitter, each holding the keys ``'train_idx'`` and ``'test_idx'``.
    """
    # Choose the splitter class first, then build it with shared settings.
    splitter_cls = RepeatedStratifiedKFold if stratified else RepeatedKFold
    splitter = splitter_cls(n_splits=n_splits,
                            n_repeats=n_repeats,
                            random_state=seed)

    # Materialise every fold as a small dict of index arrays.
    return [
        {'train_idx': train_rows, 'test_idx': test_rows}
        for train_rows, test_rows in splitter.split(X, y)
    ]

In [None]:
from sklearn.model_selection import GridSearchCV
def train_and_test(X,
                   y,
                   splits,
                   model_class,
                   score_func,
                   model_args=None,
                   gridcv_params=None,
                   gridcv_args=None,
                   fit_args=None,
                   feature_norm = True,
                   return_models=False
                   ):
    """Fit and evaluate a freshly constructed model on every split.

    For each entry of ``splits`` a new ``model_class(**model_args)`` is
    built (optionally wrapped in ``GridSearchCV``), fitted on the training
    rows and used to predict both the training and the test rows.

    Args:
        X: Feature data, indexed as ``X[index_array]`` (presumably a NumPy
            array -- confirm against callers).
        y: Targets, indexed the same way as ``X``.
        splits: Iterable of dicts with ``'train_idx'`` and ``'test_idx'``
            entries used to index ``X`` and ``y``.
        model_class: Callable returning an estimator exposing ``fit`` and
            ``predict``.
        score_func: NOTE(review): currently unused. Scores are always
            computed with ``mean_squared_error(..., squared=False)``, i.e.
            RMSE. Kept for interface compatibility.
        model_args: Keyword arguments for ``model_class``; ``None`` means
            no arguments.
        gridcv_params: Parameter grid. When not ``None`` the model is
            wrapped in ``GridSearchCV(model, gridcv_params, **gridcv_args)``.
        gridcv_args: Extra keyword arguments for ``GridSearchCV``.
        fit_args: Extra keyword arguments forwarded to ``model.fit``.
        feature_norm: When true, ``featurewise_norm`` is applied to the
            training rows and the returned ``fmean``/``fvar`` are passed
            back to it to normalise the test rows.
        return_models: When true, fitted models are included in the result
            under ``'models'``.

    Returns:
        Dict of per-split lists: ``'train'`` / ``'test'`` (scores),
        ``'training_sidedata'`` (``{'fmean', 'fvar'}`` dicts, only filled
        when ``feature_norm`` is true), ``'train_errors'`` /
        ``'test_errors'`` (prediction minus target),
        ``'train_errors_true'`` / ``'test_errors_true'`` (the targets), and
        ``'models'`` when ``return_models`` is true.
    """
    # Resolve the optional dict arguments once, outside the loop.
    model_args = {} if model_args is None else model_args
    gridcv_args = {} if gridcv_args is None else gridcv_args
    fit_args = {} if fit_args is None else fit_args

    training_sidedata = []
    models = []

    train_scores = []
    test_scores = []

    train_errors = []
    train_errors_true = []
    test_errors = []
    test_errors_true = []

    # instantiate model for each split
    for split in splits:

        # construct model
        model = model_class(**model_args)

        # whether to do gridcv
        if gridcv_params is not None:
            model = GridSearchCV(model, gridcv_params, **gridcv_args)

        X_train = X[split['train_idx']]
        y_train = y[split['train_idx']]
        X_test = X[split['test_idx']]
        y_test = y[split['test_idx']]

        # normalize training data and remember the statistics used
        if feature_norm:
            X_train, fmean, fvar = featurewise_norm(X_train)
            training_sidedata.append({'fmean': fmean, 'fvar': fvar})

        # fit model, forwarding any caller-supplied fit keyword arguments
        # (these were previously accepted but silently dropped)
        model.fit(X_train, y_train, **fit_args)

        # normalize test data with the *training* statistics so no
        # information from the test rows leaks into the normalisation
        if feature_norm:
            X_test, _ignore, _ignore = featurewise_norm(X_test,
                                                        fmean = fmean,
                                                        fvar = fvar
                                                        )

        # test/train predictions
        test_predictions = model.predict(X_test)
        train_predictions = model.predict(X_train)

        # RMSE on both partitions (squared=False makes
        # mean_squared_error return the root of the MSE).
        # NOTE(review): `squared` is deprecated in recent scikit-learn
        # releases in favour of root_mean_squared_error -- confirm the
        # pinned version before upgrading.
        test_accu = mean_squared_error(y_test, test_predictions, squared=False)
        train_accu = mean_squared_error(y_train, train_predictions, squared=False)

        train_scores.append(train_accu)
        test_scores.append(test_accu)

        # signed residuals plus the matching true targets, kept so the
        # error distribution can be inspected/plotted afterwards
        train_errors.append(train_predictions - y_train)
        train_errors_true.append(y_train)

        test_errors.append(test_predictions - y_test)
        test_errors_true.append(y_test)

        # save model
        if return_models:
            models.append(model)

    results = {
        'train': train_scores,
        'test': test_scores,
        'training_sidedata': training_sidedata,
        'train_errors': train_errors,
        'train_errors_true': train_errors_true,
        'test_errors': test_errors,
        'test_errors_true': test_errors_true
    }

    if return_models:
        results['models'] = models

    return results