In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers

np.set_printoptions(precision=3, suppress=True)
#tf.random.set_seed(1)

def get_feat_and_labels(filename):
    """Load one shot-data CSV and return the model features plus the label.

    The CSV must contain the columns ``OppX``, ``OppY``, ``DistanceToGoal``,
    ``DistanceToOpp`` and ``Success``.

    Parameters
    ----------
    filename : str or path-like
        Path handed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``Angle``, ``DistanceToGoal``, ``DistanceToOpp``,
        ``OppX``, ``OppY``, ``Success``.

        * ``Angle`` is ``abs(arctan2(OppY, OppX))`` (radians), computed from
          the *signed* ``OppY`` before it is replaced by its magnitude.
        * ``OppY`` is replaced by its absolute value.
        * ``Success`` is 1/0. Non-numeric labels are mapped with
          ``label == "YES"`` (anything else becomes 0); numeric labels are
          passed through unchanged.
    """
    data = pd.read_csv(filename)

    # Angle must be derived before OppY loses its sign.
    data["Angle"] = np.abs(np.arctan2(data["OppY"], data["OppX"]))
    data["OppY"] = np.abs(data["OppY"])

    # Explicit copy: the column selection would otherwise be a slice of the
    # loaded frame, and assigning "Success" below would raise
    # SettingWithCopyWarning (or silently write to a temporary).
    data = data[["Angle", "DistanceToGoal", "DistanceToOpp", "OppX", "OppY", "Success"]].copy()

    # Convert textual YES/NO labels to int. Checking for "not numeric" rather
    # than "dtype == object" also covers pandas' dedicated string dtype.
    if not pd.api.types.is_numeric_dtype(data["Success"]):
        data["Success"] = (data["Success"] == "YES").astype("int64")

    return data


In [2]:
def printYesNoCount(data):
    """Print how many samples are labelled 0 ("no") and 1 ("yes").

    Parameters
    ----------
    data : pandas.DataFrame or array-like of labels
        A DataFrame (or any DataFrame subclass) is expected to carry the
        labels in its ``'Success'`` column. Anything else (e.g. a Series or
        NumPy array) is treated as the label vector itself and must support
        boolean-mask indexing and ``.shape``.
    """
    # isinstance (not an exact type comparison) so DataFrame subclasses are
    # also counted by their 'Success' column instead of being masked
    # element-wise across every column.
    labels = data['Success'] if isinstance(data, pd.DataFrame) else data

    success_no = labels[labels == 0]
    success_yes = labels[labels == 1]
    print('success no:', success_no.shape[0])
    print('success yes:', success_yes.shape[0])

In [3]:
"""Code used to create training data"""
    
from sklearn.model_selection import train_test_split
    
def get_data(names):
    """Load every CSV in ``names`` and stack the rows into one DataFrame.

    Each file is read with ``get_feat_and_labels``; the resulting frames are
    concatenated row-wise in the order given. Row indices are kept as loaded,
    so index labels repeat across files.

    Parameters
    ----------
    names : sequence of str
        CSV paths. Must contain at least one entry.

    Returns
    -------
    pandas.DataFrame
        All rows of all files, with the columns produced by
        ``get_feat_and_labels``.

    Raises
    ------
    ValueError
        If ``names`` is empty.
    """
    if len(names) == 0:
        raise ValueError("get_data() needs at least one file name")

    # Concatenate once instead of growing the frame inside the loop, which
    # re-copies all previously loaded rows on every iteration.
    frames = [get_feat_and_labels(name) for name in names]
    original_data = pd.concat(frames, axis=0)

    #printYesNoCount(original_data)
    return original_data

# data_names = [
#                 "..\MotionTestingData\CanShootTolLocal2x.csv",
#                 "..\MotionTestingData\CanShootTolLocal3x.csv",
#                 "..\MotionTestingData\CanShootTolLocal4x.csv",
#                 "..\\NewGoodData\Testfeb8num3\CanShoot2MotionTol2x.csv",
#                 "..\\NewGoodData\Testfeb8num3\CanShoot2MotionTol3x.csv",
#                 "..\\NewGoodData\Testfeb8num3\CanShoot2MotionTol4x.csv",
#                 "..\\NewGoodData\Testfeb8num3\CanShoot2plus75tolerance.csv",
#                 "..\\NewGoodData\Testfeb8num3\CanShoot2Tol3x.csv",
#                 "..\\NewGoodData\Testfeb8num3\CanShoot2Tol4x.csv",

#                 "..\\NewGoodData\Testfeb8num3\CanShoot2Tolerance2x.csv",
#                 "..\\NewGoodData\Originals\CanShoot2Behavior.csv",
#                 "..\\NewGoodData\Originals\CanShoot2Bottom.csv",
#                 "..\\NewGoodData\Originals\CanShoot2Upper.csv",
#                 "..\\NewGoodData\Originals\CanShoot2Mid.csv"
    
#              ]
# CSV files that make up the data set (Windows-style relative paths).
# Raw strings keep every backslash literal; the previous plain strings relied
# on unrecognised escapes such as "\M" and "\C", which Python reports as
# invalid escape sequences.
data_names = [
    r"..\MotionTestingData\CanShootStillLocal3x.csv",

    r"..\MotionTestingData\CanShootTolLocal2x.csv",
    r"..\MotionTestingData\CanShootTolLocal3x.csv",
    r"..\MotionTestingData\CanShootTolLocal4x.csv",

    r"..\NewGoodData\Testfeb8num3\CanShoot2MotionTol2x.csv",
    r"..\NewGoodData\Testfeb8num3\CanShoot2MotionTol3x.csv",
    r"..\NewGoodData\Testfeb8num3\CanShoot2MotionTol4x.csv",

    r"..\NewGoodData\Testfeb8num3\CanShoot2plus75tolerance.csv",
    r"..\NewGoodData\Testfeb8num3\CanShoot2Tol3x.csv",
    r"..\NewGoodData\Testfeb8num3\CanShoot2Tol4x.csv",
    r"..\NewGoodData\Testfeb8num3\CanShoot2Tolerance2x.csv",

    r"..\NewGoodData\Originals\CanShoot2Behavior.csv",

    r"..\NewGoodData\Originals\CanShoot2Bottom.csv",
    r"..\NewGoodData\Originals\CanShoot2Upper.csv",
    r"..\NewGoodData\Originals\CanShoot2Center.csv",
    r"..\NewGoodData\Originals\CanShoot2Mid.csv",
]

    
# Load all of the data into a single DataFrame. Normally a stratified
# train/validation split would be created at this point; here the full set is
# kept because the cross-validation code below does its own splitting.
train_data = get_data(data_names)


# Train Model loop

In [4]:
def get_model(num_feats):
    """Create and compile the small binary classifier used in every fold.

    Layer stack, in order: BatchNormalization on the ``num_feats`` inputs,
    Dense(6, relu), BatchNormalization, Dense(3, swish), Dense(1, sigmoid).
    The model is compiled with Adam (learning rate 0.05) and binary
    cross-entropy on probabilities (``from_logits=False``), and tracks binary
    accuracy, recall and precision, in that order.

    Parameters
    ----------
    num_feats : int
        Number of input features.

    Returns
    -------
    tf.keras.models.Sequential
        The compiled, untrained model.
    """
    keras_layers = tf.keras.layers

    stack = [
        keras_layers.BatchNormalization(input_dim=num_feats),
        keras_layers.Dense(6, activation='relu'),
        keras_layers.BatchNormalization(),
        keras_layers.Dense(3, activation='swish'),
        keras_layers.Dense(1, activation='sigmoid'),
    ]
    network = tf.keras.models.Sequential(stack)

    adam = tf.keras.optimizers.Adam(0.05)
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    # Binary accuracy stays first: callers read index 1 of evaluate()'s result
    # (index 0 is the loss) as the accuracy.
    tracked = [
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.Precision(),
    ]
    network.compile(optimizer=adam, loss=bce, metrics=tracked)

    return network


loud_mode = False
def train_model(model, inputs, targets):
    """Fit ``model`` in place on ``inputs``/``targets`` in two stages.

    Stage 1 runs 64 epochs with only the learning-rate scheduler attached.
    Stage 2 continues for up to 512 more epochs with the scheduler plus early
    stopping, restoring the best weights seen during stage 2.

    Both callbacks monitor the *training* loss (``'loss'``): no validation
    split is made here. An earlier variant of this function held out a
    stratified 33% validation split and monitored ``'val_loss'`` instead.

    Parameters
    ----------
    model : compiled Keras model
        Trained in place.
    inputs, targets :
        Training features and labels, passed straight to ``model.fit``.

    Returns
    -------
    None

    Notes
    -----
    Output verbosity is taken from the module-level ``loud_mode`` flag.
    """
    # Halve the learning rate when the training loss plateaus, never going
    # below 0.001. Taken from tf.keras so it comes from the same Keras
    # implementation as the model and the EarlyStopping callback below
    # (previously imported from the standalone ``keras`` package).
    # NOTE(review): patience=75 exceeds the 64 epochs of stage 1, so the
    # scheduler presumably cannot fire during that stage -- confirm intended.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='loss', factor=0.5, patience=75, min_lr=0.001
    )

    # Stage 1: fixed-length run without early stopping.
    model.fit(
        inputs,
        targets,
        batch_size=16,
        epochs=64,
        callbacks=[reduce_lr],
        verbose=loud_mode
    )

    # Stage 2: stop after 50 epochs without any improvement in training loss
    # and roll back to the best weights.
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='loss', min_delta=0, patience=50, verbose=0,
        mode='min', baseline=None, restore_best_weights=True
    )

    model.fit(
        inputs,
        targets,
        batch_size=16,
        epochs=512,
        callbacks=[reduce_lr, early_stop],
        verbose=loud_mode
    )

    


In [5]:
def printResults(name, acc_per_fold, train_idx, test_idx):
    """Print per-fold accuracies and write them to a CSV file.

    The file is ``CrossValidationResults/<name>.csv`` relative to the current
    working directory; the directory is created if it does not exist. The
    file has the header ``Fold,Accuracy,Train_size,Test_size`` followed by one
    row per fold (folds numbered from 1).

    Parameters
    ----------
    name : str
        Model name; used as the printed title and as the file stem.
    acc_per_fold : sequence of float
        Accuracy of each fold, printed with a trailing ``%``.
    train_idx, test_idx : sequence of sized objects
        For fold ``i`` only ``len(train_idx[i])`` / ``len(test_idx[i])`` is
        used, so index arrays work. Each must have at least as many entries
        as ``acc_per_fold``.
    """
    import os

    # Build the path with os.path.join so it is a real sub-directory on every
    # platform (a hard-coded backslash is only a separator on Windows).
    out_dir = 'CrossValidationResults'
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f'{name}.csv')

    # The context manager closes the file even if a row fails to format.
    with open(out_path, "w") as output_file:
        output_file.write('Fold,Accuracy,Train_size,Test_size'+'\n')

        print(f'------------------------------------------------------------------------\n{name}\nScore per fold:')
        for i, accuracy in enumerate(acc_per_fold):
            print(f'> Fold {i+1} - Accuracy: {accuracy}%')
            output_file.write(f'{i+1},{accuracy},{len(train_idx[i])},{len(test_idx[i])}'+'\n')

        #output_file.write(f'{np.mean(acc_per_fold)},{np.std(acc_per_fold)}'+'\n')

    #print('------------------------------------------------------------------------\nAverage scores for all folds:')
    #print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
    

In [6]:
#referenced:
#https://medium.com/analytics-vidhya/using-the-corrected-paired-students-t-test-for-comparing-the-performance-of-machine-learning-dc6529eaa97f
def CorrectedResampledPairedT_Test(nn_acc_per_fold, lr_acc_per_fold, trains, tests):
    """Corrected resampled paired t-test on two models' per-fold scores.

    With ``d_i = nn_acc_per_fold[i] - lr_acc_per_fold[i]`` over ``K`` folds,
    ``n1 = mean(trains)`` and ``n2 = mean(tests)``, the statistic is

        t = mean(d) / sqrt((1/K + n2/n1) * var(d, ddof=1))

    and is compared against a Student t distribution with ``K - 1`` degrees
    of freedom (two-sided).

    Prints ``n1``/``n2``, the statistic, the two-sided p-value, and the CDF
    value at ``|t|``.

    Parameters
    ----------
    nn_acc_per_fold, lr_acc_per_fold : iterable of float
        Paired per-fold scores; must have equal length of at least 2.
    trains, tests : array-like of numbers
        Per-fold training and test set sizes; only their means are used.

    Returns
    -------
    (t_statistic, p_value) : tuple of float
        If every difference is identical the variance is zero and the
        statistic is ``inf`` or ``nan`` (NumPy emits a warning).

    Raises
    ------
    ValueError
        If the two score lists differ in length or hold fewer than 2 folds.
    """
    from scipy.stats import t

    nn_scores = list(nn_acc_per_fold)
    lr_scores = list(lr_acc_per_fold)
    # zip() would silently drop the unmatched tail, so check pairing first.
    if len(nn_scores) != len(lr_scores):
        raise ValueError(
            f"score lists must be paired: got {len(nn_scores)} and {len(lr_scores)} folds"
        )

    diff = [y - x for y, x in zip(nn_scores, lr_scores)]

    #total number of folds
    K = len(diff)
    if K < 2:
        raise ValueError("at least 2 folds are needed to estimate the variance")

    mean_diff = np.mean(diff)
    s2 = np.var(diff, ddof=1)

    #Size of folds
    n1 = np.mean(trains)
    n2 = np.mean(tests)
    print(f'n1:{n1} n2:{n2}')

    # The n2/n1 term is the correction that inflates the variance estimate.
    S2 = (1/K + n2/n1) * s2

    m_s = mean_diff / np.sqrt(S2)

    # Two-sided p-value. The survival function avoids the cancellation of
    # "1 - cdf" when |t| is large and the p-value is tiny.
    Pvalue = 2.0 * t.sf(np.abs(m_s), K-1)

    print(m_s)
    print(Pvalue)
    print("T", t.cdf(np.abs(m_s), K-1))

    return m_s, Pvalue
    #print("2.276003475")



In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedStratifiedKFold

#based on: 
# https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-keras.md

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def ten_ten_fold_cross_val():
    """Compare the neural network and logistic regression by repeated CV.

    Uses the module-level ``train_data``: every column except ``Success`` is a
    feature and ``Success`` is the label. The data is split with
    ``RepeatedStratifiedKFold(n_splits=10, n_repeats=10)`` (no fixed
    ``random_state``). On each split a fresh network from ``get_model`` is
    trained with ``train_model`` and a liblinear ``LogisticRegression`` is
    fitted on the same training rows; both are scored on the same test rows.
    Per-fold accuracies are written out through ``printResults``.

    Returns
    -------
    tuple
        ``(nn_acc_per_fold, lr_acc_per_fold, train_inf, test_inf, fold_info)``:
        the two lists of per-fold accuracies in percent, the per-fold train
        index arrays, the per-fold test index arrays, and the list of
        ``[train, test]`` index pairs.
    """
    features = train_data.drop(columns="Success")
    labels = train_data["Success"]

    print(features.shape)

    #kfold = KFold(n_splits=10, shuffle=True)
    splitter = RepeatedStratifiedKFold(n_splits=10, n_repeats=10)#,random_state=0)

    # Bookkeeping of the index arrays used by each split.
    fold_info, train_inf, test_inf = [], [], []
    # Test accuracy (percent) per split for each model.
    nn_acc_per_fold, lr_acc_per_fold = [], []

    for fold_no, (train, test) in enumerate(splitter.split(features, labels), start=1):
        print(f'------------------------------------------------------------------------\nTraining for fold {fold_no} ...')
        fold_info.append([train, test])
        train_inf.append(train)
        test_inf.append(test)
        print(f'Length of train: {len(train)} \t length of test: {len(test)}')

        x_train, y_train = features.iloc[train], labels.iloc[train]
        x_test, y_test = features.iloc[test], labels.iloc[test]

        # -- Neural network: fresh model every split --
        tf.keras.backend.clear_session()
        cv_nn_model = get_model(features.shape[1])
        train_model(cv_nn_model, x_train, y_train)

        # evaluate() returns [loss, binary_accuracy, ...] in compile order.
        nn_scores = cv_nn_model.evaluate(x_test, y_test, verbose=0)
        nn_loss = nn_scores[0]
        nn_accuracy_pct = nn_scores[1] * 100
        nn_acc_per_fold.append(nn_accuracy_pct)

        print(f'NN: Score for fold {fold_no}: {cv_nn_model.metrics_names[1]} of {nn_accuracy_pct}%; {cv_nn_model.metrics_names[0]} of {nn_loss}')

        # -- Logistic regression on the identical split --
        lr_model = LogisticRegression(solver='liblinear')#, random_state=0)
        lr_model.fit(x_train, y_train)

        lr_accuracy_pct = accuracy_score(y_test, lr_model.predict(x_test)) * 100
        lr_acc_per_fold.append(lr_accuracy_pct)

        print(f'LR: Score for fold {fold_no}: binary_accuracy of {lr_accuracy_pct}%')

    # Write the per-fold results of both models to their CSV files.
    printResults("NeuralNetwork", nn_acc_per_fold, train_inf, test_inf)
    printResults("LinearLogisticRegression", lr_acc_per_fold, train_inf, test_inf)

    #CorrectedResampledPairedT_Test(nn_acc_per_fold, lr_acc_per_fold, fold_info)

    return nn_acc_per_fold, lr_acc_per_fold, train_inf, test_inf, fold_info

nn_acc, lr_acc, trains, tests, f_inf = ten_ten_fold_cross_val()

(542, 5)
------------------------------------------------------------------------
Training for fold 1 ...
Length of train: 487 	 length of test: 55
NN: Score for fold 1: binary_accuracy of 83.63636136054993%; loss of 0.34699463844299316
LR: Score for fold 1: binary_accuracy of 80.0%
------------------------------------------------------------------------
Training for fold 2 ...
Length of train: 487 	 length of test: 55
NN: Score for fold 2: binary_accuracy of 87.27272748947144%; loss of 0.35817596316337585
LR: Score for fold 2: binary_accuracy of 80.0%
------------------------------------------------------------------------
Training for fold 3 ...
Length of train: 488 	 length of test: 54
NN: Score for fold 3: binary_accuracy of 83.33333134651184%; loss of 0.31542453169822693
LR: Score for fold 3: binary_accuracy of 83.33333333333334%
------------------------------------------------------------------------
Training for fold 4 ...
Length of train: 488 	 length of test: 54
NN: Score for 

In [7]:
# from sklearn.model_selection import KFold
# #based on: 
# # https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-keras.md

# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score

# def ten_ten_fold_cross_val():
#     # Combine all of training data for cross_validation
#     fold_info = []
#     train_inf = []
#     test_inf = []
    
#     nn_acc_per_fold= []    
#     lr_acc_per_fold= []

#     # K-fold Cross Validation model evaluation
#     fold_no = 1
#     for i in range(10):
        
#         inputs = train_data.drop(columns="Success")
#         targets = train_data["Success"]
        
#         print(inputs.shape)
        
#         kfold = KFold(n_splits=10, shuffle=True)
        
#         for train, test in kfold.split(inputs, targets):
#             print(f'------------------------------------------------------------------------\nTraining for fold {fold_no} ...')
#             #Store train test split
#             #fold_info.append([train,test])
#             train_inf.append(train)
#             test_inf.append(test)
#             print(f'Length of train: {len(train)} \t length of test: {len(test)}')
            
#             # -- Neural Network --
#             tf.keras.backend.clear_session()
#             cv_nn_model = get_model(inputs.shape[1])

#             #Train model using train split.
#             train_model(cv_nn_model, inputs.iloc[train], targets.iloc[train])

#             # -- Test --
#             nn_scores = cv_nn_model.evaluate(inputs.iloc[test], targets.iloc[test], verbose=0)
#             nn_acc_per_fold.append(nn_scores[1] * 100)
            
#             print(f'NN: Score for fold {fold_no}: {cv_nn_model.metrics_names[1]} of {nn_scores[1]*100}%; {cv_nn_model.metrics_names[0]} of {nn_scores[0]}')



#             # -- Logistic regressor --
#             #Fit logistic regressor on train split
#             lr_model = LogisticRegression(solver='liblinear', random_state=0)
#             lr_model.fit(inputs.iloc[train], targets.iloc[train])

#             #Get accuracy
#             output_lr = lr_model.predict(inputs.iloc[test])
#             lr_scores = accuracy_score(targets.iloc[test], output_lr)
#             lr_acc_per_fold.append(lr_scores*100)
            
#             print(f'LR: Score for fold {fold_no}: binary_accuracy of {lr_scores*100}%')

#             # Increase fold number
#             fold_no = fold_no + 1

        
#     # == Provide average scores ==
#     #Write NN to file
#     printResults("NeuralNetwork", nn_acc_per_fold, train_inf, test_inf)
    
#     #Write LR to file
#     printResults("LinearLogisticRegression", lr_acc_per_fold, train_inf,test_inf)
    
#     #CorrectedResampledPairedT_Test(nn_acc_per_fold, lr_acc_per_fold, fold_info)
    
#     return nn_acc_per_fold, lr_acc_per_fold, train_inf, test_inf
  
# nn_acc, lr_acc, trains, tests = ten_ten_fold_cross_val()   

(542, 5)
------------------------------------------------------------------------
Training for fold 1 ...
Length of train: 487 	 length of test: 55
(487, 5)
NN: Score for fold 1: binary_accuracy of 83.63636136054993%; loss of 0.2975699305534363
LR: Score for fold 1: binary_accuracy of 85.45454545454545%
------------------------------------------------------------------------
Training for fold 2 ...
Length of train: 487 	 length of test: 55
(487, 5)
NN: Score for fold 2: binary_accuracy of 83.63636136054993%; loss of 0.38385334610939026
LR: Score for fold 2: binary_accuracy of 81.81818181818183%
------------------------------------------------------------------------
Training for fold 3 ...
Length of train: 488 	 length of test: 54
(488, 5)
NN: Score for fold 3: binary_accuracy of 90.74074029922485%; loss of 0.31074997782707214
LR: Score for fold 3: binary_accuracy of 83.33333333333334%
------------------------------------------------------------------------
Training for fold 4 ...
Leng

KeyboardInterrupt: 

In [8]:
# #print(len(fold_inf))
# tests = []
# for i in range(len(fold_inf)):
#    tests.append(fold_inf[i][0])
#    #print(fold_inf[i][0])
    
# tests = np.array(fold_inf, dtype=int, copy=True)
# print(tests)
# #print(fold_inf[0])

In [8]:
# Number of samples in each fold's train / test index array; these sizes are
# what the corrected t-test needs.
trains_len = [len(trains[k]) for k in range(len(trains))]
tests_len = [len(tests[k]) for k in range(len(trains))]


In [9]:
# print(fold_inf)

# print(trains.shape)
# print(tests)


# Run the corrected resampled paired t-test on the per-fold neural-network vs.
# logistic-regression accuracies, using the per-fold train/test set sizes.
CorrectedResampledPairedT_Test(nn_acc, lr_acc, trains_len, tests_len)

n1:487.8 n2:54.2
1.9665526132386482
0.052034673938422094
T 0.973982663030789


In [5]:
# def NN_train_and_results(model, inputs, targets, train, test):
#     return


# from sklearn.model_selection import KFold
# #based on: 
# # https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-keras.md

# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score

# def ten_ten_fold_cross_val():
#     # Combine all of training data for cross_validation
#     inputs = train_data.drop(columns="Success")
#     targets = train_data["Success"]
#     print(inputs.shape)
    
#     kfold = KFold(n_splits=10, shuffle=True)
    
#     nn_acc_per_fold= []
#     nn_loss_per_fold= []
    
#     lr_acc_per_fold= []

#     # K-fold Cross Validation model evaluation
#     fold_no = 1
#     for train, test in kfold.split(inputs, targets):
#         print(f'------------------------------------------------------------------------\nTraining for fold {fold_no} ...')
    
#         # -- Neural Network
#         tf.keras.backend.clear_session()
#         cv_nn_model = get_model(inputs.shape[1])
        
#         #Train model using train split.
#         train_model(cv_nn_model, inputs.iloc[train], targets.iloc[train])
        
#         # -- Test --
#         nn_scores = cv_nn_model.evaluate(inputs.iloc[test], targets.iloc[test], verbose=0)
#         print(f'NN: Score for fold {fold_no}: {cv_nn_model.metrics_names[1]} of {nn_scores[1]*100}%; {cv_nn_model.metrics_names[0]} of {nn_scores[0]}')
#         nn_acc_per_fold.append(nn_scores[1] * 100)
#         nn_loss_per_fold.append(nn_scores[0])
        
    
#         # -- Logistic regressor --
        
#         #Fit logistic regressor on train split
#         lr_model = LogisticRegression(solver='liblinear', random_state=0)
#         lr_model.fit(inputs.iloc[train], targets.iloc[train])
        
#         #Get accuracy
#         output_lr = lr_model.predict(inputs.iloc[test])
#         lr_scores = accuracy_score(targets.iloc[test], output_lr)
        
#         print(f'LR: Score for fold {fold_no}: binary_accuracy of {lr_scores*100}%')
#         lr_acc_per_fold.append(lr_scores)

        
#         # Increase fold number
#         fold_no = fold_no + 1

        
#     # == Provide average scores ==
#     #Write NN to file
#     nn_output_file = open('CrossValidationResults\\NeuralNetwork.csv', "w")
#     nn_output_file.write('Fold,Accuracy,loss'+'\n')
#     print('------------------------------------------------------------------------\nScore per fold\nNeuralNetwork:')
#     for i in range(0, len(nn_acc_per_fold)):
#         print('------------------------------------------------------------------------')
#         print(f'> Fold {i+1} - Loss: {nn_loss_per_fold[i]} - Accuracy: {nn_acc_per_fold[i]}%')
#         nn_output_file.write(f'{i+1},{nn_acc_per_fold[i]},{nn_loss_per_fold[i]}'+'\n')
        
#     nn_output_file.write(f'{np.mean(nn_acc_per_fold)},{np.std(nn_acc_per_fold)},{np.mean(nn_loss_per_fold)}'+'\n')
#     nn_output_file.close()
        
#     print('------------------------------------------------------------------------\nAverage scores for all folds\nnNeuralNetwork:')
#     print(f'> Accuracy: {np.mean(nn_acc_per_fold)} (+- {np.std(nn_acc_per_fold)})')
#     print(f'> Loss: {np.mean(nn_loss_per_fold)}')
    
    
#     #Write LR to file
#     lr_output_file = open('CrossValidationResults\LinearLogisticRegression.csv', "w")
#     lr_output_file.write('Fold,Accuracy'+'\n')
#     print('------------------------------------------------------------------------\nLogisticRegression\nScore per fold:')
#     for i in range(0, len(lr_acc_per_fold)):
#         print('------------------------------------------------------------------------')
#         print(f'> Fold {i+1} - Accuracy: {lr_acc_per_fold[i]}%')
#         lr_output_file.write(f'{i+1},{lr_acc_per_fold[i]}'+'\n')

#     print('------------------------------------------------------------------------\Average scores for all folds:')
#     print(f'> Accuracy: {np.mean(lr_acc_per_fold)} (+- {np.std(lr_acc_per_fold)})')
#     print('------------------------------------------------------------------------')
    
#     lr_output_file.write(f'{np.mean(lr_acc_per_fold)},{np.std(lr_acc_per_fold)}'+'\n')
#     lr_output_file.close()
    
    
#     #Compute the difference between the results
#     #diff = [y - x for y, x in zip(RFC_score, SVM_score)]
#     diff = [y - x for y, x in zip(nn_acc_per_fold, lr_acc_per_fold)]
    
#     #Comopute the mean of differences
#     d_bar = np.mean(diff)
    
#     #compute the variance of differences
#     sigma2 = np.var(diff)
    
#     #compute the number of data points used for training 
#     #n1 = len(y_train)
#     n1 = len(targets.iloc[train])
    
#     #compute the number of data points used for testing 
#     #n2 = len(y_test)
#     n2 = len(targets.iloc[test])
    
#     #compute the total number of data points
#     #n = len(y)
#     n = len(targets)
    
#     #compute the modified variance
#     sigma2_mod = sigma2 * (1/n + n2/n1)
    
#     #compute the t_static
#     t_static =  d_bar / np.sqrt(sigma2_mod)
    
#     from scipy.stats import t
    
#     #Compute p-value and plot the results 
#     Pvalue = ((1 - t.cdf(t_static, n-1))*200)
    
#     Pvalue

  
# ten_ten_fold_cross_val()   

(403, 5)
------------------------------------------------------------------------
Training for fold 1 ...
(362, 5)
NN: Score for fold 1: binary_accuracy of 75.60975551605225%; loss of 0.5046401619911194
LR: Score for fold 1: binary_accuracy of 75.60975609756098%
------------------------------------------------------------------------
Training for fold 2 ...
(362, 5)
NN: Score for fold 2: binary_accuracy of 82.92682766914368%; loss of 0.3634245991706848
LR: Score for fold 2: binary_accuracy of 85.36585365853658%
------------------------------------------------------------------------
Training for fold 3 ...
(362, 5)
NN: Score for fold 3: binary_accuracy of 87.80487775802612%; loss of 0.3177565038204193
LR: Score for fold 3: binary_accuracy of 80.48780487804879%
------------------------------------------------------------------------
Training for fold 4 ...
(363, 5)
NN: Score for fold 4: binary_accuracy of 85.00000238418579%; loss of 0.29204481840133667
LR: Score for fold 4: binary_accur

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedStratifiedKFold

#based on: 
# https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-k-fold-cross-validation-with-keras.md

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def ten_ten_fold_cross_val():
    """Repeated stratified CV of the NN vs. logistic regression, with SMOTE.

    Same procedure as the earlier ``ten_ten_fold_cross_val`` cell, which this
    definition replaces when the cell is run, except that each fold's
    *training* rows are oversampled with SMOTE before both models are fitted.
    The test rows are never resampled.

    Uses the module-level ``train_data``: every column except ``Success`` is a
    feature and ``Success`` is the label. Splits come from
    ``RepeatedStratifiedKFold(n_splits=10, n_repeats=10)`` (no fixed
    ``random_state``). Per-fold accuracies are written via ``printResults``.

    Returns
    -------
    tuple
        ``(nn_acc_per_fold, lr_acc_per_fold, train_inf, test_inf, fold_info)``:
        per-fold accuracies in percent for both models, the per-fold train and
        test index arrays (as produced by the splitter, i.e. before
        oversampling), and the list of ``[train, test]`` index pairs.
    """
    # Combine all of training data for cross_validation
    fold_info = []
    train_inf = []
    test_inf = []

    nn_acc_per_fold = []
    lr_acc_per_fold = []

    inputs = train_data.drop(columns="Success")
    targets = train_data["Success"]

    print(inputs.shape)

    #kfold = KFold(n_splits=10, shuffle=True)
    kfold = RepeatedStratifiedKFold(n_splits=10, n_repeats=10)#,random_state=0)

    for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):
        print(f'------------------------------------------------------------------------\nTraining for fold {fold_no} ...')

        # Oversample the training rows of this fold only.
        # NOTE(review): SMOTE is not imported in any visible cell; it
        # presumably comes from imblearn.over_sampling -- make sure that
        # import runs before this cell.
        oversample = SMOTE()
        train_in, train_targ = oversample.fit_resample(inputs.iloc[train], targets.iloc[train])

        #Store train test split
        fold_info.append([train, test])
        train_inf.append(train)
        test_inf.append(test)
        print(f'Length of train: {len(train)} \t length of test: {len(test)}')

        test_in, test_targ = inputs.iloc[test], targets.iloc[test]

        # -- Neural Network --
        tf.keras.backend.clear_session()
        cv_nn_model = get_model(inputs.shape[1])

        # Train on the oversampled fold. Previously the resampled data was
        # computed and then ignored, so SMOTE had no effect on either model.
        train_model(cv_nn_model, train_in, train_targ)

        # -- Test --
        nn_scores = cv_nn_model.evaluate(test_in, test_targ, verbose=0)
        nn_acc_per_fold.append(nn_scores[1] * 100)

        print(f'NN: Score for fold {fold_no}: {cv_nn_model.metrics_names[1]} of {nn_scores[1]*100}%; {cv_nn_model.metrics_names[0]} of {nn_scores[0]}')

        # -- Logistic regressor --
        # Fitted on the same oversampled rows so the comparison stays paired.
        lr_model = LogisticRegression(solver='liblinear')#, random_state=0)
        lr_model.fit(train_in, train_targ)

        #Get accuracy
        output_lr = lr_model.predict(test_in)
        lr_scores = accuracy_score(test_targ, output_lr)
        lr_acc_per_fold.append(lr_scores*100)

        print(f'LR: Score for fold {fold_no}: binary_accuracy of {lr_scores*100}%')

    # == Provide average scores ==
    #Write NN to file
    printResults("NeuralNetwork", nn_acc_per_fold, train_inf, test_inf)

    #Write LR to file
    printResults("LinearLogisticRegression", lr_acc_per_fold, train_inf, test_inf)

    #CorrectedResampledPairedT_Test(nn_acc_per_fold, lr_acc_per_fold, fold_info)

    return nn_acc_per_fold, lr_acc_per_fold, train_inf, test_inf, fold_info

nn_acc, lr_acc, trains, tests, f_inf = ten_ten_fold_cross_val()