In [1]:
# load train and test data
def load_train_test_data(*, csv_path='data/diabetes_dataset_preprocessed.csv', test_size=0.15, random_state=42):
    '''
    Load the preprocessed dataset and return a reproducible, stratified train/test split.

    Called with its defaults, this function yields the same training and testing data in
    every notebook that uses it.

    We decided to do a 85-15 split since our dataset is not very big and we want to maximize
    the training data while preserving the test data to some extent. We have a bit more data
    due to the oversampling now.

    csv_path: location of the preprocessed CSV file; it must contain a 'Diabetic' column,
              which is used as the target.
    test_size: share of the rows that goes into the test set (passed to train_test_split).
    random_state: seed for the shuffling done by train_test_split; keep the default to get
                  the shared split.

    returns: X_train, X_test, y_train, y_test
    '''
    import pandas as pd
    from sklearn.model_selection import train_test_split

    # `display` is only injected as a global by IPython/Jupyter. Resolve it explicitly so the
    # function also works in a plain Python process, where it falls back to print().
    try:
        from IPython.display import display as show
    except ImportError:
        show = print

    # load the preprocessed dataset
    df = pd.read_csv(csv_path)

    # split the dataset into features and target
    y = df['Diabetic']
    X = df.drop('Diabetic', axis=1)

    # perform the train test split; stratify on the target so both parts keep its class ratio
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # print information about the datasets
    print('A snippet of our training data:')
    show(X_train.head())
    print("There are {} entries with {} columns in our training data.".format(X_train.shape[0], X_train.shape[1]))
    print("There are {} entries with {} columns in our testing data.".format(X_test.shape[0], X_test.shape[1]))

    return (X_train, X_test, y_train, y_test)

In [6]:
#model evaluation
def model_eval(model, X_train, X_test, y_train, y_test, acc=True, f1=True, recall=True, precision=True, aoc=True):
    '''
    Print the selected standard metrics of a fitted model for the train and the test data.

    Available metrics (each one is switched on/off by its flag):
    accuracy (acc) | f1 score (f1) | recall (recall) | precision (precision)

    model: object with a predict(X) method; predict is called once for X_train and once for
           X_test, and the predictions are reused for all selected metrics.
    X_train, X_test: feature data handed to model.predict.
    y_train, y_test: true labels the predictions are compared against.
    aoc: accepted for compatibility but currently not used; no such metric is computed.

    Every metric is called with scikit-learn's default arguments and printed as a percentage
    with two decimals.

    returns: None (the results are only printed)
    '''
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import f1_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import precision_score

    flags = (acc, f1, recall, precision)
    metrics = (accuracy_score, f1_score, recall_score, precision_score)
    selected = [metric for enabled, metric in zip(flags, metrics) if enabled]

    # Nothing requested: do not run the model at all.
    if not selected:
        return

    # Predict once per dataset instead of once per metric; the predictions do not depend on
    # which metric is being computed.
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    for metric in selected:
        print(f'\nEvaluation: {metric.__name__}')
        v_train = metric(y_train, pred_train)
        v_test = metric(y_test, pred_test)
        print('{0:.2%} for the train data'.format(v_train))
        print('{0:.2%} for the test data'.format(v_test))

    return