In [10]:
import pandas as pd
import numpy as np
import json
import os
import string
import re
import random

import nltk
from nltk import word_tokenize
from nltk.corpus import (wordnet, stopwords)
from nltk.stem.snowball import SnowballStemmer

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt

from sklearn.linear_model import (LogisticRegression, LogisticRegressionCV)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.feature_selection import RFE
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (confusion_matrix, 
                             recall_score, 
                             f1_score, 
                             accuracy_score, 
                             precision_score,
                             roc_curve, auc, roc_auc_score)

import warnings
warnings.filterwarnings('ignore')

from datetime import *
from bisect import bisect

In [11]:
email = pd.read_csv('features_id_text.csv')
#email.index = email['Unnamed: 0']
email = email.drop('Unnamed: 0', axis = 1)
list(email)

['likes',
 'n_tweets',
 'replies',
 'retweets',
 'time_business',
 'time_early',
 'time_evening',
 'time_late',
 'weekday_mean',
 'wkday_0',
 'wkday_1',
 'wkday_2',
 'wkday_3',
 'wkday_4',
 'wkday_5',
 'wkday_6',
 'LDA_0',
 'LDA_1',
 'LDA_2',
 'LDA_3',
 'LDA_4',
 'LDA_5',
 'LDA_6',
 'LDA_7',
 'LDA_8',
 'LDA_9',
 'LDA_10',
 'LDA_11',
 'LDA_12',
 'LDA_13',
 'LDA_14',
 'LDA_15',
 'LDA_16',
 'LDA_17',
 'LDA_18',
 'LDA_19',
 'act',
 'actual',
 'aint',
 'alreadi',
 'alway',
 'amaz',
 'ani',
 'annoy',
 'anoth',
 'answer',
 'anymor',
 'anyon',
 'anyth',
 'ask',
 'ass',
 'away',
 'babi',
 'bad',
 'beat',
 'beauti',
 'becaus',
 'becom',
 'bed',
 'befor',
 'believ',
 'best',
 'better',
 'big',
 'birthday',
 'bitch',
 'black',
 'bless',
 'block',
 'bodi',
 'bore',
 'bout',
 'boy',
 'break',
 'bring',
 'bro',
 'broke',
 'busi',
 'buy',
 'came',
 'car',
 'care',
 'catch',
 'caus',
 'chang',
 'check',
 'child',
 'chill',
 'close',
 'cold',
 'color',
 'come',
 'cool',
 'coupl',
 'cousin',
 'crazi',
 '

In [12]:
full_data = email
#drop the user in turkish
full_data = full_data.drop(full_data[full_data.index == 2697].index)
#drop not common tweeters
#full_data = full_data.drop(full_data[full_data.n_tweets == 1].index)
#full_data = full_data.drop(full_data[full_data.n_tweets > 800].index)

In [13]:
def create_folds(full_data, n_folds, rand = 15): 
    X = full_data.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    y = full_data.as_matrix(['binary_label'])
    ints = [list(test_index) 
            for train_index, test_index in StratifiedKFold(n_folds, shuffle = True, random_state = rand).split(X, y)]
    return [full_data.iloc[ints[i],:] for i in range(n_folds)]

In [14]:
def binary_metrics (test, label, pred):
    acc = accuracy_score(test[label], test[pred])
    f1 = f1_score(test[label], test[pred])
    prec = precision_score(test[label], test[pred])
    rec = recall_score(test[label], test[pred])
    roc_auc = roc_auc_score(test[label], test[pred])
    
    return roc_auc, acc, prec, rec, f1

def plot_metrics (test, label, pred):
    tn, fp, fn, tp = confusion_matrix(test[label], test[pred]).ravel()
    roc_auc, acc, prec, rec, f1 = binary_metrics(test, label, pred)
    roc = roc_curve(test[label], test[pred])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(test[label].nunique()):
        fpr[i], tpr[i], _ = roc_curve(test[label], test[pred])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test[label].ravel(), test[pred].ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plt.figure(figsize=(8,5))
    lw = 2
    plt.plot(fpr[1], tpr[1], color='gold',
             lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[1])
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic Example')
    plt.legend(loc="lower right")
    plt.show()
    
    show_confusion_matrix(test, label, pred)
    
    return roc_auc[0], acc, prec, rec, f1

def show_confusion_matrix(test, label, pred):
    
    C = confusion_matrix(test[label], test[pred])
    tn, fp, fn, tp = C.ravel()
    
    NP = fn+tp # Num positive examples
    NN = tn+fp # Num negative examples
    N  = NP+NN

    fig = plt.figure(figsize=(8,8))
    ax  = fig.add_subplot(111)
    ax.imshow(C, cmap=plt.cm.gray)

    # Draw the grid boxes
    ax.set_xlim(-0.5,2.5)
    ax.set_ylim(2.5,-0.5)
    ax.plot([-0.5,2.5],[0.5,0.5], '-k', lw=2)
    ax.plot([-0.5,2.5],[1.5,1.5], '-k', lw=2)
    ax.plot([0.5,0.5],[-0.5,2.5], '-k', lw=2)
    ax.plot([1.5,1.5],[-0.5,2.5], '-k', lw=2)


    # Set xlabels
    ax.set_xlabel('Predicted Label', fontsize=16)
    ax.set_xticks([0,1,2])
    ax.set_xticklabels(['Show', 'No Show'])
    ax.xaxis.set_label_position('top')
    ax.xaxis.tick_top()
    # These coordinate might require some tinkering. Ditto for y, below.
    ax.xaxis.set_label_coords(0.34,1.06)

    # Set ylabels
    ax.set_ylabel('True Label', fontsize=16, rotation=90)
    ax.set_yticklabels(['Bad', 'Good'])
    ax.set_yticks([0,1,2])
    ax.yaxis.set_label_coords(-0.09,0.65)


    # Fill in initial metrics: tp, tn, etc...
    ax.text(0,0,'True Negatives: %d\n(Total Negatives: %d)'%(tn,NN),
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(0,1,'False Negatives: %d'%fn,
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(1,0,'False Positives: %d'%fp,
           va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(1,1,'True Positives: %d\n(Total Positives: %d)'%(tp,NP),
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    # Fill in secondary metrics: accuracy, true pos rate, etc...
    ax.text(2,0,'True Negative Rate' + '\n' +'(Specificity):%.2f'%(tn / (fp+tn+0.)),
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(2,1,'True Positive Rate' + '\n' + '(Sensitivity):%.2f'%(tp / (tp+fn+0.)),
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(2,2,'F-1 Score: %.2f'%(round(2*tp/((2*tp) + fp + fn),3)),
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(0,2,'Negative Predictive ' + '\n' + 'Value: %.2f'%(1-fn/(fn+tn+0.)),
           va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    ax.text(1,2,'Positive Predictive ' + '\n' + 'Value: %.2f'%(tp/(tp+fp+0.)),
            va='center', ha='center', bbox=dict(fc='w',boxstyle='round,pad=1'))
    plt.tight_layout()
    plt.show()
    
    return None

In [15]:
def run_model_rfc (full, test, label, pred, plot = False):
    train = pd.concat([full, test]).drop_duplicates(keep=False)
    trainArr = train.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    trainRes = train.as_matrix(['binary_label'])
    testArr = test.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    
    param_test = {'n_estimators':np.arange(20,111,10).tolist(), 
                   'max_features':np.arange(0.1,1,0.1).tolist()
                   , 'class_weight':['balanced', None]
                   ,'criterion':['gini', 'entropy']
                  }
    gridsearch = GridSearchCV(estimator = RandomForestClassifier(random_state=10),
                              param_grid = param_test, scoring = 'roc_auc', n_jobs=4,iid=False, cv=5)

    gridsearch.fit(trainArr,trainRes)
    
    predictions = gridsearch.predict(testArr)
    
    data = pd.DataFrame(list(test[label]),columns=[label], index = test.index)
    data[pred] = pd.Series(list(predictions), index=data.index) 
    
    if plot:
        roc_auc, acc, prec, rec, f1 = plot_metrics(data, label, pred)
    else:
        roc_auc, acc, prec, rec, f1 = binary_metrics(data, label, pred)    
    
    return data, [roc_auc, acc, prec, rec, f1]

In [16]:
def lr_model(full, test, label, pred, plot = False, CV = False, rfe = True):
    train = pd.concat([full, test]).drop_duplicates(keep=False)
    trainArr = train.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    trainRes = train.as_matrix(['binary_label'])
    testArr = test.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    
    if CV:
        estimator = LogisticRegressionCV(scoring = 'roc_auc')
    else:
        param_test = {'C':[0.0001,0.001,0.01,0.1,1,10,100,1000],
                       'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
        gridsearch = GridSearchCV(estimator = LogisticRegression(class_weight = 'balanced'),
                                  param_grid = param_test, scoring = 'roc_auc').fit(trainArr,trainRes)
        estimator = LogisticRegression(C=list(gridsearch.best_params_.values())[0],
                                       solver=list(gridsearch.best_params_.values())[1])
    if rfe:
        selector = RFE(estimator, step=1)
    
    else:
        selector = estimator
    
    selector = selector.fit(trainArr, trainRes)
    predictions = selector.predict(testArr)
        
    data = pd.DataFrame(list(test[label]),columns=[label], index = test.index)
    data[pred] = pd.Series(list(predictions), index=data.index)                
    
    if plot:
        roc_auc, acc, prec, rec, f1 = plot_metrics(data, label, pred)
    else:
        roc_auc, acc, prec, rec, f1 = binary_metrics(data, label, pred)
    
    return data, [roc_auc, acc, prec, rec, f1]

In [17]:
fold1, fold2, fold3, fold4, fold5 = create_folds(full_data,5, rand = 12)

In [19]:
label, pred = 'binary_label', 'predictions'

t1, b1 = run_model_rfc(full_data, fold1, label, pred)
t2, b2 = run_model_rfc(full_data, fold2, label, pred)
t3, b3 = run_model_rfc(full_data, fold3, label, pred)
t4, b4 = run_model_rfc(full_data, fold4, label, pred)
t5, b5 = run_model_rfc(full_data, fold5, label, pred)

binary = pd.DataFrame([b1, b2, b3, b4, b5])
binary.columns = ['ROC AUC Score','Accuracy', 'Recall', 'Precision', 'F-1 Score']
binary.index = ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']
binary.loc['Mean'] = binary.mean()
binary

Unnamed: 0,ROC AUC Score,Accuracy,Recall,Precision,F-1 Score
Fold 1,0.478022,0.481481,0.5,0.571429,0.533333
Fold 2,0.521978,0.518519,0.545455,0.428571,0.48
Fold 3,0.557692,0.555556,0.583333,0.5,0.538462
Fold 4,0.521978,0.518519,0.545455,0.428571,0.48
Fold 5,0.593407,0.592593,0.615385,0.571429,0.592593
Mean,0.534615,0.533333,0.557925,0.5,0.524877


In [20]:
t1, b1 = lr_model(full_data, fold1, label, pred, rfe = False)
t2, b2 = lr_model(full_data, fold2, label, pred, rfe = False)
t3, b3 = lr_model(full_data, fold3, label, pred, rfe = False)
t4, b4 = lr_model(full_data, fold4, label, pred, rfe = False)
t5, b5 = lr_model(full_data, fold5, label, pred, rfe = False)

binary = pd.DataFrame([b1, b2, b3, b4, b5])
binary.columns = ['ROC AUC Score','Accuracy', 'Recall', 'Precision', 'F-1 Score']
binary.index = ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']
binary.loc['Mean'] = binary.mean()
binary

### inter fold variation
### new models to try

Unnamed: 0,ROC AUC Score,Accuracy,Recall,Precision,F-1 Score
Fold 1,0.478022,0.481481,0.5,0.571429,0.533333
Fold 2,0.521978,0.518519,0.545455,0.428571,0.48
Fold 3,0.601648,0.592593,0.714286,0.357143,0.47619
Fold 4,0.5,0.518519,0.518519,1.0,0.682927
Fold 5,0.538462,0.555556,0.538462,1.0,0.7
Mean,0.528022,0.533333,0.563344,0.671429,0.57449


In [21]:
t1, b1 = lr_model(full_data, fold1, label, pred)
t2, b2 = lr_model(full_data, fold2, label, pred)
t3, b3 = lr_model(full_data, fold3, label, pred)
t4, b4 = lr_model(full_data, fold4, label, pred)
t5, b5 = lr_model(full_data, fold5, label, pred)

binary = pd.DataFrame([b1, b2, b3, b4, b5])
binary.columns = ['ROC AUC Score','Accuracy', 'Recall', 'Precision', 'F-1 Score']
binary.index = ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']
binary.loc['Mean'] = binary.mean()
binary

Unnamed: 0,ROC AUC Score,Accuracy,Recall,Precision,F-1 Score
Fold 1,0.508242,0.518519,0.52381,0.785714,0.628571
Fold 2,0.480769,0.481481,0.5,0.5,0.5
Fold 3,0.601648,0.592593,0.714286,0.357143,0.47619
Fold 4,0.5,0.518519,0.518519,1.0,0.682927
Fold 5,0.538462,0.555556,0.538462,1.0,0.7
Mean,0.525824,0.533333,0.559015,0.728571,0.597538


In [22]:
from sklearn.ensemble import GradientBoostingClassifier

def model_gbc (full, test, label, pred, plot = False, CV = False, rfe = True):
    train = pd.concat([full, test]).drop_duplicates(keep=False)
    trainArr = train.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    trainRes = train.as_matrix(['binary_label'])
    testArr = test.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    
    lr = 1
    min_samples_leaf = 1
    
    #NUMBER OF ESTIMATORS
    param_test1 = {'n_estimators':np.arange(500,4000,500).tolist()}
    gsearch1 = GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=lr, min_samples_split=500,
                              min_samples_leaf=50,max_depth=8,max_features='sqrt', subsample=0.8,random_state=10), 
                   param_grid = param_test1, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
    gsearch1.fit(trainArr, trainRes)
    n_estimators = list(gsearch1.best_params_.values())[0]
    display(gsearch1.best_params_)

    param_test2 = {'max_depth':list(range(1,16,1)), 'min_samples_split':list(range(2,10,1))}
    gsearch2 = GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=lr, n_estimators=n_estimators,
                                            max_features='sqrt', subsample=0.8, random_state=10), 
                   param_grid = param_test2, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
    gsearch2.fit(trainArr, trainRes)
    display(gsearch2.best_params_)
    max_depth=list(gsearch2.best_params_.values())[0]
    min_samples_split = list(gsearch2.best_params_.values())[1]

    #Grid seach on subsample and max_features
    param_test3 = {'learning_rate':[0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
    gsearch3 = GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=lr, n_estimators=n_estimators,max_depth=max_depth, min_samples_split=min_samples_split,
                                                max_features='sqrt', subsample=0.8, random_state=10), 
                   param_grid = param_test3, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
    gsearch3.fit(trainArr, trainRes)
    display(gsearch3.best_params_)
    lr = list(gsearch3.best_params_.values())[0]

    #Grid seach on subsample and max_features
    param_test4 = {'max_features':[0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
    gsearch4 = GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=lr, n_estimators=n_estimators,max_depth=max_depth, min_samples_split=min_samples_split,
                                                min_samples_leaf=min_samples_leaf, subsample=0.8, random_state=10), 
                   param_grid = param_test4, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
    gsearch4.fit(trainArr, trainRes)
    display(gsearch4.best_params_)
    max_features = list(gsearch4.best_params_.values())[0]

    #Grid seach on subsample and max_features
    param_test5 = {'subsample':[0.6,0.7,0.75,0.8,0.85,0.9]}
    gsearch5=GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=lr, n_estimators=n_estimators,max_depth=max_depth, min_samples_split=min_samples_split,
                                                min_samples_leaf=min_samples_leaf,max_features=max_features, random_state=10), 
                   param_grid = param_test5, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
    gsearch5.fit(trainArr, trainRes)
    display(gsearch5.best_params_)
    subsample = list(gsearch5.best_params_.values())[0]
    
    selector=GradientBoostingClassifier(learning_rate=lr, n_estimators=n_estimators*100, max_depth=max_depth, min_samples_split=min_samples_split,
                                                subsample=subsample, max_features=max_features, min_samples_leaf=min_samples_leaf, random_state=10)
    
    selector = selector.fit(trainArr, trainRes)
    predictions = selector.predict(testArr)
        
    data = pd.DataFrame(list(test[label]),columns=[label], index = test.index)
    data[pred] = pd.Series(list(predictions), index=data.index)                
    
    if plot:
        roc_auc, acc, prec, rec, f1 = plot_metrics(data, label, pred)
    else:
        roc_auc, acc, prec, rec, f1 = binary_metrics(data, label, pred)
    
    return data, [roc_auc, acc, prec, rec, f1]

In [23]:
t1, b1 = model_gbc(full_data, fold1, label, pred)
t2, b2 = model_gbc(full_data, fold2, label, pred)
t3, b3 = model_gbc(full_data, fold3, label, pred)
t4, b4 = model_gbc(full_data, fold4, label, pred)
t5, b5 = model_gbc(full_data, fold5, label, pred)

binary = pd.DataFrame([b1, b2, b3, b4, b5])
binary.columns = ['ROC AUC Score','Accuracy', 'Recall', 'Precision', 'F-1 Score']
binary.index = ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']
binary.loc['Mean'] = binary.mean()
binary

{'n_estimators': 500}

{'max_depth': 2, 'min_samples_split': 3}

{'learning_rate': 1.0}

{'max_features': 0.7}

{'subsample': 0.8}

{'n_estimators': 500}

{'max_depth': 5, 'min_samples_split': 7}

{'learning_rate': 1.0}

{'max_features': 0.9}

{'subsample': 0.8}

{'n_estimators': 500}

{'max_depth': 10, 'min_samples_split': 7}

{'learning_rate': 1.0}

{'max_features': 0.8}

{'subsample': 0.8}

{'n_estimators': 500}

{'max_depth': 10, 'min_samples_split': 2}

{'learning_rate': 0.6}

{'max_features': 0.5}

{'subsample': 0.8}

{'n_estimators': 500}

{'max_depth': 7, 'min_samples_split': 4}

{'learning_rate': 1.0}

{'max_features': 0.9}

{'subsample': 0.8}

Unnamed: 0,ROC AUC Score,Accuracy,Recall,Precision,F-1 Score
Fold 1,0.585165,0.592593,0.578947,0.785714,0.666667
Fold 2,0.596154,0.592593,0.636364,0.5,0.56
Fold 3,0.524725,0.518519,0.555556,0.357143,0.434783
Fold 4,0.409341,0.407407,0.416667,0.357143,0.384615
Fold 5,0.552198,0.555556,0.5625,0.642857,0.6
Mean,0.533516,0.533333,0.550007,0.528571,0.529213


In [26]:
def gb_model (full, test, label, pred, plot = False, CV = False):
    train = pd.concat([full, test]).drop_duplicates(keep=False)
    trainArr = train.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    trainRes = train.as_matrix(['binary_label'])
    testArr = test.drop(['variable', 'binary_label'], axis = 1).as_matrix()
    
    if CV:
        #GRIDSEARCH
        params = {'n_estimators':np.arange(100,15000,500).tolist(),
                  #'max_depth':list(range(1,16,1)), 
                  #'min_samples_split':list(range(2,10,1)),
                  #'max_features':[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
                  #'subsample':[0.6,0.7,0.75,0.8,0.85,0.9],
                  'learning_rate': np.arange(0.05,1,0.05).tolist()}
        estimator = GridSearchCV(estimator = GradientBoostingClassifier(criterion = 'mse', random_state = 15), scoring='roc_auc', param_grid = params).fit(trainArr, trainRes)
        
        print('Gridsearch completed! Parameters are:')
        print(estimator.best_params_)
    else:
        estimator = GradientBoostingClassifier(n_estimators = 5000,
                                               learning_rate = 0.005,
                                               random_state = 18).fit(trainArr, trainRes)
        #print('Gridsearch not run. Parameters are:')
        #print(estimator.get_params)
    
    predictions = estimator.predict(testArr)
        
    data = pd.DataFrame(list(test[label]),columns=[label], index = test.index)
    data[pred] = pd.Series(list(predictions), index=data.index)                
    
    if plot:
        roc_auc, acc, prec, rec, f1 = plot_metrics(data, label, pred)
    else:
        roc_auc, acc, prec, rec, f1 = binary_metrics(data, label, pred)
    
    return data, [roc_auc, acc, prec, rec, f1]

In [27]:
t1, b1 = gb_model(full_data, fold1, label, pred)
t2, b2 = gb_model(full_data, fold2, label, pred)
t3, b3 = gb_model(full_data, fold3, label, pred)
t4, b4 = gb_model(full_data, fold4, label, pred)
t5, b5 = gb_model(full_data, fold5, label, pred)

binary = pd.DataFrame([b1, b2, b3, b4, b5])
binary.columns = ['ROC AUC Score','Accuracy', 'Recall', 'Precision', 'F-1 Score']
binary.index = ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']
binary.loc['Mean'] = binary.mean()
binary

Unnamed: 0,ROC AUC Score,Accuracy,Recall,Precision,F-1 Score
Fold 1,0.620879,0.62963,0.6,0.857143,0.705882
Fold 2,0.598901,0.592593,0.666667,0.428571,0.521739
Fold 3,0.629121,0.62963,0.642857,0.642857,0.642857
Fold 4,0.409341,0.407407,0.416667,0.357143,0.384615
Fold 5,0.667582,0.666667,0.692308,0.642857,0.666667
Mean,0.585165,0.585185,0.6037,0.585714,0.584352
