### Final simulation for testing the best model on a batch of increasing 100 samples in the train set and test on the next 100 batch

In [1]:
# Importing the required libraries
import numpy as np
import os
import pandas as pd
from ast import literal_eval

from sklearn import tree
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB

# Libraries for xgboost,light gradient boosting and Cat boosting
import xgboost as xgb
import lightgbm as lgb
import catboost as cb

# We can build some calibaration curve
from sklearn.calibration import calibration_curve

import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt

import random
# Chaning back to the resource directory if it is not there
if("Res" not in os.getcwd()):
    os.chdir("..")



In [2]:
# Reading the input file for this notebook called attributes_df.csv containing the attributes of each mail
# extracted earlier
attributes_hot_encode = pd.read_csv("Res/processed_data/attributes_df.csv",sep=",")

# Removing those mails that have no text inside them such as Res/enron1/spam/1733.2004-07-29.GP.spam.txt
attributes_hot_encode.drop(attributes_hot_encode.index\
                           [np.where(np.isnan(attributes_hot_encode[["median_useful_token_len"]]))[0]],inplace=True)
col_attributes = []
with open("Res/processed_data/attributes_names.txt", 'r') as f:
    col_attributes.extend(f.read().split())

In [4]:
# Change this value from "enron1" to "enron6" to build all the simulations
enron_dataset_list = ["enron1","enron2","enron3","enron4","enron5","enron6"]
for enron_dataset in enron_dataset_list:
    print("Running simulation for "+enron_dataset+"...")
    attributes_hot_encode_ordered = attributes_hot_encode.loc[attributes_hot_encode["dataset"]==enron_dataset].sort_values("date_mail")
    attributes_hot_encode_1_spam = attributes_hot_encode_ordered.loc[(attributes_hot_encode_ordered["dataset"]==enron_dataset) &
                                                             (attributes_hot_encode_ordered["mail_label"]=="spam")]
    attributes_hot_encode_1_ham = attributes_hot_encode_ordered.loc[(attributes_hot_encode_ordered["dataset"]==enron_dataset) &
                                                             (attributes_hot_encode_ordered["mail_label"]=="ham")]

    attributes_hot_encoded_df = pd.DataFrame(columns = list(attributes_hot_encode_ordered))
    i = 0
    if(len(attributes_hot_encode_1_ham)>len(attributes_hot_encode_1_spam)):
        for j in range(len(attributes_hot_encode_1_spam)):
            ham_count = np.random.choice([0,1,2,3,4,5,6,7],p=[0.025,0.15,0.25,0.25,0.15,0.10,0.05,0.025])
            try:
                attributes_hot_encoded_df = pd.concat([attributes_hot_encoded_df,
                                                       attributes_hot_encode_1_spam.iloc[j:j+1],
                                                       attributes_hot_encode_1_ham.iloc[i:i+ham_count]])
                i+=ham_count
                if(j==len(attributes_hot_encode_1_spam)-1):
                    attributes_hot_encoded_df = pd.concat([attributes_hot_encoded_df,
                                                           attributes_hot_encode_1_ham.iloc[i:]])
            except:
                attributes_hot_encoded_df = pd.concat([attributes_hot_encoded_df,
                                                       attributes_hot_encode_1_spam.iloc[j:]])
                break
    else:
        for j in range(len(attributes_hot_encode_1_ham)):
            spam_count = np.random.choice([0,1,2,3,4,5,6,7],p=[0.025,0.15,0.25,0.25,0.15,0.10,0.05,0.025])
            try:
                attributes_hot_encoded_df = pd.concat([attributes_hot_encoded_df,
                                                       attributes_hot_encode_1_ham.iloc[j:j+1],
                                                       attributes_hot_encode_1_spam.iloc[i:i+spam_count]])
                i+=spam_count
                if(j==len(attributes_hot_encode_1_ham)-1):
                    attributes_hot_encoded_df = pd.concat([attributes_hot_encoded_df,
                                                           attributes_hot_encode_1_spam.iloc[i:]])
            except:
                attributes_hot_encoded_df = pd.concat([attributes_hot_encoded_df,
                                                       attributes_hot_encode_1_ham.iloc[j:]])
                break
    f = list(attributes_hot_encoded_df["mail_label"])
    train_df = pd.DataFrame(columns=list(attributes_hot_encoded_df))
    measure_after_iteration = pd.DataFrame(columns = ["Train_spam_count","Train_ham_count","Test_spam_count",
                                                      "Test_ham_count","True_negative","False_positive",
                                                      "False_negative","True_positive","ratio_per_batch"])
    for i in range((len(f)/100)-1):
        train_df = pd.concat([train_df,attributes_hot_encoded_df.iloc[i*100:((i+1)*100)]])
        if(i==(len(f)/100)-2):
            test_df = attributes_hot_encoded_df.iloc[(i+1)*100:]
        else:
            test_df = attributes_hot_encoded_df.iloc[(i+1)*100:((i+2)*100)]
        print("train = {}, test = {}".format(len(train_df),len(test_df)))

        baseline_X_features = ['line_count', 'token_count', 'punctuations_count', 'single_char_count', 
                             'number_token_count', 'year_count', 'stopword_count','median_useful_token_len', 
                             'avg_useful_token_len', 'rareword_count','attributes_len']
        for col in baseline_X_features:
            train_df[col] = train_df[col].astype(float)
            test_df[col] = test_df[col].astype(float)
        baseline_X_features.extend(col_attributes)
        Y_feature = "mail_label"

        X_train = train_df[baseline_X_features]
        y_train = train_df[Y_feature]

        X_test = test_df[baseline_X_features]
        y_test = test_df[Y_feature]

        y_train["label"] = y_train.apply(lambda x: 1.0 if x=="spam" else 0.0)
        y_test["label"] = y_test.apply(lambda x: 1.0 if x=="spam" else 0.0)

        dtrain = xgb.DMatrix(X_train, label=y_train["label"])
        dtest = xgb.DMatrix(X_test, label=y_test["label"])

        param = {'max_depth': 100, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic',"n_estimators": [200]}
        param['nthread'] = 4
        param['eval_metric'] =  ['auc']

        evallist = [(dtest, 'eval'), (dtrain, 'train')]

        num_round = 30
        try:
            bst = xgb.train(param, dtrain, num_round, evallist)
        except:
            break
        y_pred = bst.predict(dtest)
        y_pred = pd.Series(y_pred).apply(lambda x: 1.0 if x>0.5 else 0.0)

        tn, fp, fn, tp = confusion_matrix(y_test["label"], y_pred).ravel()
        print("True Positives(spam) = {}".format(tp))
        print("True Negatives(ham)  = {}".format(tn))
        print("False Positives(actual=ham, predicted=spam) = {}".format(fp))
        print("False Negatives(actual=spam,predicted=ham) = {}".format(fn))
        print("Accuracy = {:.2f}%".format((tp+tn)/float(len(y_test))*100))
        print("Recall = {:.2f}%".format((tp/float(tp+fn))*100))
        print("Specificity = {:.2f}%".format((tn/float(tn+fp))*100))
        test_ham_count = len(test_df.loc[test_df["mail_label"]=="ham"])
        test_spam_count = len(test_df.loc[test_df["mail_label"]=="spam"])
        measure_after_iteration.loc[i] = [len(train_df.loc[train_df["mail_label"]=="spam"]),\
                                          len(train_df.loc[train_df["mail_label"]=="ham"]),\
                                          test_spam_count,test_ham_count,tn, fp, fn, tp,
                                          float(test_ham_count)/test_spam_count]
        
    measure_after_iteration["Recall"] = ((measure_after_iteration["True_positive"]/(measure_after_iteration["True_positive"]+
                                                                    measure_after_iteration["False_negative"]))*100.0)
    measure_after_iteration["Specificity"] = ((measure_after_iteration["True_negative"]/(measure_after_iteration["True_negative"]+
                                                                            measure_after_iteration["False_positive"]))*100.0)

    measure_after_iteration.to_csv("Res/processed_data/measure_after_iteration_"+enron_dataset+".csv",sep=",",index=False)
    attributes_hot_encoded_df.to_csv("Res/processed_data/attributes_hot_encoded_df_"+enron_dataset+".csv",sep=",",index=False)

    # Plotting the graphs for each simulation
    plt.close()
    plt.plot(range(2,len(measure_after_iteration)+1),measure_after_iteration["Recall"][1:],"-or")
    plt.plot(range(2,len(measure_after_iteration)+1),measure_after_iteration["Specificity"][1:],"-ob")
    plt.xlabel("Number of emails X 100")
    plt.legend(("Spam Recall","Ham Recall"),loc='lower right')
    plt.title(enron_dataset)
    plt.grid()
    plt.savefig("Res/processed_data/plot_simulation_"+enron_dataset+".jpg")

    plt.close()
    plt.plot(range(2,len(measure_after_iteration)+1),measure_after_iteration["ratio_per_batch"][1:],"-og")
    plt.xlabel("Batch Number")
    plt.title(enron_dataset+" - ham:spam ratio per batch")
    plt.grid()
    plt.savefig("Res/processed_data/plot_simulation_"+enron_dataset+"_fluctuation.jpg")
    print("Done !")

Running simulation for enron3...
train = 100, test = 100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[0]	eval-auc:0.701143	train-auc:0.983279


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


[1]	eval-auc:0.852911	train-auc:0.998904
[2]	eval-auc:0.901507	train-auc:1
[3]	eval-auc:0.942308	train-auc:1
[4]	eval-auc:0.956341	train-auc:1
[5]	eval-auc:0.955821	train-auc:1
[6]	eval-auc:0.949584	train-auc:1
[7]	eval-auc:0.948025	train-auc:1
[8]	eval-auc:0.954262	train-auc:1
[9]	eval-auc:0.948025	train-auc:1
[10]	eval-auc:0.943867	train-auc:1
[11]	eval-auc:0.945946	train-auc:1
[12]	eval-auc:0.945426	train-auc:1
[13]	eval-auc:0.948025	train-auc:1
[14]	eval-auc:0.950104	train-auc:1
[15]	eval-auc:0.950624	train-auc:1
[16]	eval-auc:0.949584	train-auc:1
[17]	eval-auc:0.952183	train-auc:1
[18]	eval-auc:0.951143	train-auc:1
[19]	eval-auc:0.952703	train-auc:1
[20]	eval-auc:0.949584	train-auc:1
[21]	eval-auc:0.949584	train-auc:1
[22]	eval-auc:0.952183	train-auc:1
[23]	eval-auc:0.950104	train-auc:1
[24]	eval-auc:0.951663	train-auc:1
[25]	eval-auc:0.952703	train-auc:1
[26]	eval-auc:0.952703	train-auc:1
[27]	eval-auc:0.952703	train-auc:1
[28]	eval-auc:0.953742	train-auc:1
[29]	eval-auc:0.954782

[16]	eval-auc:0.984533	train-auc:1
[17]	eval-auc:0.9856	train-auc:1
[18]	eval-auc:0.986667	train-auc:1
[19]	eval-auc:0.9872	train-auc:1
[20]	eval-auc:0.986133	train-auc:1
[21]	eval-auc:0.986133	train-auc:1
[22]	eval-auc:0.986133	train-auc:1
[23]	eval-auc:0.988267	train-auc:1
[24]	eval-auc:0.9872	train-auc:1
[25]	eval-auc:0.987733	train-auc:1
[26]	eval-auc:0.9872	train-auc:1
[27]	eval-auc:0.988267	train-auc:1
[28]	eval-auc:0.988267	train-auc:1
[29]	eval-auc:0.986667	train-auc:1
True Positives(spam) = 23
True Negatives(ham)  = 75
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 2
Accuracy = 97.03%
Recall = 92.00%
Specificity = 100.00%
train = 800, test = 100
[0]	eval-auc:0.9232	train-auc:0.997265
[1]	eval-auc:0.9888	train-auc:0.999841
[2]	eval-auc:0.994933	train-auc:1
[3]	eval-auc:0.995467	train-auc:1
[4]	eval-auc:0.996533	train-auc:1
[5]	eval-auc:0.996533	train-auc:1
[6]	eval-auc:0.9952	train-auc:1
[7]	eval-auc:0.9968	train-auc:1
[8]	eval-auc:

train = 1400, test = 100
[0]	eval-auc:0.958933	train-auc:0.996354
[1]	eval-auc:0.974133	train-auc:0.99976
[2]	eval-auc:0.9752	train-auc:1
[3]	eval-auc:0.9744	train-auc:1
[4]	eval-auc:0.978667	train-auc:1
[5]	eval-auc:0.9808	train-auc:1
[6]	eval-auc:0.9824	train-auc:1
[7]	eval-auc:0.983467	train-auc:1
[8]	eval-auc:0.986133	train-auc:1
[9]	eval-auc:0.9888	train-auc:1
[10]	eval-auc:0.989867	train-auc:1
[11]	eval-auc:0.988267	train-auc:1
[12]	eval-auc:0.9888	train-auc:1
[13]	eval-auc:0.988267	train-auc:1
[14]	eval-auc:0.988267	train-auc:1
[15]	eval-auc:0.989333	train-auc:1
[16]	eval-auc:0.989333	train-auc:1
[17]	eval-auc:0.989333	train-auc:1
[18]	eval-auc:0.989333	train-auc:1
[19]	eval-auc:0.9888	train-auc:1
[20]	eval-auc:0.988267	train-auc:1
[21]	eval-auc:0.9888	train-auc:1
[22]	eval-auc:0.9888	train-auc:1
[23]	eval-auc:0.9888	train-auc:1
[24]	eval-auc:0.9888	train-auc:1
[25]	eval-auc:0.9888	train-auc:1
[26]	eval-auc:0.9888	train-auc:1
[27]	eval-auc:0.989333	train-auc:1
[28]	eval-auc:0.98

[26]	eval-auc:0.998441	train-auc:1
[27]	eval-auc:0.997921	train-auc:1
[28]	eval-auc:0.997921	train-auc:1
[29]	eval-auc:0.997921	train-auc:1
True Positives(spam) = 25
True Negatives(ham)  = 74
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 98.02%
Recall = 96.15%
Specificity = 100.00%
train = 2100, test = 100
[0]	eval-auc:0.986667	train-auc:0.997546
[1]	eval-auc:0.9968	train-auc:0.999861
[2]	eval-auc:0.996267	train-auc:0.999957
[3]	eval-auc:0.997333	train-auc:0.999996
[4]	eval-auc:0.999467	train-auc:0.999999
[5]	eval-auc:0.998933	train-auc:1
[6]	eval-auc:0.998933	train-auc:1
[7]	eval-auc:0.998933	train-auc:1
[8]	eval-auc:0.998933	train-auc:1
[9]	eval-auc:0.999467	train-auc:1
[10]	eval-auc:0.999467	train-auc:1
[11]	eval-auc:0.999467	train-auc:1
[12]	eval-auc:0.999467	train-auc:1
[13]	eval-auc:0.999467	train-auc:1
[14]	eval-auc:0.998933	train-auc:1
[15]	eval-auc:0.999467	train-auc:1
[16]	eval-auc:0.998933	train-auc:1
[17]	eval-auc:

[10]	eval-auc:0.989605	train-auc:1
[11]	eval-auc:0.988566	train-auc:1
[12]	eval-auc:0.990125	train-auc:1
[13]	eval-auc:0.990125	train-auc:1
[14]	eval-auc:0.991164	train-auc:1
[15]	eval-auc:0.992204	train-auc:1
[16]	eval-auc:0.992723	train-auc:1
[17]	eval-auc:0.992723	train-auc:1
[18]	eval-auc:0.992204	train-auc:1
[19]	eval-auc:0.993243	train-auc:1
[20]	eval-auc:0.992204	train-auc:1
[21]	eval-auc:0.991684	train-auc:1
[22]	eval-auc:0.991684	train-auc:1
[23]	eval-auc:0.991164	train-auc:1
[24]	eval-auc:0.991164	train-auc:1
[25]	eval-auc:0.991164	train-auc:1
[26]	eval-auc:0.991164	train-auc:1
[27]	eval-auc:0.991164	train-auc:1
[28]	eval-auc:0.991164	train-auc:1
[29]	eval-auc:0.991164	train-auc:1
True Positives(spam) = 25
True Negatives(ham)  = 70
False Positives(actual=ham, predicted=spam) = 4
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 94.06%
Recall = 96.15%
Specificity = 94.59%
train = 2800, test = 100
[0]	eval-auc:0.971203	train-auc:0.998092
[1]	eval-auc:0.979672	train-auc:

[0]	eval-auc:0.997661	train-auc:0.998335
[1]	eval-auc:0.994802	train-auc:0.999855
[2]	eval-auc:0.99948	train-auc:0.99999
[3]	eval-auc:1	train-auc:0.999998
[4]	eval-auc:1	train-auc:1
[5]	eval-auc:1	train-auc:1
[6]	eval-auc:1	train-auc:1
[7]	eval-auc:1	train-auc:1
[8]	eval-auc:1	train-auc:1
[9]	eval-auc:1	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 25
True Negatives(ham)  = 74
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=

[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 28
True Negatives(ham)  = 72
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 99.01%
Recall = 100.00%
Specificity = 100.00%
train = 4100, test = 100
[0]	eval-auc:0.988344	train-auc:0.99858
[1]	eval-auc:0.987858	train-auc:0.999874
[2]	eval-auc:0.98713	train-auc:0.999994
[3]	eval-auc:0.98543	train-auc:0.999999
[4]	eval-auc:0.986887	train-auc:1
[5]	eval-auc:0.986644	train-auc:1
[6]	eval-auc:0.986158	train-auc:1
[7]	eval-auc:0.986158	train-auc:1
[8]	eval-auc:0.986158	train-auc:1
[9]	eval-auc:0.986401	train-auc:1
[10]	eval-auc:0.985915	train-auc:1
[11]	eval-auc:0.986401	train-auc:1
[12]	eval-auc:0.986887	train-auc:1
[13]	eval-auc:0.986887	train-auc:1
[14]	eval-auc:0.986887	train-auc:1
[15]	eval-auc:0.986887	train-auc:1
[16]	eval-auc:0.986887	train

[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 26
True Negatives(ham)  = 74
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 99.01%
Recall = 100.00%
Specificity = 100.00%
train = 4800, test = 100
[0]	eval-auc:0.963724	train-auc:0.99854
[1]	eval-auc:0.984272	train-auc:0.999903
[2]	eval-auc:0.98554	train-auc:0.999988
[3]	eval-auc:0.987823	train-auc:1
[4]	eval-auc:0.985794	train-auc:1
[5]	eval-auc:0.987823	train-auc:1
[6]	eval-auc:0.9

True Positives(spam) = 61
True Negatives(ham)  = 37
False Positives(actual=ham, predicted=spam) = 1
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 97.03%
Recall = 98.39%
Specificity = 97.37%
train = 5400, test = 102
Done !
Running simulation for enron4...
train = 100, test = 100
[0]	eval-auc:0.855249	train-auc:0.99036
[1]	eval-auc:0.920738	train-auc:0.999746
[2]	eval-auc:0.950884	train-auc:1
[3]	eval-auc:0.951923	train-auc:1
[4]	eval-auc:0.946985	train-auc:1
[5]	eval-auc:0.95738	train-auc:1
[6]	eval-auc:0.95738	train-auc:1
[7]	eval-auc:0.952703	train-auc:1
[8]	eval-auc:0.953222	train-auc:1
[9]	eval-auc:0.952183	train-auc:1
[10]	eval-auc:0.951143	train-auc:1
[11]	eval-auc:0.955301	train-auc:1
[12]	eval-auc:0.951663	train-auc:1
[13]	eval-auc:0.951663	train-auc:1
[14]	eval-auc:0.951143	train-auc:1
[15]	eval-auc:0.952183	train-auc:1
[16]	eval-auc:0.949064	train-auc:1
[17]	eval-auc:0.950104	train-auc:1
[18]	eval-auc:0.947505	train-auc:1
[19]	eval-auc:0.950624	train-auc:1
[20]	eva

[7]	eval-auc:0.984101	train-auc:1
[8]	eval-auc:0.984101	train-auc:1
[9]	eval-auc:0.986294	train-auc:1
[10]	eval-auc:0.986294	train-auc:1
[11]	eval-auc:0.986842	train-auc:1
[12]	eval-auc:0.986294	train-auc:1
[13]	eval-auc:0.986842	train-auc:1
[14]	eval-auc:0.986842	train-auc:1
[15]	eval-auc:0.986842	train-auc:1
[16]	eval-auc:0.98739	train-auc:1
[17]	eval-auc:0.987939	train-auc:1
[18]	eval-auc:0.988487	train-auc:1
[19]	eval-auc:0.987939	train-auc:1
[20]	eval-auc:0.98739	train-auc:1
[21]	eval-auc:0.986842	train-auc:1
[22]	eval-auc:0.986294	train-auc:1
[23]	eval-auc:0.98739	train-auc:1
[24]	eval-auc:0.987939	train-auc:1
[25]	eval-auc:0.98739	train-auc:1
[26]	eval-auc:0.988487	train-auc:1
[27]	eval-auc:0.989035	train-auc:1
[28]	eval-auc:0.989035	train-auc:1
[29]	eval-auc:0.989035	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 21
False Positives(actual=ham, predicted=spam) = 3
False Negatives(actual=spam,predicted=ham) = 3
Accuracy = 93.07%
Recall = 96.05%
Specificity = 87.50%


[22]	eval-auc:0.996032	train-auc:1
[23]	eval-auc:0.996032	train-auc:1
[24]	eval-auc:0.996528	train-auc:1
[25]	eval-auc:0.996528	train-auc:1
[26]	eval-auc:0.996528	train-auc:1
[27]	eval-auc:0.996032	train-auc:1
[28]	eval-auc:0.995536	train-auc:1
[29]	eval-auc:0.996032	train-auc:1
True Positives(spam) = 71
True Negatives(ham)  = 25
False Positives(actual=ham, predicted=spam) = 3
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 95.05%
Recall = 98.61%
Specificity = 89.29%
train = 1400, test = 100
[0]	eval-auc:0.958916	train-auc:0.996385
[1]	eval-auc:0.990676	train-auc:0.999828
[2]	eval-auc:0.988345	train-auc:0.999991
[3]	eval-auc:0.99359	train-auc:0.999992
[4]	eval-auc:0.994173	train-auc:1
[5]	eval-auc:0.996503	train-auc:1
[6]	eval-auc:0.997086	train-auc:1
[7]	eval-auc:0.996503	train-auc:1
[8]	eval-auc:0.996503	train-auc:1
[9]	eval-auc:0.995921	train-auc:1
[10]	eval-auc:0.996503	train-auc:1
[11]	eval-auc:0.995921	train-auc:1
[12]	eval-auc:0.997086	train-auc:1
[13]	eval-auc:0.99708

train = 2000, test = 100
[0]	eval-auc:0.990933	train-auc:0.996707
[1]	eval-auc:0.992533	train-auc:0.999745
[2]	eval-auc:0.997333	train-auc:0.999985
[3]	eval-auc:0.997333	train-auc:1
[4]	eval-auc:0.9984	train-auc:1
[5]	eval-auc:0.9968	train-auc:1
[6]	eval-auc:0.9984	train-auc:1
[7]	eval-auc:0.998933	train-auc:1
[8]	eval-auc:0.998933	train-auc:1
[9]	eval-auc:0.998933	train-auc:1
[10]	eval-auc:0.998933	train-auc:1
[11]	eval-auc:0.999467	train-auc:1
[12]	eval-auc:0.998933	train-auc:1
[13]	eval-auc:0.998933	train-auc:1
[14]	eval-auc:0.998933	train-auc:1
[15]	eval-auc:0.998933	train-auc:1
[16]	eval-auc:0.998933	train-auc:1
[17]	eval-auc:0.998933	train-auc:1
[18]	eval-auc:0.998933	train-auc:1
[19]	eval-auc:0.998933	train-auc:1
[20]	eval-auc:0.999467	train-auc:1
[21]	eval-auc:0.999467	train-auc:1
[22]	eval-auc:0.999467	train-auc:1
[23]	eval-auc:0.999467	train-auc:1
[24]	eval-auc:0.999467	train-auc:1
[25]	eval-auc:0.999467	train-auc:1
[26]	eval-auc:0.999467	train-auc:1
[27]	eval-auc:0.999467	tr

[12]	eval-auc:0.999504	train-auc:1
[13]	eval-auc:0.999504	train-auc:1
[14]	eval-auc:0.999504	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:0.999504	train-auc:1
[18]	eval-auc:0.998512	train-auc:1
[19]	eval-auc:0.998512	train-auc:1
[20]	eval-auc:0.998512	train-auc:1
[21]	eval-auc:0.998512	train-auc:1
[22]	eval-auc:0.998512	train-auc:1
[23]	eval-auc:0.998512	train-auc:1
[24]	eval-auc:0.998512	train-auc:1
[25]	eval-auc:0.999504	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 69
True Negatives(ham)  = 28
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 3
Accuracy = 96.04%
Recall = 95.83%
Specificity = 100.00%
train = 2700, test = 100
[0]	eval-auc:0.990125	train-auc:0.997588
[1]	eval-auc:0.998441	train-auc:0.999874
[2]	eval-auc:0.99896	train-auc:0.999991
[3]	eval-auc:1	train-auc:0.999996
[4]	eval-auc:1	train-auc:1
[

[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 71
True Negatives(ham)  = 25
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 4
Accuracy = 95.05%
Recall = 94.67%
Specificity = 100.00%
train = 3400, test = 100
[0]	eval-auc:1	train-auc:0.997984
[1]	eval-auc:1	train-auc:0.999865
[2]	eval-auc:1	train-auc:0.99998
[3]	eval-auc:1	train-auc:0.999999
[4]	eval-auc:1	train-auc:0.999998
[5]	eval-auc:1	train-auc:0.999999
[6]	eval-auc:1	train-auc:1
[7]	eval-auc:1	train-auc:1
[8]	eval-auc:1	train-auc:1
[9]	eval-auc:1	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc

[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 75
True Negatives(ham)  = 25
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 99.01%
Recall = 100.00%
Specificity = 100.00%
train = 4100, test = 100
[0]	eval-auc:1	train-auc:0.998594
[1]	eval-auc:1	train-auc:0.999826
[2]	eval-auc:1	train-auc:0.999976
[3]	eval-auc:1	train-auc:0.999999
[4]	eval-auc:1	train-auc:0.999999
[5]	eval-auc:1	train-auc:0.999999
[6]	eval-auc:1	train-auc:1
[7]	eval-auc:1	train-auc:1
[8]	eval-auc:1	train-auc:1
[9]	eval-auc:1	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-a

[5]	eval-auc:1	train-auc:1
[6]	eval-auc:1	train-auc:1
[7]	eval-auc:1	train-auc:1
[8]	eval-auc:1	train-auc:1
[9]	eval-auc:1	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 76
True Negatives(ham)  = 24
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 99.01%
Recall = 100.00%
Specificity = 100.00%
train = 4900, test = 100
[0]	eval-auc:0.977333	train-auc:0.998799
[1]	eval-auc:0.9984	train-auc:0.999

[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 74
True Negatives(ham)  = 25
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 98.02%
Recall = 98.67%
Specificity = 100.00%
train = 5600, test = 100
[0]	eval-auc:1	train-auc:0.999101
[1]	eval-auc:1	train-auc:0.999889
[2]	eval-auc:1	train-auc:0.999977
[3]	eval-auc:1	train-auc:0.999992
[4]	eval-auc:1	train-auc:0.999995
[5]	eval-auc:1	train-auc:0.999998
[6]	eval-auc:1	train-auc:0.999999
[7]	eval-auc:1	train-auc:1
[8]	eval-auc:1	train-auc:1
[9]	eval-auc:1	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	

[14]	eval-auc:0.986486	train-auc:1
[15]	eval-auc:0.987006	train-auc:1
[16]	eval-auc:0.985447	train-auc:1
[17]	eval-auc:0.986486	train-auc:1
[18]	eval-auc:0.988566	train-auc:1
[19]	eval-auc:0.988566	train-auc:1
[20]	eval-auc:0.989085	train-auc:1
[21]	eval-auc:0.988046	train-auc:1
[22]	eval-auc:0.988566	train-auc:1
[23]	eval-auc:0.988566	train-auc:1
[24]	eval-auc:0.988046	train-auc:1
[25]	eval-auc:0.988046	train-auc:1
[26]	eval-auc:0.988046	train-auc:1
[27]	eval-auc:0.988566	train-auc:1
[28]	eval-auc:0.989085	train-auc:1
[29]	eval-auc:0.988566	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 19
False Positives(actual=ham, predicted=spam) = 7
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 91.09%
Recall = 98.65%
Specificity = 73.08%
train = 500, test = 100
[0]	eval-auc:0.864309	train-auc:0.997528
[1]	eval-auc:0.979167	train-auc:1
[2]	eval-auc:0.966831	train-auc:1
[3]	eval-auc:0.975877	train-auc:1
[4]	eval-auc:0.97807	train-auc:1
[5]	eval-auc:0.971765	train-auc:1
[6]	

[9]	eval-auc:0.997463	train-auc:1
[10]	eval-auc:0.997971	train-auc:1
[11]	eval-auc:0.997971	train-auc:1
[12]	eval-auc:0.997463	train-auc:1
[13]	eval-auc:0.997971	train-auc:1
[14]	eval-auc:0.997971	train-auc:1
[15]	eval-auc:0.998478	train-auc:1
[16]	eval-auc:0.998478	train-auc:1
[17]	eval-auc:0.998478	train-auc:1
[18]	eval-auc:0.998478	train-auc:1
[19]	eval-auc:0.998478	train-auc:1
[20]	eval-auc:0.997971	train-auc:1
[21]	eval-auc:0.998478	train-auc:1
[22]	eval-auc:0.998478	train-auc:1
[23]	eval-auc:0.998478	train-auc:1
[24]	eval-auc:0.998478	train-auc:1
[25]	eval-auc:0.998478	train-auc:1
[26]	eval-auc:0.998478	train-auc:1
[27]	eval-auc:0.998478	train-auc:1
[28]	eval-auc:0.998478	train-auc:1
[29]	eval-auc:0.997971	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 25
False Positives(actual=ham, predicted=spam) = 2
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 97.03%
Recall = 100.00%
Specificity = 92.59%
train = 1200, test = 100
[0]	eval-auc:0.898275	train-auc:0.9991

[25]	eval-auc:0.993763	train-auc:1
[26]	eval-auc:0.993763	train-auc:1
[27]	eval-auc:0.993243	train-auc:1
[28]	eval-auc:0.992723	train-auc:1
[29]	eval-auc:0.992723	train-auc:1
True Positives(spam) = 74
True Negatives(ham)  = 22
False Positives(actual=ham, predicted=spam) = 4
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 95.05%
Recall = 100.00%
Specificity = 84.62%
train = 1800, test = 100
[0]	eval-auc:0.976154	train-auc:0.998922
[1]	eval-auc:0.993658	train-auc:0.999873
[2]	eval-auc:0.996956	train-auc:0.999992
[3]	eval-auc:1	train-auc:1
[4]	eval-auc:1	train-auc:1
[5]	eval-auc:1	train-auc:1
[6]	eval-auc:1	train-auc:1
[7]	eval-auc:1	train-auc:1
[8]	eval-auc:1	train-auc:1
[9]	eval-auc:1	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	

[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 76
True Negatives(ham)  = 24
False Positives(actual=ham, predicted=spam) = 0
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 99.01%
Recall = 100.00%
Specificity = 100.00%
train = 2500, test = 100
[0]	eval-auc:0.896267	train-auc:0.999065
[1]	eval-auc:0.958133	train-auc:0.999915
[2]	eval-auc:0.978667	train-auc:1
[3]	eval-auc:0.9824	train-auc:1
[4]	eval-auc:0.9856	train-auc:1
[5]	eval-auc:0.9888	train-auc:1
[6]	eval-auc:0.984533	train-auc:1
[7]	eval-auc:0.985067	train-auc:1
[8]	eval-auc:0.984	train-auc:1
[9]	eval-auc:0.987733	train-auc:1
[10]	eval-auc:0.9872	train-auc:1
[11]	eval-auc:0.9856	train-auc:1
[12]	eval-auc:0.985067	t

train = 3100, test = 100
[0]	eval-auc:0.93789	train-auc:0.998784
[1]	eval-auc:0.968815	train-auc:0.999843
[2]	eval-auc:0.996881	train-auc:0.999967
[3]	eval-auc:0.99896	train-auc:0.999998
[4]	eval-auc:0.99896	train-auc:0.999999
[5]	eval-auc:0.99896	train-auc:1
[6]	eval-auc:0.99948	train-auc:1
[7]	eval-auc:0.99896	train-auc:1
[8]	eval-auc:0.99896	train-auc:1
[9]	eval-auc:0.99896	train-auc:1
[10]	eval-auc:0.99896	train-auc:1
[11]	eval-auc:0.99948	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:1	train-auc:1
[17]	eval-auc:1	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 74
True Negat

train = 3800, test = 100
[0]	eval-auc:0.975901	train-auc:0.999115
[1]	eval-auc:0.993404	train-auc:0.99995
[2]	eval-auc:0.997971	train-auc:1
[3]	eval-auc:0.998985	train-auc:1
[4]	eval-auc:0.999493	train-auc:1
[5]	eval-auc:1	train-auc:1
[6]	eval-auc:1	train-auc:1
[7]	eval-auc:0.999493	train-auc:1
[8]	eval-auc:0.998478	train-auc:1
[9]	eval-auc:0.999493	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-auc:1	train-auc:1
[13]	eval-auc:1	train-auc:1
[14]	eval-auc:1	train-auc:1
[15]	eval-auc:1	train-auc:1
[16]	eval-auc:0.999493	train-auc:1
[17]	eval-auc:0.999493	train-auc:1
[18]	eval-auc:1	train-auc:1
[19]	eval-auc:0.999493	train-auc:1
[20]	eval-auc:0.997971	train-auc:1
[21]	eval-auc:0.997971	train-auc:1
[22]	eval-auc:0.999493	train-auc:1
[23]	eval-auc:0.997971	train-auc:1
[24]	eval-auc:0.997971	train-auc:1
[25]	eval-auc:0.998985	train-auc:1
[26]	eval-auc:0.998478	train-auc:1
[27]	eval-auc:0.998985	train-auc:1
[28]	eval-auc:0.998478	train-auc:1
[29]	eval-auc:0.9989

[15]	eval-auc:0.985746	train-auc:1
[16]	eval-auc:0.986842	train-auc:1
[17]	eval-auc:0.985197	train-auc:1
[18]	eval-auc:0.986294	train-auc:1
[19]	eval-auc:0.98739	train-auc:1
[20]	eval-auc:0.987939	train-auc:1
[21]	eval-auc:0.98739	train-auc:1
[22]	eval-auc:0.986842	train-auc:1
[23]	eval-auc:0.986294	train-auc:1
[24]	eval-auc:0.986294	train-auc:1
[25]	eval-auc:0.986294	train-auc:1
[26]	eval-auc:0.985197	train-auc:1
[27]	eval-auc:0.984101	train-auc:1
[28]	eval-auc:0.984649	train-auc:1
[29]	eval-auc:0.985197	train-auc:1
True Positives(spam) = 74
True Negatives(ham)  = 21
False Positives(actual=ham, predicted=spam) = 3
False Negatives(actual=spam,predicted=ham) = 2
Accuracy = 94.06%
Recall = 97.37%
Specificity = 87.50%
train = 4500, test = 100
[0]	eval-auc:0.986294	train-auc:0.999125
[1]	eval-auc:0.997807	train-auc:0.999929
[2]	eval-auc:0.996711	train-auc:0.999994
[3]	eval-auc:0.996162	train-auc:1
[4]	eval-auc:0.998355	train-auc:1
[5]	eval-auc:0.997259	train-auc:1
[6]	eval-auc:0.998355	tra

[29]	eval-auc:0.972095	train-auc:1
True Positives(spam) = 72
True Negatives(ham)  = 23
False Positives(actual=ham, predicted=spam) = 4
False Negatives(actual=spam,predicted=ham) = 1
Accuracy = 94.06%
Recall = 98.63%
Specificity = 85.19%
train = 200, test = 100
[0]	eval-auc:0.915018	train-auc:0.991512
[1]	eval-auc:0.966007	train-auc:1
[2]	eval-auc:0.978945	train-auc:1
[3]	eval-auc:0.986301	train-auc:1
[4]	eval-auc:0.987316	train-auc:1
[5]	eval-auc:0.987823	train-auc:1
[6]	eval-auc:0.992897	train-auc:1
[7]	eval-auc:0.992897	train-auc:1
[8]	eval-auc:0.990868	train-auc:1
[9]	eval-auc:0.994419	train-auc:1
[10]	eval-auc:0.993912	train-auc:1
[11]	eval-auc:0.99239	train-auc:1
[12]	eval-auc:0.994419	train-auc:1
[13]	eval-auc:0.993912	train-auc:1
[14]	eval-auc:0.993404	train-auc:1
[15]	eval-auc:0.993404	train-auc:1
[16]	eval-auc:0.991882	train-auc:1
[17]	eval-auc:0.990868	train-auc:1
[18]	eval-auc:0.990868	train-auc:1
[19]	eval-auc:0.991882	train-auc:1
[20]	eval-auc:0.990868	train-auc:1
[21]	eva

[8]	eval-auc:0.996612	train-auc:1
[9]	eval-auc:0.996047	train-auc:1
[10]	eval-auc:0.996612	train-auc:1
[11]	eval-auc:0.994918	train-auc:1
[12]	eval-auc:0.995483	train-auc:1
[13]	eval-auc:0.994353	train-auc:1
[14]	eval-auc:0.996047	train-auc:1
[15]	eval-auc:0.997177	train-auc:1
[16]	eval-auc:0.997177	train-auc:1
[17]	eval-auc:0.997177	train-auc:1
[18]	eval-auc:0.997177	train-auc:1
[19]	eval-auc:0.996612	train-auc:1
[20]	eval-auc:0.996612	train-auc:1
[21]	eval-auc:0.996047	train-auc:1
[22]	eval-auc:0.996047	train-auc:1
[23]	eval-auc:0.995483	train-auc:1
[24]	eval-auc:0.994918	train-auc:1
[25]	eval-auc:0.994918	train-auc:1
[26]	eval-auc:0.996047	train-auc:1
[27]	eval-auc:0.996047	train-auc:1
[28]	eval-auc:0.995483	train-auc:1
[29]	eval-auc:0.996047	train-auc:1
True Positives(spam) = 75
True Negatives(ham)  = 22
False Positives(actual=ham, predicted=spam) = 1
False Negatives(actual=spam,predicted=ham) = 2
Accuracy = 96.04%
Recall = 97.40%
Specificity = 95.65%
train = 900, test = 100
[0]	ev

[22]	eval-auc:0.992	train-auc:1
[23]	eval-auc:0.992	train-auc:1
[24]	eval-auc:0.992	train-auc:1
[25]	eval-auc:0.992	train-auc:1
[26]	eval-auc:0.992	train-auc:1
[27]	eval-auc:0.992	train-auc:1
[28]	eval-auc:0.992	train-auc:1
[29]	eval-auc:0.992533	train-auc:1
True Positives(spam) = 71
True Negatives(ham)  = 23
False Positives(actual=ham, predicted=spam) = 2
False Negatives(actual=spam,predicted=ham) = 4
Accuracy = 93.07%
Recall = 94.67%
Specificity = 92.00%
train = 1500, test = 100
[0]	eval-auc:0.974133	train-auc:0.997862
[1]	eval-auc:0.993867	train-auc:0.999864
[2]	eval-auc:0.995733	train-auc:0.999991
[3]	eval-auc:0.994133	train-auc:1
[4]	eval-auc:0.9952	train-auc:1
[5]	eval-auc:0.9952	train-auc:1
[6]	eval-auc:0.997333	train-auc:1
[7]	eval-auc:0.997333	train-auc:1
[8]	eval-auc:0.9968	train-auc:1
[9]	eval-auc:0.996267	train-auc:1
[10]	eval-auc:0.995733	train-auc:1
[11]	eval-auc:0.996267	train-auc:1
[12]	eval-auc:0.996267	train-auc:1
[13]	eval-auc:0.9968	train-auc:1
[14]	eval-auc:0.99733

[2]	eval-auc:0.988838	train-auc:0.999999
[3]	eval-auc:0.990868	train-auc:1
[4]	eval-auc:0.993404	train-auc:1
[5]	eval-auc:0.992897	train-auc:1
[6]	eval-auc:0.993912	train-auc:1
[7]	eval-auc:0.993912	train-auc:1
[8]	eval-auc:0.993404	train-auc:1
[9]	eval-auc:0.993912	train-auc:1
[10]	eval-auc:0.992897	train-auc:1
[11]	eval-auc:0.993912	train-auc:1
[12]	eval-auc:0.993404	train-auc:1
[13]	eval-auc:0.993404	train-auc:1
[14]	eval-auc:0.99239	train-auc:1
[15]	eval-auc:0.99239	train-auc:1
[16]	eval-auc:0.993404	train-auc:1
[17]	eval-auc:0.99239	train-auc:1
[18]	eval-auc:0.991882	train-auc:1
[19]	eval-auc:0.99239	train-auc:1
[20]	eval-auc:0.99239	train-auc:1
[21]	eval-auc:0.991882	train-auc:1
[22]	eval-auc:0.991882	train-auc:1
[23]	eval-auc:0.993404	train-auc:1
[24]	eval-auc:0.993404	train-auc:1
[25]	eval-auc:0.992897	train-auc:1
[26]	eval-auc:0.991882	train-auc:1
[27]	eval-auc:0.99239	train-auc:1
[28]	eval-auc:0.99239	train-auc:1
[29]	eval-auc:0.991375	train-auc:1
True Positives(spam) = 71
Tr

[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 77
True Negatives(ham)  = 22
False Positives(actual=ham, predicted=spam) = 1
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 98.02%
Recall = 100.00%
Specificity = 95.65%
train = 2800, test = 100
[0]	eval-auc:0.939878	train-auc:0.998113
[1]	eval-auc:0.987316	train-auc:0.999671
[2]	eval-auc:0.986809	train-auc:0.999903
[3]	eval-auc:0.989345	train-auc:0.99998
[4]	eval-auc:0.99036	train-auc:0.999997
[5]	eval-auc:0.987823	train-auc:1
[6]	eval-auc:0.984779	train-auc:1
[7]	eval-auc:0.977676	train-auc:1
[8]	eval-auc:0.974632	train-auc:1
[9]	eval-auc:0.967529	train-auc:1
[10]	eval-auc:0.975139	train-auc:1
[11]	eval-auc:0.977169	train-auc:1
[12]	eval-auc:0.979198	train-auc:1
[13]	eval-auc:

[3]	eval-auc:0.977676	train-auc:0.999999
[4]	eval-auc:0.976662	train-auc:1
[5]	eval-auc:0.967022	train-auc:1
[6]	eval-auc:0.972603	train-auc:1
[7]	eval-auc:0.98072	train-auc:1
[8]	eval-auc:0.984779	train-auc:1
[9]	eval-auc:0.982243	train-auc:1
[10]	eval-auc:0.984272	train-auc:1
[11]	eval-auc:0.985287	train-auc:1
[12]	eval-auc:0.983765	train-auc:1
[13]	eval-auc:0.985287	train-auc:1
[14]	eval-auc:0.986301	train-auc:1
[15]	eval-auc:0.986301	train-auc:1
[16]	eval-auc:0.985287	train-auc:1
[17]	eval-auc:0.987316	train-auc:1
[18]	eval-auc:0.987823	train-auc:1
[19]	eval-auc:0.989345	train-auc:1
[20]	eval-auc:0.988331	train-auc:1
[21]	eval-auc:0.990868	train-auc:1
[22]	eval-auc:0.991882	train-auc:1
[23]	eval-auc:0.99239	train-auc:1
[24]	eval-auc:0.993404	train-auc:1
[25]	eval-auc:0.993404	train-auc:1
[26]	eval-auc:0.993912	train-auc:1
[27]	eval-auc:0.994419	train-auc:1
[28]	eval-auc:0.994926	train-auc:1
[29]	eval-auc:0.994419	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 22
False

[18]	eval-auc:1	train-auc:1
[19]	eval-auc:1	train-auc:1
[20]	eval-auc:1	train-auc:1
[21]	eval-auc:1	train-auc:1
[22]	eval-auc:1	train-auc:1
[23]	eval-auc:1	train-auc:1
[24]	eval-auc:1	train-auc:1
[25]	eval-auc:1	train-auc:1
[26]	eval-auc:1	train-auc:1
[27]	eval-auc:1	train-auc:1
[28]	eval-auc:1	train-auc:1
[29]	eval-auc:1	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 26
False Positives(actual=ham, predicted=spam) = 1
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 98.02%
Recall = 100.00%
Specificity = 96.30%
train = 4100, test = 100
[0]	eval-auc:0.972332	train-auc:0.998038
[1]	eval-auc:0.985319	train-auc:0.999839
[2]	eval-auc:0.990966	train-auc:0.999971
[3]	eval-auc:0.998871	train-auc:1
[4]	eval-auc:0.995483	train-auc:0.999999
[5]	eval-auc:0.997177	train-auc:1
[6]	eval-auc:0.998306	train-auc:1
[7]	eval-auc:0.998871	train-auc:1
[8]	eval-auc:0.998871	train-auc:1
[9]	eval-auc:0.998871	train-auc:1
[10]	eval-auc:1	train-auc:1
[11]	eval-auc:1	train-auc:1
[12]	eval-au

[10]	eval-auc:0.996711	train-auc:1
[11]	eval-auc:0.996711	train-auc:1
[12]	eval-auc:0.995066	train-auc:1
[13]	eval-auc:0.995614	train-auc:1
[14]	eval-auc:0.996711	train-auc:1
[15]	eval-auc:0.996711	train-auc:1
[16]	eval-auc:0.996711	train-auc:1
[17]	eval-auc:0.996162	train-auc:1
[18]	eval-auc:0.996711	train-auc:1
[19]	eval-auc:0.996711	train-auc:1
[20]	eval-auc:0.996711	train-auc:1
[21]	eval-auc:0.996711	train-auc:1
[22]	eval-auc:0.997259	train-auc:1
[23]	eval-auc:0.997259	train-auc:1
[24]	eval-auc:0.997259	train-auc:1
[25]	eval-auc:0.997259	train-auc:1
[26]	eval-auc:0.997259	train-auc:1
[27]	eval-auc:0.997259	train-auc:1
[28]	eval-auc:0.996711	train-auc:1
[29]	eval-auc:0.996711	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 22
False Positives(actual=ham, predicted=spam) = 2
False Negatives(actual=spam,predicted=ham) = 3
Accuracy = 94.06%
Recall = 96.05%
Specificity = 91.67%
train = 4800, test = 100
[0]	eval-auc:0.997288	train-auc:0.998276
[1]	eval-auc:0.998794	train-auc:

[28]	eval-auc:0.998985	train-auc:1
[29]	eval-auc:0.998985	train-auc:1
True Positives(spam) = 73
True Negatives(ham)  = 24
False Positives(actual=ham, predicted=spam) = 3
False Negatives(actual=spam,predicted=ham) = 0
Accuracy = 96.04%
Recall = 100.00%
Specificity = 88.89%
train = 5400, test = 100
[0]	eval-auc:0.921518	train-auc:0.998257
[1]	eval-auc:0.983368	train-auc:0.999797
[2]	eval-auc:0.991684	train-auc:0.999978
[3]	eval-auc:0.994802	train-auc:0.999991
[4]	eval-auc:0.997401	train-auc:1
[5]	eval-auc:0.997921	train-auc:1
[6]	eval-auc:0.998441	train-auc:1
[7]	eval-auc:0.998441	train-auc:1
[8]	eval-auc:0.998441	train-auc:1
[9]	eval-auc:0.998441	train-auc:1
[10]	eval-auc:0.99896	train-auc:1
[11]	eval-auc:0.998441	train-auc:1
[12]	eval-auc:0.998441	train-auc:1
[13]	eval-auc:0.998441	train-auc:1
[14]	eval-auc:0.99896	train-auc:1
[15]	eval-auc:0.99896	train-auc:1
[16]	eval-auc:0.99896	train-auc:1
[17]	eval-auc:0.99896	train-auc:1
[18]	eval-auc:0.99896	train-auc:1
[19]	eval-auc:0.99896	tra

In [8]:
# Run when all the 6 enron datasets have been made to run
# Average of all the batches for each enron dataset
for i in range(6):
    enron_dataset="enron"+str(i+1)
    measure_after_iteration = pd.read_csv("Res/processed_data/measure_after_iteration_"+enron_dataset+".csv",sep=",")
    print(enron_dataset,np.round(np.mean(measure_after_iteration["Recall"]),1),np.round(np.mean(measure_after_iteration["Specificity"]),1))

('enron1', 88.1, 97.6)
('enron2', 89.8, 97.7)
('enron3', 91.9, 98.2)
('enron4', 98.5, 93.5)
('enron5', 98.9, 91.2)
('enron6', 98.8, 89.1)


In [6]:
print("Completed !")

Completed !
