In [1]:
# suppress future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

import pandas as pd
import numpy as np
import csv
import glob
import random
import ntpath

import sklearn
from sklearn import preprocessing
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

## Load Training Data

In [2]:
def load_visuals_data(path, agg = {'none', 'sum', 'mean', 'max', 'min', 'first', 'last'}):
    """ 
    Load all visuals data files and combine them into a single Pandas DataFrame.
    
    Parameter agg:
    'none': keep both rows as single row
    'sum': take the columnwise sum of the rows
    'mean': take the columnwise mean of the rows
    'max': take the columnwise max of the rows
    'min': take the columnwise min of the rows
    'first': keep only the first row
    'last': keep only the last row
    
    Returns
    --------
    visuals_data: data frame containing the visuals data
    """
    
    # create a list of all csv files
    all_files = glob.glob(path + "/*.csv")

    data = []
    df = pd.DataFrame()

    if agg is 'none':
        for filename in all_files:
            li = pd.read_csv(filename, index_col = None, header = None)
            df = pd.DataFrame(li.values.flatten()).transpose()
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            data.append(df)
    
    elif agg is 'sum':
        for filename in all_files:
            df = pd.read_csv(filename, index_col = None, header = None)
            df.loc['file_name',:]= df.sum(axis=0)
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            df = pd.DataFrame(df.drop([0,1]))
            data.append(df)
            
    elif agg is 'mean':
        for filename in all_files:
            df = pd.read_csv(filename, index_col = None, header = None)
            df.loc['file_name',:]= df.mean(axis=0)
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            df = pd.DataFrame(df.drop([0,1]))
            data.append(df)
       
    elif agg is 'max':
        for filename in all_files:
            df = pd.read_csv(filename, index_col = None, header = None)
            df.loc['file_name',:]= df.max(axis=0)
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            df = pd.DataFrame(df.drop([0,1]))
            data.append(df)
            
    elif agg is 'min':
        for filename in all_files:
            df = pd.read_csv(filename, index_col = None, header = None)
            df.loc['file_name',:]= df.min(axis=0)
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            df = pd.DataFrame(df.drop([0,1]))
            data.append(df)
            
    elif agg is 'first':
        for filename in all_files:
            df = pd.read_csv(filename, index_col = None, header = None)
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            df = pd.DataFrame(df.drop([1]))
            data.append(df)
            
    elif agg is 'last':
        for filename in all_files:
            df = pd.read_csv(filename, index_col = None, header = None)
            #df['file_name'] = filename.rsplit('\\', 1)[1].rsplit('.', 1)[0]
            df['file_name'] = ntpath.basename(filename).split('.', 1)[0]
            df = pd.DataFrame(df.drop([0]))
            data.append(df)
            
    else:
        print('Parameter \'agg\' needs to be one of the following strings:' )
        print('\'none\', \'sum\', \'mean\', \'max\', \'min\', \'first\', \'last\'')
        return
            

    visuals_data = pd.concat(data, axis=0, ignore_index=True)
    visuals_data.set_index(['file_name'], inplace = True)
    
    return visuals_data

# load visuals files
path_vis_train = r'./data/Dev_Set/vis_descriptors'

data_visuals = load_visuals_data(path_vis_train, agg = 'none')
data_visuals.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1642,1643,1644,1645,1646,1647,1648,1649,1650,1651
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Words_and_Pictures,0.26733,0.31877,0.30814,0.25255,0.30517,0.32915,0.33434,0.33807,0.27418,0.24486,...,767.16,477.38,22.941,19.609,17.061,19.442,132420.0,19124.0,36269.0,19072.0
Welcome_to_Me,0.30717,0.33422,0.33112,0.33124,0.31114,0.33644,0.33616,0.34479,0.16983,0.27379,...,397.26,168.23,20.426,21.3,18.608,21.182,83171.0,13714.0,32774.0,13780.0
Wild_Tales,0.30768,0.3376,0.34787,0.33882,0.3162,0.33023,0.33942,0.36068,0.11968,0.2602,...,368.64,177.21,22.276,22.18,19.554,21.984,83572.0,14376.0,30306.0,14468.0
V_for_Vendetta,0.038715,0.13579,0.15695,0.052015,0.1034,0.24479,0.32119,0.080424,0.11021,0.18988,...,6684.4,2339.7,0.40354,0.49571,0.38531,0.48153,10770.0,3543.4,9500.8,3256.9
Transformers__Age_of_Extinction,0.19996,0.26934,0.27986,0.23725,0.30844,0.33242,0.32998,0.325,0.30735,0.33431,...,1120.6,669.56,15.086,14.7,14.859,14.723,211630.0,24019.0,48339.0,24090.0


In [3]:
data_visuals.shape

(95, 1652)

In [4]:
def load_train_test_data(path):
    """ 
    Load all training or testing data files and combine them into a single Pandas DataFrame.
    
    Returns
    --------
    train_test_data: data frame containing the training or testing data
    """
    
    train_test_data = pd.read_csv(path, index_col = None, header = 0)
    train_test_data = train_test_data.drop("movie_name", axis=1)
    train_test_data.set_index(['file_name'], inplace = True)
    
    return train_test_data


# load training data
path_train = r'./data/Dev_Set/CoeDevelopmentTrainingdata.csv'

data_training = load_train_test_data(path_train)
data_training.head()

Unnamed: 0_level_0,goodforairplanes
file_name,Unnamed: 1_level_1
Seventh_Son,1
2_States,0
Welcome_to_Me,0
The_Judge,0
Transformers__Age_of_Extinction,0


In [5]:
# merge training data with visuals data
train = pd.merge(data_visuals, data_training, left_index = True, right_index=True)
train.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1643,1644,1645,1646,1647,1648,1649,1650,1651,goodforairplanes
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Words_and_Pictures,0.26733,0.31877,0.30814,0.25255,0.30517,0.32915,0.33434,0.33807,0.27418,0.24486,...,477.38,22.941,19.609,17.061,19.442,132420.0,19124.0,36269.0,19072.0,1
Welcome_to_Me,0.30717,0.33422,0.33112,0.33124,0.31114,0.33644,0.33616,0.34479,0.16983,0.27379,...,168.23,20.426,21.3,18.608,21.182,83171.0,13714.0,32774.0,13780.0,0
Wild_Tales,0.30768,0.3376,0.34787,0.33882,0.3162,0.33023,0.33942,0.36068,0.11968,0.2602,...,177.21,22.276,22.18,19.554,21.984,83572.0,14376.0,30306.0,14468.0,0
V_for_Vendetta,0.038715,0.13579,0.15695,0.052015,0.1034,0.24479,0.32119,0.080424,0.11021,0.18988,...,2339.7,0.40354,0.49571,0.38531,0.48153,10770.0,3543.4,9500.8,3256.9,0
Transformers__Age_of_Extinction,0.19996,0.26934,0.27986,0.23725,0.30844,0.33242,0.32998,0.325,0.30735,0.33431,...,669.56,15.086,14.7,14.859,14.723,211630.0,24019.0,48339.0,24090.0,0


## Load Testing Data

In [6]:
# load visuals data for testing
path_vis_test = r'./data/Test_Set/vis_descriptors'

data_visuals_test = load_visuals_data(path_vis_test, agg = 'none')
data_visuals_test.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1642,1643,1644,1645,1646,1647,1648,1649,1650,1651
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,518400.0,269700.0,6.1035e-07,0.001645,2e-06,0.001645,1638400.0,269700.0,518400.0,269700.0
12_Years_a_Slave,0.0,0.193,0.19299,0.0,0.0,0.2711,0.2711,0.0,0.0,0.27024,...,30830.0,15454.0,15.294,13.422,13.418,13.307,71633.0,21855.0,48314.0,24434.0
21_Jump_Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,230400.0,119790.0,1.3776e-06,0.00247,4e-06,0.00247,725900.0,119790.0,230400.0,119790.0
2_States,0.033181,0.08976,0.073469,0.012184,0.10137,0.28969,0.26856,0.02602,0.071337,0.3302,...,629.31,476.71,4.4431,3.5334,2.7159,3.7869,203660.0,20777.0,29696.0,19740.0
A_Bug's_Life,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,81.909,43.269,2.9645,3.7378,2.4635,3.9526,4444.8,2405.4,5602.6,2611.1


In [7]:
# load testing data
path_test = r'./data/Dev_Set/CoeDevelopmentTestdata.csv'

data_testing = load_train_test_data(path_test)
data_testing.head()

Unnamed: 0_level_0,goodforairplanes
file_name,Unnamed: 1_level_1
Belle_de_Jour,1
Big_Game,0
Birdman__Or_(The_Unexpected_Virtue_of_Ignorance).mp4,1
Dances_with_Wolves,0
Dilwale_Dulhania_Le_Jayenge,0


In [8]:
# merge testing data with visuals data
test = pd.merge(data_visuals_test, data_testing, left_index = True, right_index=True)
test.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1643,1644,1645,1646,1647,1648,1649,1650,1651,goodforairplanes
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Belle_de_Jour,0.005963,0.023951,0.02851,0.061558,0.044613,0.049806,0.032554,0.070085,0.040795,0.04645,...,121.67,2.1137,1.9939,1.0164,2.1264,6797.7,2722.4,8510.8,2390.5,1
Big_Game,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,269700.0,6.1035e-07,0.001645,2e-06,0.001645,1638400.0,269700.0,518400.0,269700.0,0
Dances_with_Wolves,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,428.99,1.4983,1.6961,1.4805,1.8075,10665.0,3592.4,7577.9,3450.6,0
Dilwale_Dulhania_Le_Jayenge,0.40341,0.23464,0.26453,0.116,0.27926,0.20721,0.2538,0.11875,0.055308,0.14537,...,4403.8,4.5156,6.0129,6.0129,6.0268,40098.0,8133.8,16631.0,6912.5,0
Dorsvloer_vol_confetti,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,108440.0,2.5038,0.83142,1.2636,1.883,549320.0,131340.0,242430.0,111750.0,1


# Prediction Models

In [9]:
def Classifier(tX, ty, vX, vy, CV = 10,
              classifier = {'knn', 'decision_tree', 'logisticregression', 'svm',
                            'random_forest', 'adaboost', 'gradient_boost'}):
    
    if classifier is 'knn':
        clf = KNeighborsClassifier()
        
    elif classifier is 'decision_tree':
        clf = DecisionTreeClassifier()
        
    elif classifier is 'logisticregression':
        clf = LogisticRegression()
        
    elif classifier is 'svm':
        clf = SVC(kernel = 'rbf')
        
    elif classifier is 'random_forest':
        clf = RandomForestClassifier()
        
    elif classifier is 'adaboost':
        clf = AdaBoostClassifier()
        
    elif classifier is 'gradient_boost':
        clf = GradientBoostingClassifier()
        
    else:
        print('Parameter \'classifier\' needs to be one of the following strings:' )
        print('\'knn\', \'decision_tree\', \'logisticregression\', \'svm\', \'random_forest\', \'adaboost\', \'gradient_boost\'')
        return

    
    # CV
    precision = cross_val_score(clf, tX, ty, cv = CV, scoring = 'precision')
    recall = cross_val_score(clf, tX, ty, cv = CV, scoring = 'recall')
    f1 = cross_val_score(clf, tX, ty, cv = CV, scoring = 'f1')
    
    scores = dict()
    scores['Precision'] = np.mean(precision)
    scores['Recall'] = np.mean(recall)
    scores['F1'] = np.mean(f1)
    
    return scores

# Las Vegas Wrapper - Feature Selection

In [10]:
def LVW(tX, ty, vX, vy, K, original_features,
       classifier = {'knn', 'decision_tree', 'logisticregression', 'svm',
                     'random_forest', 'adaboost', 'gradient_boost'}):
    
    
    acc = 0
    k = 0
    C = len(original_features)
    
    while k < K:
        #print('k: ', k)
        ran_choice = range(1,len(original_features)-1)
        S1 = random.sample(original_features, random.choice(ran_choice))
        C1 = len(S1)
        
        x_train = tX[tX.columns.intersection(S1)]
        x_test = vX[vX.columns.intersection(S1)]
        
        acc1 = Classifier(x_train, ty, x_test, vy, 10, classifier)['F1']
        
        if (acc1 > acc) or (acc1 == acc and C1 < C):
            k = 0
            acc = acc1
            C = C1
            S = S1
        
        else:
            k += 1
            
    return S

# Load Data and Perform Train/Test Split

In [11]:
def load_train_test_split(classifier = {'knn', 'decision_tree', 'logisticregression', 'svm',
                                        'random_forest', 'adaboost', 'gradient_boost'},
                          agg = {'none', 'sum', 'mean', 'max', 'min', 'first', 'last'}):   

    
    # laod visuals data
    data_visuals = load_visuals_data(path_vis_train, agg = agg)
    
    # merge training data with visuals data
    train = pd.merge(data_visuals, data_training, left_index = True, right_index=True)

    # same with testing set
    data_visuals_test = load_visuals_data(path_vis_test, agg = agg)

    # merge training data with visuals data
    test = pd.merge(data_visuals_test, data_testing, left_index = True, right_index=True)
    
    # train/test split
    X_train = train.iloc[:, :-1]
    Y_train = train.iloc[:,-1]
    
    X_test = test.iloc[:, :-1]
    Y_test = test.iloc[:,-1]
       
    # LVW
    if classifier is not 'random_forest':
        
        features = LVW(X_train, Y_train, X_test, Y_test, 50, range(0, X_train.shape[1]-1), classifier)

        # train/test split
        X_train = X_train[X_train.columns.intersection(features)]
        X_test = X_test[X_test.columns.intersection(features)]

    
    return X_train, Y_train, X_test, Y_test

## KNN
Perform Feature Selection (LVW) for all models

In [12]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'knn', agg = 'none')
knn_scores = Classifier(tX, ty, vX, vy, classifier = 'knn')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'knn', agg = 'sum')
knn_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'knn')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'knn', agg = 'mean')
knn_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'knn')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'knn', agg = 'max')
knn_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'knn')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'knn', agg = 'min')
knn_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'knn')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'knn', agg = 'first')
knn_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'knn')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'knn', agg = 'last')
knn_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'knn')

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

#### Print Precision, Recall, F1

In [13]:
print("KNN using LVW Feature Selection")
print("---------------------------------------------------------------------------------------------------")

# none
print("KNN - Keep both rows")
print(knn_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("KNN - Keep only columnwise sum")
print(knn_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("KNN - Keep only columnwise mean")
print(knn_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("KNN - Keep only columnwise max")
print(knn_scores_max)
print("---------------------------------------------------------------------------------------------------")

# min
print("KNN - Keep only columnwise min")
print(knn_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("KNN - Keep only the first row")
print(knn_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("KNN - Keep only the last row")
print(knn_scores_last)
print("---------------------------------------------------------------------------------------------------")

KNN using LVW Feature Selection
---------------------------------------------------------------------------------------------------
KNN - Keep both rows
{'Precision': 0.6582142857142858, 'Recall': 0.8, 'F1': 0.7172261072261072}
---------------------------------------------------------------------------------------------------
KNN - Keep only columnwise sum
{'Precision': 0.7414285714285714, 'Recall': 0.86, 'F1': 0.7891919191919192}
---------------------------------------------------------------------------------------------------
KNN - Keep only columnwise mean
{'Precision': 0.700952380952381, 'Recall': 0.8400000000000001, 'F1': 0.760909090909091}
---------------------------------------------------------------------------------------------------
KNN - Keep only columnwise max
{'Precision': 0.7121031746031746, 'Recall': 0.8600000000000001, 'F1': 0.766028416028416}
---------------------------------------------------------------------------------------------------
KNN - Keep only columnwis

## Decision Tree
Perform Feature Selection (LVW) for all models

In [14]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'decision_tree', agg = 'none')
tree_scores = Classifier(tX, ty, vX, vy, classifier = 'decision_tree')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'decision_tree', agg = 'sum')
tree_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'decision_tree')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'decision_tree', agg = 'mean')
tree_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'decision_tree')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'decision_tree', agg = 'max')
tree_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'decision_tree')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'decision_tree', agg = 'min')
tree_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'decision_tree')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'decision_tree', agg = 'first')
tree_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'decision_tree')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'decision_tree', agg = 'last')
tree_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'decision_tree')

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


#### Print Precision, Recall, F1

In [15]:
print("Decision Tree using LVW Feature Selection")
print("---------------------------------------------------------------------------------------------------")

# none
print("Decision Tree - Keep both rows")
print(tree_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("Decision Tree - Keep only columnwise sum")
print(tree_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("Decision Tree - Keep only columnwise mean")
print(tree_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("Decision Tree - Keep only columnwise max")
print(tree_scores_max)
print("---------------------------------------------------------------------------------------------------")

# last
print("Decision Tree - Keep only columnwise min")
print(tree_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("Decision Tree - Keep only the first row")
print(tree_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("Decision Tree - Keep only the last row")
print(tree_scores_last)
print("---------------------------------------------------------------------------------------------------")

Decision Tree using LVW Feature Selection
---------------------------------------------------------------------------------------------------
Decision Tree - Keep both rows
{'Precision': 0.6221428571428571, 'Recall': 0.72, 'F1': 0.6706526806526806}
---------------------------------------------------------------------------------------------------
Decision Tree - Keep only columnwise sum
{'Precision': 0.7492857142857142, 'Recall': 0.7, 'F1': 0.6922727272727273}
---------------------------------------------------------------------------------------------------
Decision Tree - Keep only columnwise mean
{'Precision': 0.6916666666666667, 'Recall': 0.7, 'F1': 0.6814568764568764}
---------------------------------------------------------------------------------------------------
Decision Tree - Keep only columnwise max
{'Precision': 0.6441666666666667, 'Recall': 0.7, 'F1': 0.6219230769230768}
---------------------------------------------------------------------------------------------------
De

## Logistic Regression
Perform Feature Selection (LVW) for all models

In [16]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'logisticregression', agg = 'none')
reg_scores = Classifier(tX, ty, vX, vy, classifier = 'logisticregression')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'logisticregression', agg = 'sum')
reg_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'logisticregression')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'logisticregression', agg = 'mean')
reg_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'logisticregression')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'logisticregression', agg = 'max')
reg_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'logisticregression')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'logisticregression', agg = 'min')
reg_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'logisticregression')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'logisticregression', agg = 'first')
reg_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'logisticregression')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'logisticregression', agg = 'last')
reg_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'logisticregression')



























  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


























  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)




  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)






#### Print Precision, Recall, F1

In [17]:
print("Logistic Regression using LVW Feature Selection")
print("---------------------------------------------------------------------------------------------------")

# none
print("Logistic Regression - Keep both rows")
print(reg_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("Logistic Regression - Keep only columnwise sum")
print(reg_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("Logistic Regression - Keep only columnwise mean")
print(reg_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("Logistic Regression - Keep only columnwise max")
print(reg_scores_max)
print("---------------------------------------------------------------------------------------------------")

# last
print("Logistic Regression - Keep only columnwise min")
print(reg_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("Logistic Regression - Keep only the first row")
print(reg_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("Logistic Regression - Keep only the last row")
print(reg_scores_last)
print("---------------------------------------------------------------------------------------------------")

Logistic Regression using LVW Feature Selection
---------------------------------------------------------------------------------------------------
Logistic Regression - Keep both rows
{'Precision': 0.6703571428571429, 'Recall': 0.78, 'F1': 0.7128826728826729}
---------------------------------------------------------------------------------------------------
Logistic Regression - Keep only columnwise sum
{'Precision': 0.6922619047619047, 'Recall': 0.76, 'F1': 0.710003885003885}
---------------------------------------------------------------------------------------------------
Logistic Regression - Keep only columnwise mean
{'Precision': 0.5994047619047619, 'Recall': 0.8400000000000001, 'F1': 0.6974358974358974}
---------------------------------------------------------------------------------------------------
Logistic Regression - Keep only columnwise max
{'Precision': 0.5388888888888889, 'Recall': 1.0, 'F1': 0.7}
------------------------------------------------------------------------

## SVM (Gaussian Kernel)
Perform Feature Selection (LVW) for all models

In [18]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'svm', agg = 'none')
svm_scores = Classifier(tX, ty, vX, vy, classifier = 'svm')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'svm', agg = 'sum')
svm_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'svm')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'svm', agg = 'mean')
svm_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'svm')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'svm', agg = 'max')
svm_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'svm')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'svm', agg = 'min')
svm_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'svm')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'svm', agg = 'first')
svm_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'svm')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'svm', agg = 'last')
svm_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'svm')

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


#### Print Precision, Recall, F1

In [19]:
print("SVM using LVW Feature Selection")
print("---------------------------------------------------------------------------------------------------")

# none
print("SVM - Keep both rows")
print(svm_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("SVM - Keep only columnwise sum")
print(svm_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("SVM - Keep only columnwise mean")
print(svm_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("SVM - Keep only columnwise max")
print(svm_scores_max)
print("---------------------------------------------------------------------------------------------------")

# last
print("SVM - Keep only columnwise min")
print(svm_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("SVM - Keep only the first row")
print(svm_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("SVM - Keep only the last row")
print(svm_scores_last)
print("---------------------------------------------------------------------------------------------------")

SVM using LVW Feature Selection
---------------------------------------------------------------------------------------------------
SVM - Keep both rows
{'Precision': 0.5388888888888889, 'Recall': 1.0, 'F1': 0.7}
---------------------------------------------------------------------------------------------------
SVM - Keep only columnwise sum
{'Precision': 0.5080555555555555, 'Recall': 0.82, 'F1': 0.6173809523809524}
---------------------------------------------------------------------------------------------------
SVM - Keep only columnwise mean
{'Precision': 0.5, 'Recall': 0.6200000000000001, 'F1': 0.49093406593406586}
---------------------------------------------------------------------------------------------------
SVM - Keep only columnwise max
{'Precision': 0.5388888888888889, 'Recall': 1.0, 'F1': 0.7}
---------------------------------------------------------------------------------------------------
SVM - Keep only columnwise min
{'Precision': 0.5402777777777777, 'Recall': 0.9400

## Random Forest
Use all the features and perform feature selection in the model

In [20]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'random_forest', agg = 'none')
forest_scores = Classifier(tX, ty, vX, vy, classifier = 'random_forest')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'random_forest', agg = 'sum')
forest_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'random_forest')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'random_forest', agg = 'mean')
forest_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'random_forest')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'random_forest', agg = 'max')
forest_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'random_forest')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'random_forest', agg = 'min')
forest_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'random_forest')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'random_forest', agg = 'first')
forest_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'random_forest')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'random_forest', agg = 'last')
forest_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'random_forest')

#### Print Precision, Recall, F1

In [21]:
print("Random Forest using all features")
print("---------------------------------------------------------------------------------------------------")

# none
print("Random Forest - Keep both rows")
print(forest_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("Random Forest - Keep only columnwise sum")
print(forest_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("Random Forest - Keep only columnwise mean")
print(forest_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("Random Forest - Keep only columnwise max")
print(forest_scores_max)
print("---------------------------------------------------------------------------------------------------")

# last
print("Random Forest - Keep only columnwise min")
print(forest_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("Random Forest - Keep only the first row")
print(forest_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("Random Forest - Keep only the last row")
print(forest_scores_last)
print("---------------------------------------------------------------------------------------------------")

Random Forest using all features
---------------------------------------------------------------------------------------------------
Random Forest - Keep both rows
{'Precision': 0.5292857142857144, 'Recall': 0.5999999999999999, 'F1': 0.5912393162393162}
---------------------------------------------------------------------------------------------------
Random Forest - Keep only columnwise sum
{'Precision': 0.5326190476190477, 'Recall': 0.5399999999999999, 'F1': 0.5625990675990676}
---------------------------------------------------------------------------------------------------
Random Forest - Keep only columnwise mean
{'Precision': 0.5943650793650793, 'Recall': 0.58, 'F1': 0.5057142857142857}
---------------------------------------------------------------------------------------------------
Random Forest - Keep only columnwise max
{'Precision': 0.5954761904761904, 'Recall': 0.52, 'F1': 0.5257198357198357}
--------------------------------------------------------------------------------

## AdaBoost
Perform Feature Selection (LVW) for all models  

In [22]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'adaboost', agg = 'none')
ada_scores = Classifier(tX, ty, vX, vy, classifier = 'adaboost')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'adaboost', agg = 'sum')
ada_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'adaboost')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'adaboost', agg = 'mean')
ada_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'adaboost')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'adaboost', agg = 'max')
ada_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'adaboost')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'adaboost', agg = 'min')
ada_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'adaboost')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'adaboost', agg = 'first')
ada_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'adaboost')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'adaboost', agg = 'last')
ada_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'adaboost')

#### Print Precision, Recall, F1

In [23]:
print("AdaBoost using LVW Feature Selection")
print("---------------------------------------------------------------------------------------------------")

# none
print("AdaBoost - Keep both rows")
print(ada_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("AdaBoost - Keep only columnwise sum")
print(ada_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("AdaBoost - Keep only columnwise mean")
print(ada_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("AdaBoost - Keep only columnwise max")
print(ada_scores_max)
print("---------------------------------------------------------------------------------------------------")

# last
print("AdaBoost - Keep only columnwise min")
print(ada_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("AdaBoost - Keep only the first row")
print(ada_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("AdaBoost - Keep only the last row")
print(ada_scores_last)
print("---------------------------------------------------------------------------------------------------")

AdaBoost using LVW Feature Selection
---------------------------------------------------------------------------------------------------
AdaBoost - Keep both rows
{'Precision': 0.6814285714285715, 'Recall': 0.78, 'F1': 0.7165656565656565}
---------------------------------------------------------------------------------------------------
AdaBoost - Keep only columnwise sum
{'Precision': 0.6344047619047619, 'Recall': 0.76, 'F1': 0.6729148629148629}
---------------------------------------------------------------------------------------------------
AdaBoost - Keep only columnwise mean
{'Precision': 0.6697619047619048, 'Recall': 0.72, 'F1': 0.6854545454545454}
---------------------------------------------------------------------------------------------------
AdaBoost - Keep only columnwise max
{'Precision': 0.5388888888888889, 'Recall': 1.0, 'F1': 0.7}
---------------------------------------------------------------------------------------------------
AdaBoost - Keep only columnwise min
{'Pr

## Gradient Boosting Tree
Perform Feature Selection (LVW) for all models  

In [24]:
# set seed
np.random.seed(1)

# Keep both Rows
tX, ty, vX, vy = load_train_test_split(classifier = 'gradient_boost', agg = 'none')
grad_scores = Classifier(tX, ty, vX, vy, classifier = 'gradient_boost')

# Sum
tX_sum, ty_sum, vX_sum, vy_sum = load_train_test_split(classifier = 'gradient_boost', agg = 'sum')
grad_scores_sum = Classifier(tX_sum, ty_sum, vX_sum, vy_sum, classifier = 'gradient_boost')

# Mean
tX_mean, ty_mean, vX_mean, vy_mean = load_train_test_split(classifier = 'gradient_boost', agg = 'mean')
grad_scores_mean = Classifier(tX_mean, ty_mean, vX_mean, vy_mean, classifier = 'gradient_boost')

# Max
tX_max, ty_max, vX_max, vy_max = load_train_test_split(classifier = 'gradient_boost', agg = 'max')
grad_scores_max = Classifier(tX_max, ty_max, vX_max, vy_max, classifier = 'gradient_boost')

# Min
tX_min, ty_min, vX_min, vy_min = load_train_test_split(classifier = 'gradient_boost', agg = 'min')
grad_scores_min = Classifier(tX_min, ty_min, vX_min, vy_min, classifier = 'gradient_boost')

# Keep only the first row
tX_first, ty_first, vX_first, vy_first = load_train_test_split(classifier = 'gradient_boost', agg = 'first')
grad_scores_first = Classifier(tX_first, ty_first, vX_first, vy_first, classifier = 'gradient_boost')

# Keep only the second row
tX_last, ty_last, vX_last, vy_last = load_train_test_split(classifier = 'gradient_boost', agg = 'last')
grad_scores_last = Classifier(tX_last, ty_last, vX_last, vy_last, classifier = 'gradient_boost')

#### Print Precision, Recall, F1

In [25]:
print("Gradient Boosting Tree using LVW Feature Selection")
print("---------------------------------------------------------------------------------------------------")

# none
print("Gradient Boosting Tree - Keep both rows")
print(grad_scores)
print("---------------------------------------------------------------------------------------------------")

# sum
print("Gradient Boosting Tree - Keep only columnwise sum")
print(grad_scores_sum)
print("---------------------------------------------------------------------------------------------------")

# mean
print("Gradient Boosting Tree - Keep only columnwise mean")
print(grad_scores_mean)
print("---------------------------------------------------------------------------------------------------")

# max
print("Gradient Boosting Tree - Keep only columnwise max")
print(grad_scores_max)
print("---------------------------------------------------------------------------------------------------")

# last
print("Gradient Boosting Tree - Keep only columnwise min")
print(grad_scores_min)
print("---------------------------------------------------------------------------------------------------")

# first
print("Gradient Boosting Tree - Keep only the first row")
print(grad_scores_first)
print("---------------------------------------------------------------------------------------------------")

# last
print("Gradient Boosting Tree - Keep only the last row")
print(grad_scores_last)
print("---------------------------------------------------------------------------------------------------")

Gradient Boosting Tree using LVW Feature Selection
---------------------------------------------------------------------------------------------------
Gradient Boosting Tree - Keep both rows
{'Precision': 0.6659523809523809, 'Recall': 0.72, 'F1': 0.6903030303030303}
---------------------------------------------------------------------------------------------------
Gradient Boosting Tree - Keep only columnwise sum
{'Precision': 0.6807142857142857, 'Recall': 0.72, 'F1': 0.6810606060606061}
---------------------------------------------------------------------------------------------------
Gradient Boosting Tree - Keep only columnwise mean
{'Precision': 0.6651190476190476, 'Recall': 0.7, 'F1': 0.6896503496503497}
---------------------------------------------------------------------------------------------------
Gradient Boosting Tree - Keep only columnwise max
{'Precision': 0.6216666666666667, 'Recall': 0.6799999999999999, 'F1': 0.6382905982905983}
-----------------------------------------

# Decide which Aggregation to keep

In [26]:
def get_min_difference_aggregation(dict_scores, dict_none, dict_sum, dict_mean, dict_max, dict_min,
                                   dict_first, dict_last):
        
    # subtract the keys
    diff_none = {key: abs(dict_scores[key] - dict_none.get(key, 0)) for key in dict_scores.keys()}
    diff_sum = {key: abs(dict_scores[key] - dict_sum.get(key, 0)) for key in dict_scores.keys()}
    diff_mean = {key: abs(dict_scores[key] - dict_mean.get(key, 0)) for key in dict_scores.keys()}
    diff_max = {key: abs(dict_scores[key] - dict_max.get(key, 0)) for key in dict_scores.keys()}
    diff_min = {key: abs(dict_scores[key] - dict_min.get(key, 0)) for key in dict_scores.keys()}
    diff_first = {key: abs(dict_scores[key] - dict_first.get(key, 0)) for key in dict_scores.keys()}
    diff_last = {key: abs(dict_scores[key] - dict_last.get(key, 0)) for key in dict_scores.keys()}

    # get min of differences
    min_diff = {'None': sum(diff_none.values()),
                'Sum': sum(diff_sum.values()),
                'Mean': sum(diff_mean.values()),
                'Max': sum(diff_max.values()),
                'Min': sum(diff_min.values()),
                'First': sum(diff_first.values()),
                'Last': sum(diff_last.values())}
    
    print('Min difference of aggregations: ' ,min(min_diff, key = min_diff.get))

In [27]:
# dictionary with values from table 2
knn_table2 = {'Precision': 0.582, 'Recall': 0.636, 'F1': 0.608}
tree_table2 = {'Precision': 0.521, 'Recall': 0.550, 'F1': 0.535}
reg_table2 = {'Precision': 0.616, 'Recall': 0.600, 'F1': 0.608}
svm_table2 = {'Precision': 0.511, 'Recall': 0.670, 'F1': 0.580}
forest_table2 = {'Precision': 0.614, 'Recall': 0.664, 'F1': 0.638}
ada_table2 = {'Precision': 0.601, 'Recall': 0.717, 'F1': 0.654}
grad_table2 = {'Precision': 0.561, 'Recall': 0.616, 'F1': 0.587}

print("KNN")
get_min_difference_aggregation(knn_table2, knn_scores, knn_scores_sum, knn_scores_mean, knn_scores_max,
                               knn_scores_min, knn_scores_first, knn_scores_last)
print("Decision Tree")
get_min_difference_aggregation(tree_table2, tree_scores, tree_scores_sum, tree_scores_mean, tree_scores_max,
                               tree_scores_min, tree_scores_first, tree_scores_last)
print("Logistic Regression")
get_min_difference_aggregation(reg_table2, reg_scores, reg_scores_sum, reg_scores_mean, reg_scores_max,
                               reg_scores_min, reg_scores_first, reg_scores_last)
print("SVM")
get_min_difference_aggregation(svm_table2, svm_scores, svm_scores_sum, svm_scores_mean, svm_scores_max,
                               svm_scores_min, svm_scores_first, svm_scores_last)
print("Random Forest (not stable!)")
get_min_difference_aggregation(forest_table2, forest_scores, forest_scores_sum, forest_scores_mean, forest_scores_max,
                               forest_scores_min, forest_scores_first, forest_scores_last)
print("AdaBoost")
get_min_difference_aggregation(ada_table2, ada_scores, ada_scores_sum, ada_scores_mean, ada_scores_max,
                               ada_scores_min, ada_scores_first, ada_scores_last)
print("Gradient Boosting Tree")
get_min_difference_aggregation(grad_table2, grad_scores, grad_scores_sum, grad_scores_mean, grad_scores_max,
                               grad_scores_min, grad_scores_first, grad_scores_last)

KNN
Min difference of aggregations:  First
Decision Tree
Min difference of aggregations:  Max
Logistic Regression
Min difference of aggregations:  Sum
SVM
Min difference of aggregations:  Mean
Random Forest (not stable!)
Min difference of aggregations:  Last
AdaBoost
Min difference of aggregations:  Min
Gradient Boosting Tree
Min difference of aggregations:  Max


Since keeping only the maximum values of each column works best for two of the models we are going to use this aggregation method for the classifier stacking.  
But first, we have to check if the Precision, Recall and F1 scores are all > 0.5:

In [29]:
# check if Precision, Recall and F1 are > 0.5
print(sum(value > 0.5 for value in knn_scores_max.values()) == 3 and
      sum(value > 0.5 for value in tree_scores_max.values()) == 3 and
      sum(value > 0.5 for value in reg_scores_max.values()) == 3 and
      sum(value > 0.5 for value in svm_scores_max.values()) == 3 and
      sum(value > 0.5 for value in forest_scores_max.values()) == 3 and
      sum(value > 0.5 for value in ada_scores_max.values()) == 3 and
      sum(value > 0.5 for value in grad_scores_max.values()) == 3)

True


We compare the scores of our models with the ones from the paper:

In [32]:
print("KNN")
print("Scores from the paper: ", knn_table2)
print("Scores from our models: ", knn_scores_max)
print("---------------------------------------------------------------------------------------------------")

print("Decision Tree")
print("Scores from the paper: ", tree_table2)
print("Scores from our models: ", tree_scores_max)
print("---------------------------------------------------------------------------------------------------")

print("Logistic Regression")
print("Scores from the paper: ", reg_table2)
print("Scores from our models: ", reg_scores_max)
print("---------------------------------------------------------------------------------------------------")

print("SVM")
print("Scores from the paper: ", svm_table2)
print("Scores from our models: ", svm_scores_max)
print("---------------------------------------------------------------------------------------------------")

print("Random Forest (not stable!)")
print("Scores from the paper: ", forest_table2)
print("Scores from our models: ", forest_scores_max)
print("---------------------------------------------------------------------------------------------------")

print("AdaBoost")
print("Scores from the paper: ", ada_table2)
print("Scores from our models: ", ada_scores_max)
print("---------------------------------------------------------------------------------------------------")

print("Gradient Boosting Tree")
print("Scores from the paper: ", grad_table2)
print("Scores from our models: ", grad_scores_max)
print("---------------------------------------------------------------------------------------------------")

KNN
Scores from the paper:  {'Precision': 0.582, 'Recall': 0.636, 'F1': 0.608}
Scores from our models:  {'Precision': 0.7121031746031746, 'Recall': 0.8600000000000001, 'F1': 0.766028416028416}
---------------------------------------------------------------------------------------------------
Decision Tree
Scores from the paper:  {'Precision': 0.521, 'Recall': 0.55, 'F1': 0.535}
Scores from our models:  {'Precision': 0.6441666666666667, 'Recall': 0.7, 'F1': 0.6219230769230768}
---------------------------------------------------------------------------------------------------
Logistic Regression
Scores from the paper:  {'Precision': 0.616, 'Recall': 0.6, 'F1': 0.608}
Scores from our models:  {'Precision': 0.5388888888888889, 'Recall': 1.0, 'F1': 0.7}
---------------------------------------------------------------------------------------------------
SVM
Scores from the paper:  {'Precision': 0.511, 'Recall': 0.67, 'F1': 0.58}
Scores from our models:  {'Precision': 0.5388888888888889, 'Reca