In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import cross_val_predict, StratifiedKFold

import pickle, numpy as np

from utilities import get_train_data, get_test_data

# Placeholder configuration: every `...` below must be replaced with a real
# value before the notebook is run.

# Folder holding the pickled feature matrices. It is joined to file names with
# a plain `+` (no separator is inserted), so it presumably needs to be a string
# that already ends with a path separator.
PICKLE_FOLDER_PATH = ...

# Training-set file handed to `get_train_data`.
TRAIN_FILENAME = ...
# Test-set file; not referenced by any cell visible in this part of the notebook.
TEST_FILENAME  = ...

In [None]:
def _load_pickle(filename):
    """Load one pickled object stored under ``PICKLE_FOLDER_PATH``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of being left open until it is garbage
    collected.

    SECURITY: unpickling can execute arbitrary code. Only load files that were
    produced by a trusted step of this project.
    """
    with open(PICKLE_FOLDER_PATH + filename, "rb") as pickle_file:
        return pickle.load(pickle_file)


# Every feature family comes as a (train, test) pair. The CIMP and TIMP
# families have separate files per task; the others are shared by both tasks.
X_CNGR_train = _load_pickle("X_CNGR_train.p")
X_CNGR_test  = _load_pickle("X_CNGR_test.p")

X_TNGR_train = _load_pickle("X_TNGR_train.p")
X_TNGR_test  = _load_pickle("X_TNGR_test.p")

X_CIMP_task1_train = _load_pickle("X_CIMP_task1_train.p")
X_CIMP_task1_test  = _load_pickle("X_CIMP_task1_test.p")

X_CIMP_task2_train = _load_pickle("X_CIMP_task2_train.p")
X_CIMP_task2_test  = _load_pickle("X_CIMP_task2_test.p")

X_TIMP_task1_train = _load_pickle("X_TIMP_task1_train.p")
X_TIMP_task1_test  = _load_pickle("X_TIMP_task1_test.p")

X_TIMP_task2_train = _load_pickle("X_TIMP_task2_train.p")
X_TIMP_task2_test  = _load_pickle("X_TIMP_task2_test.p")

X_EMB_train = _load_pickle("X_EMB_train.p")
X_EMB_test  = _load_pickle("X_EMB_test.p")

In [None]:
# Only the second and third values returned by `get_train_data` are kept: `y1`
# is the target passed to every task-1 model and `y2` the target passed to
# every task-2 model. The first value is discarded.
# NOTE(review): presumably the first value is the raw training input - confirm
# against `utilities.get_train_data`.
_, y1, y2 = get_train_data(TRAIN_FILENAME)

In [None]:
def get_META_feats(clf, X_train, X_test, y, seeds=(42,)):
    """Build stacking (meta-level) features from a single base classifier.

    Training-side features are the ``predict_proba`` outputs produced by
    ``cross_val_predict`` over a shuffled 10-fold ``StratifiedKFold``, averaged
    over every seed in ``seeds``. Test-side features are the probabilities
    predicted for ``X_test`` after ``clf`` has been fitted on all of
    ``(X_train, y)``.

    Parameters
    ----------
    clf : estimator
        Must expose ``fit`` and ``predict_proba``. It is fitted in place on
        ``(X_train, y)`` as a side effect of this call.
    X_train : training feature matrix.
    X_test : test feature matrix.
    y : training labels, used both for stratifying the folds and for fitting.
    seeds : iterable of int, default ``(42,)``
        One cross-validation run is performed per seed (used as the fold
        shuffling ``random_state``); the runs are averaged element-wise.

    Returns
    -------
    tuple
        ``(feats_train, feats_test)``: averaged cross-validated probabilities
        for ``X_train`` and full-fit probabilities for ``X_test``.

    Raises
    ------
    ValueError
        If ``seeds`` is empty. Without this check ``np.mean`` of an empty list
        would silently yield NaN as the training features.
    """
    # Materialise once so generators are supported and emptiness can be checked.
    seeds = list(seeds)
    if not seeds:
        raise ValueError("seeds must contain at least one random seed")

    probas_per_seed = [
        cross_val_predict(
            clf,
            X_train,
            y=y,
            method='predict_proba',
            cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=seed),
            n_jobs=-1,
        )
        for seed in seeds
    ]
    feats_train = np.mean(probas_per_seed, axis=0)

    # Test rows have no held-out fold, so they are scored by a model trained on
    # the complete training set.
    clf.fit(X_train, y)
    feats_test = clf.predict_proba(X_test)

    return feats_train, feats_test

## TASK 1 - Base level predictions

In [None]:
%%time
# Base learners for task 1. The tree ensembles get a fixed `random_state` so
# the stacked features are the same on every Restart & Run All.
clfs_task1 = [LogisticRegression(class_weight='balanced'),
              ExtraTreesClassifier(n_estimators=100, criterion='entropy', n_jobs=-1, random_state=42),
              ExtraTreesClassifier(n_estimators=100, criterion='gini', n_jobs=-1, random_state=42)]

# (train, test) pairs of every feature family used for task 1.
base_feats_task1 = [(X_CNGR_train, X_CNGR_test),
                    (X_TNGR_train, X_TNGR_test),
                    (X_CIMP_task1_train, X_CIMP_task1_test),
                    (X_TIMP_task1_train, X_TIMP_task1_test),
                    (X_EMB_train, X_EMB_test)]

# One block of predicted probabilities per (feature family, classifier) pair.
# Separate accumulator lists keep the X_META_* names bound to arrays only.
meta_train_parts_task1 = []
meta_test_parts_task1 = []
for X_train, X_test in base_feats_task1:
    for clf in clfs_task1:
        feats_train, feats_test = get_META_feats(clf, X_train, X_test, y1)
        meta_train_parts_task1.append(feats_train)
        meta_test_parts_task1.append(feats_test)

# Blocks are laid side by side: feature family is the outer order, classifier
# the inner order, identically for train and test.
X_META_task1_train = np.concatenate(meta_train_parts_task1, axis=1)
X_META_task1_test  = np.concatenate(meta_test_parts_task1, axis=1)

## TASK 2 - Base level predictions

In [None]:
%%time
# Base learners for task 2. The tree ensembles get a fixed `random_state` so
# the stacked features are the same on every Restart & Run All.
clfs_task2 = [LogisticRegression(class_weight='balanced'),
              ExtraTreesClassifier(n_estimators=150, criterion='entropy', n_jobs=-1, random_state=42),
              ExtraTreesClassifier(n_estimators=150, criterion='gini', n_jobs=-1, random_state=42)]

# (train, test) pairs of every feature family used for task 2.
base_feats_task2 = [(X_CNGR_train, X_CNGR_test),
                    (X_TNGR_train, X_TNGR_test),
                    (X_CIMP_task2_train, X_CIMP_task2_test),
                    (X_TIMP_task2_train, X_TIMP_task2_test),
                    (X_EMB_train, X_EMB_test)]

# One block of predicted probabilities per (feature family, classifier) pair.
# Separate accumulator lists keep the X_META_* names bound to arrays only.
meta_train_parts_task2 = []
meta_test_parts_task2 = []
for X_train, X_test in base_feats_task2:
    for clf in clfs_task2:
        feats_train, feats_test = get_META_feats(clf, X_train, X_test, y2)
        meta_train_parts_task2.append(feats_train)
        meta_test_parts_task2.append(feats_test)

# Blocks are laid side by side: feature family is the outer order, classifier
# the inner order, identically for train and test.
X_META_task2_train = np.concatenate(meta_train_parts_task2, axis=1)
X_META_task2_test  = np.concatenate(meta_test_parts_task2, axis=1)

## Meta level predictions

In [None]:
# Task 1 meta-learner: class-balanced logistic regression trained on the
# stacked task-1 features, then applied to the stacked test features.
clf_task1 = LogisticRegression(class_weight='balanced', C=0.17).fit(X_META_task1_train, y1)
preds_task1 = clf_task1.predict(X_META_task1_test)

# Task 2 meta-learner: same recipe with its own regularisation strength.
clf_task2 = LogisticRegression(class_weight='balanced', C=0.2).fit(X_META_task2_train, y2)
preds_task2 = clf_task2.predict(X_META_task2_test)