In [None]:
# Do all necessary preprocessing, calling prepro.py
import utils
from utils import *
importlib.reload(utils)
from utils import *

In [None]:
# Load the three objects returned by load_data() and flatten the labels to 1-D
X, X_test_original, y = load_data()
y = y.ravel()

# Aliases used by the cross-validation cell further down
xtrain, ytrain = X, y

# Empty accumulator for scores
scores = np.array([])

# NN1 model

In [None]:
class NN1():
    """Keras MLP classifier with built-in standardisation and class weighting.

    The estimator exposes ``fit`` / ``predict`` / ``predict_proba`` / ``score``.
    Features are standardised with a ``StandardScaler`` that is fitted on the
    training data in ``fit`` and then reused (transform only) at prediction
    time, so train and test samples are scaled with the same statistics.
    """

    # Number of output units / one-hot columns used by the network.
    N_CLASSES = 3
    # Input width used by create_model() when fit() has not been called yet.
    DEFAULT_INPUT_DIM = 1000

    def __init__(self):
        self.mlp = KerasClassifier(build_fn=self.create_model,
                                   epochs=25, batch_size=35,
                                   verbose=1)
        # Set by fit(); predict()/predict_proba() refuse to run without it.
        self.scaler = None
        self._input_dim = self.DEFAULT_INPUT_DIM

    def _scale(self, X_):
        """Standardise ``X_`` with the scaler fitted on the training data.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
        """
        scaler = getattr(self, "scaler", None)
        if scaler is None:
            raise RuntimeError("NN1 must be fitted before it can predict.")
        return scaler.transform(X_)

    def score(self, X_, y_):
        """Return the balanced accuracy of ``predict(X_)`` against ``y_``.

        ``y_`` may be a 1-D label vector, an (n, 1) column of labels, or a
        one-hot matrix with one column per class (decoded with argmax).
        """
        pred = self.predict(X_)
        y_arr = np.asarray(y_)
        if y_arr.ndim > 1 and y_arr.shape[1] > 1:
            # One-hot encoded targets -> class indices.
            y_normal = np.argmax(y_arr, axis=1)
        else:
            # Already class labels; flatten so 1-D and (n, 1) inputs both work.
            y_normal = y_arr.ravel()
        return balanced_accuracy_score(y_normal, pred)

    def fit(self, X_, y_):
        """Fit the scaler and the network on ``X_`` with integer labels ``y_``.

        Labels are one-hot encoded into ``N_CLASSES`` columns and the loss is
        weighted with 'balanced' class weights computed from ``y_``.

        Returns:
            self, so calls can be chained.
        """
        labels = np.asarray(y_).ravel().astype(int)

        # One hot encode data
        y_enc = np.zeros((labels.shape[0], self.N_CLASSES))
        y_enc[np.arange(labels.shape[0]), labels] = 1

        # Keep the fitted scaler so prediction reuses the training statistics.
        self.scaler = preprocessing.StandardScaler()
        xscaled = self.scaler.fit_transform(X_)
        # Let create_model() size the input layer from the actual data.
        self._input_dim = xscaled.shape[1]

        # Keyword arguments: recent scikit-learn rejects the positional form.
        classes = np.unique(labels)
        cw = sklearn.utils.class_weight.compute_class_weight(
            class_weight='balanced', classes=classes, y=labels)
        # Map only the classes that are present, so a fold that lacks a class
        # does not raise an IndexError.
        weight_by_class = {int(c): w for c, w in zip(classes, cw)}

        self.mlp.fit(xscaled, y_enc, class_weight=weight_by_class)
        return self

    def predict(self, X_):
        """Return the wrapped classifier's predictions for ``X_``."""
        return self.mlp.predict(self._scale(X_))

    def create_model(self):
        """Build and compile the network; passed to KerasClassifier as build_fn."""
        neurons = 30
        dropout_rate = 0.7
        weight_constraint = 4
        # Width seen in fit(); falls back to the historical default of 1000.
        input_dim = getattr(self, "_input_dim", self.DEFAULT_INPUT_DIM)

        model = Sequential()
        model.add(Dense(neurons, input_dim=input_dim, activation='relu', kernel_constraint=maxnorm(weight_constraint)))
        model.add(Dropout(dropout_rate))
        model.add(Dense(neurons, activation='relu', kernel_constraint=maxnorm(weight_constraint)))
        model.add(Dropout(dropout_rate*0.4))
        model.add(Dense(int(neurons/2), activation='relu', kernel_constraint=maxnorm(weight_constraint)))
        model.add(Dropout(dropout_rate*0.2))
        # NOTE(review): sigmoid + binary_crossentropy treats the classes as
        # independent labels, so the outputs are not normalised to sum to 1.
        # For mutually exclusive classes softmax + categorical_crossentropy is
        # the usual choice -- confirm before changing, it alters training.
        model.add(Dense(self.N_CLASSES, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def predict_proba(self, X_):
        """Return the wrapped classifier's per-class scores for ``X_``."""
        return self.mlp.predict_proba(self._scale(X_))

# nn = NN1()
# nn.fit(X[1:10], y[1:10])
# ypredy = nn.predict(X[1:10])
# probbb = nn.predict_proba(X[1:10])



# Ensemble of the models

In [None]:
# Define the ensemble voting classifier fn:

def ensemby(y1_prob, y2_prob, voting='soft'):
    """Combine two classifiers' per-class scores into one label prediction.

    Args:
        y1_prob: array-like of shape (n_samples, n_classes) from model 1.
        y2_prob: array-like of the same shape from model 2.
        voting: 'soft' (default) returns the argmax of the summed scores;
            'hard' lets each model vote for its own argmax and returns the
            class with most votes. When the two models disagree the vote is
            tied and the lowest class index wins.

    Returns:
        1-D integer array of predicted class indices, one per sample.

    Raises:
        ValueError: if ``voting`` is neither 'soft' nor 'hard'.
    """
    # Coerce to arrays so that list inputs are added element-wise rather than
    # concatenated by the ``+`` operator.
    p1 = np.asarray(y1_prob, dtype=float)
    p2 = np.asarray(y2_prob, dtype=float)

    if voting == 'soft':
        # Return the argmax of the sum of the probabilities
        return np.argmax(p1 + p2, axis=1)

    if voting == 'hard':
        one_hot = np.eye(p1.shape[1])
        votes = one_hot[np.argmax(p1, axis=1)] + one_hot[np.argmax(p2, axis=1)]
        # argmax returns the first maximum, i.e. the lowest class on a tie.
        return np.argmax(votes, axis=1)

    raise ValueError("voting must be 'soft' or 'hard', got %r" % (voting,))

In [None]:
# Big cross val for loop
#
# 5-fold cross-validation of three predictors on (xtrain, ytrain):
#   * an RBF-kernel SVM inside a scaling pipeline,
#   * the NN1 network,
#   * their ensemble built by ensemby() from the two predict_proba outputs.
# Each fold is scored with balanced_accuracy_score (BMAC).

# Fixed seed so the shuffled folds are the same on every run; without it the
# scores of two runs are computed on different splits and cannot be compared.
CV_SEED = 42
kf = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)

# Per-fold scores are collected in lists and converted to arrays once, after
# the loop, instead of re-allocating an array with np.append on every fold.
ensemble_fold_scores = []
svm_fold_scores = []
nn_fold_scores = []

for train_index, test_index in kf.split(xtrain):

    # Prepare the data
    x_train = xtrain[train_index]
    x_test = xtrain[test_index]
    y_train = ytrain[train_index]
    y_test = ytrain[test_index]

    # Prepare the SVM (probability=True is required for predict_proba below;
    # `degree` is omitted because it is ignored by the RBF kernel)
    andreas_svm = svm.SVC(C=0.5, class_weight='balanced', gamma='auto', kernel='rbf', probability=True)
    steps = [("scaler", preprocessing.StandardScaler()), ("classifier", andreas_svm)]
    svm_pipeline = Pipeline(steps=steps)

    # Prepare the NN1
    nn = NN1()

    # Models to fit
    print("Fitting SVM...")
    svm_pipeline.fit(x_train, y_train.ravel())
    print("Fitting NN...")
    nn.fit(x_train, y_train.ravel())

    # Predict and join the predictions
    svm_pred = svm_pipeline.predict(x_test)
    nn_pred = nn.predict(x_test)
    svm_prob = svm_pipeline.predict_proba(x_test)
    nn_prob = nn.predict_proba(x_test)
    ensemble_pred = ensemby(svm_prob, nn_prob)

    # Record scores
    BMAC_ensemble = balanced_accuracy_score(y_test, ensemble_pred)
    BMAC_svm = balanced_accuracy_score(y_test, svm_pred)
    BMAC_nn = balanced_accuracy_score(y_test, nn_pred)
    print("BMAC Ensemble Scores: ", BMAC_ensemble)
    print("BMAC SVM Scores: ", BMAC_svm)
    print("BMAC NN Scores: ", BMAC_nn)
    ensemble_fold_scores.append(BMAC_ensemble)
    svm_fold_scores.append(BMAC_svm)
    nn_fold_scores.append(BMAC_nn)

# Same variable names and array types as before, for any later cells.
BMAC_scores = np.array(ensemble_fold_scores, dtype=float)
svm_scores = np.array(svm_fold_scores, dtype=float)
nn_scores = np.array(nn_fold_scores, dtype=float)

# One-element arrays holding the mean / std of the ensemble scores.
BMAC_means = np.array([np.mean(BMAC_scores)])
BMAC_stds = np.array([np.std(BMAC_scores)])

print("Scores:", BMAC_scores)
print("SVM Scores:", svm_scores)
print("NN Scores:", nn_scores)
print("Mean Scores:", BMAC_means)
print("Std Scores:", BMAC_stds)