In [1]:
import numpy as np
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.preprocessing import scale, MinMaxScaler
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier


  from numpy.core.umath_tests import inner1d


In [2]:
# read data
X = np.loadtxt('train.dat')
y = np.loadtxt('train.labels')

X_R_test = np.loadtxt('test.dat')


In [3]:
def make_prediction(model, X_test, y_test):
    y_pred = model.predict(X_test)
    f11 = f1_score(y_test, y_pred, average = 'micro')
    return y_pred, f11

def MLP_classifier(skf, hidden_layer_sizes=(5,3),max_iter=200,n_components=8, dr='pca'):

    f11_sum = 0
    model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,learning_rate_init=0.0001,max_iter=max_iter)

    for train_index, test_index in skf.split(X, y):

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

#         scale(X_train, copy = False)
#         scale(X_test, copy = False)
#         scaler1 = MinMaxScaler()
#         scaler2 = MinMaxScaler()
#         scaler1.fit_transform(X_train)
#         scaler2.fit_transform(X_test)

        if dr == 'pca':
            pca = PCA(n_components=n_components)
            X_train_reduced = pca.fit_transform(X_train)
            X_test_reduced = pca.fit_transform(X_test)

        elif dr == 'svd':
            svd = TruncatedSVD(n_components=n_components, n_iter=7, random_state=42)
            X_train_reduced = svd.fit_transform(X_train)
            X_test_reduced = svd.fit_transform(X_test)
        # model
        model.fit(X_train_reduced, y_train)

        # predict
        y_pred, f11 = make_prediction(model, X_test_reduced, y_test)
        print('weighted: ', f11)

        f11_sum += f11

    f11 = f11_sum/5
    

    
    return f11, model

In [4]:
skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 1)

In [None]:
best_score = 0
best_i_j = 0

hidden_layer_sizes = [(15),(20,5)]
max_iter = [500, 800, 1000]
n_components = [20]
for i in range(len(hidden_layer_sizes)):
    for j in range(len(max_iter)):
        for k in range(len(n_components)):
            f11, model = MLP_classifier(skf, hidden_layer_sizes=hidden_layer_sizes[i], max_iter=max_iter[j], n_components=n_components[k], dr='svd')
            if f11 >= best_score:
                best_score = f11
                best_i_j_k = [hidden_layer_sizes[i], max_iter[j], n_components[k]]
            print(best_score, hidden_layer_sizes[i], max_iter[j], n_components[k])



weighted:  0.5364300872435747




weighted:  0.5285377358490566


In [None]:
# dr = TruncatedSVD(n_components=best_i_j_k[2], n_iter=7, random_state=42)
# # dr = PCA(n_components=best_i_j_k[2])

# X_reduced = dr.fit_transform(X)
# X_R_test_reduced = dr.fit_transform(X_R_test)

In [None]:
# model = RandomForestClassifier(n_estimators=best_i_j_k[0], max_depth=best_i_j_k[1], random_state=0)
# model.fit(X_reduced, y)

# pred = model.predict(X_R_test_reduced)

# with open("result.dat", "w") as f: 
#     for i in pred:
#         f.write(str(int(i)))
#         f.write('\n')

In [None]:
# # [220, 30, 8] 666905
# dr = TruncatedSVD(n_components=8, n_iter=7, random_state=42)
# # dr = PCA(n_components=best_i_j_k[2])

# X_reduced = dr.fit_transform(X)
# X_R_test_reduced = dr.fit_transform(X_R_test)

# model = RandomForestClassifier(n_estimators=220, max_depth=30, random_state=0)
# model.fit(X_reduced, y)

# pred = model.predict(X_R_test_reduced)

# with open("result.dat", "w") as f: 
#     for i in pred:
#         f.write(str(int(i)))
#         f.write('\n')