In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier


np.random.seed(12)
no_examples = 100

Y = pd.DataFrame(np.random.randint(0,2,no_examples), index=np.arange(no_examples), columns=['Class'])
X = pd.DataFrame(np.random.randn(no_examples, 6),
   index= np.arange(no_examples), columns=list('ABCDEF'))

df = pd.concat([X,Y], axis=1)

print(df)

           A         B         C         D         E         F  Class
0   0.214976 -0.384359 -0.253904  0.073252 -0.997204 -0.713856      1
1   0.035416 -0.677945 -0.571881 -0.105862  1.335831  0.318665      1
2  -0.337595 -0.585268 -0.114920  2.241818 -3.147417  0.535136      0
3   0.232490  0.867612 -1.148213  2.114344  1.000943 -0.051415      1
4   0.159788 -0.716264  0.050523 -0.143337  0.943575  0.357644      1
..       ...       ...       ...       ...       ...       ...    ...
95 -0.566379  0.023273 -0.837898 -0.792902 -1.451985 -1.126614      1
96 -1.889230  1.151715  0.807165  0.045266 -0.154393  1.489950      1
97  0.519590 -0.709493  0.058287  0.335189  0.284258  0.312743      0
98 -1.676751 -0.786611 -2.006107  0.313083 -0.408450 -1.280312      1
99  2.202103  1.291359 -0.490241  0.835403 -0.610706  0.966344      1

[100 rows x 7 columns]


In [5]:
np.random.seed(12)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)
Y_train = np.ravel(Y_train)

def run_classifier(x_train, y_train, x_test, y_test):
    clf = MLPClassifier(hidden_layer_sizes=(10,), solver='sgd', learning_rate_init=0.01, max_iter=10000)
    clf.fit(x_train, y_train)

    preds = clf.predict(x_test)
    score = clf.score(x_test, y_test)

    return score

print(run_classifier(X_train, Y_train, X_test, Y_test))

0.55


In [6]:
np.random.seed(12)

def feature_selection(x_train, y_train, x_test, y_test, no_of_iterations = 100):
    results = {}

    # Iterates through each column
    for column in x_train:
        x_train_copy = x_train.copy()
        score_list = []

        # Shuffles the columns and takes result at n iterations
        for _ in range(no_of_iterations):
            x_train_copy[column] = np.random.permutation(x_train[column].values)

            score = run_classifier(X_train, Y_train, X_test, Y_test)

            score_list.append(score)

        # Updates dictionary that appends list of scores
        results[column] = score_list
    
    return results

score_dict = feature_selection(X_train, Y_train, X_test, Y_test, 5)
print(score_dict)

{'A': [0.4, 0.5, 0.35, 0.5, 0.4], 'B': [0.45, 0.65, 0.4, 0.45, 0.5], 'C': [0.4, 0.5, 0.55, 0.45, 0.4], 'D': [0.6, 0.45, 0.25, 0.5, 0.3], 'E': [0.3, 0.6, 0.7, 0.4, 0.4], 'F': [0.35, 0.45, 0.5, 0.45, 0.45]}
