# Phase 1 Classification

## MLP

In [22]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
import scipy.io as sio

mat_data = sio.loadmat('X_train.mat')
X = mat_data['X_train']
mat_data = sio.loadmat('X_test.mat')
X_test = mat_data['X_test']
mat_data = sio.loadmat('label.mat')
y = mat_data['trainLabel']
y = y.reshape(550,1)
param_grid = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'hidden_layer_sizes': [(i,) for i in range(10, 31)],
    'max_iter': [1200],  # Number of epochs
}

mlp = MLPClassifier()

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid_search_phase1 = GridSearchCV(mlp, param_grid, scoring='accuracy', cv=cv, n_jobs=-1)
grid_search_phase1.fit(X, y)

print("Best Hyperparameters: ", grid_search_phase1.best_params_)
print("Best Accuracy: {:.2f}%".format(grid_search_phase1.best_score_ * 100))


  y = column_or_1d(y, warn=True)


Best Hyperparameters:  {'activation': 'tanh', 'hidden_layer_sizes': (13,), 'max_iter': 1200}
Best Accuracy: 84.00%


## RBF Neural Network

In [25]:
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import torch.nn as nn
import torch
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset
import scipy.io as sio
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import ParameterGrid

mat_data = sio.loadmat('X_train.mat')
X = mat_data['X_train']
mat_data = sio.loadmat('X_test.mat')
X_test = mat_data['X_test']
mat_data = sio.loadmat('label.mat')
y = mat_data['trainLabel']
y = y.reshape(550,1)
for i in range(len(y)):
    if y[i]==-1:
        y[i]=0
X_train_tensor = torch.Tensor(X)
y_train_tensor = torch.LongTensor(y)
X_test_tensor = torch.Tensor(X_test)
y_train = y
# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

class RBFNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, radius):
        super(RBFNet, self).__init__()
        self.centers = nn.Parameter(torch.randn(hidden_dim, input_dim))
        # self.hidden_dim = hidden_dim
        # self.input_dim = input_dim
        self.beta = nn.Parameter(torch.ones(hidden_dim))
        self.linear = nn.Linear(hidden_dim, output_dim)
        self.radius = radius

    def radial_basis(self, x):
        # km = KMeans(n_clusters= self.hidden_dim, random_state=0, n_init='auto')
        # km.fit(x)
        # self.centers = torch.Tensor(np.array(km.cluster_centers_))
        return torch.exp(-self.radius * ((x.unsqueeze(1) - self.centers) ** 2).sum(dim=2))

    def forward(self, x):
        rbf = self.radial_basis(x)
        out = self.linear(rbf)
        return out

best_accuracy = 0.0
best_params = {}

param_grid = {
    'radius': [0.005,0.0125,0.025,0.05,0.1],
    'hidden_dim': [15,30, 40,60,70,75,80]
}
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for params in ParameterGrid(param_grid):
    rbf_net = RBFNet(input_dim=X.shape[1], output_dim=len(np.unique(y_train)), **params)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(rbf_net.parameters(), lr=0.01)
    acc = 0
    for train_idx, val_idx in cv.split(X, y):
        for epoch in range(300):
            optimizer.zero_grad()
            outputs = rbf_net(X_train_tensor[train_idx])
            loss = criterion(outputs, y_train_tensor[train_idx].squeeze()) 
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            rbf_net.eval()
            y_pred = torch.argmax(rbf_net(X_train_tensor[val_idx]), dim=1).numpy()
            accuracy = accuracy_score(y_train[val_idx], y_pred)
        acc+=accuracy
    accuracy = acc/5
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params

print("Best Parameters:", best_params)
print("Best Accuracy on Validation Set:", best_accuracy)


Best Parameters: {'hidden_dim': 80, 'radius': 0.05}
Best Accuracy on Validation Set: 0.8581818181818182


In [26]:
rbf_net = RBFNet(input_dim=X.shape[1], output_dim=len(np.unique(y_train)), hidden_dim=80, radius=0.05)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rbf_net.parameters(), lr=0.01)
for epoch in range(300):
    optimizer.zero_grad()
    outputs = rbf_net(X_train_tensor)
    loss = criterion(outputs, y_train_tensor.squeeze()) 
    loss.backward()
    optimizer.step()
with torch.no_grad():
    rbf_net.eval()
    y_pred = torch.argmax(rbf_net(X_test_tensor), dim=1).numpy()
        
Y_test = np.array(y_pred)
for i in range(len(Y_test)):
    if Y_test[i] == 0:
        Y_test[i] = -1

### Here save our TEST Labels for phase 1

In [27]:
#  Here I save test matrix
print(grid_search_phase1.best_params_)
model = grid_search_phase1.best_estimator_
model.fit(X,y)
out = model.predict(X_test)
for i in range(len(out)):
    if out[i] == 0:
        out[i] =-1
sio.savemat('TestLabels_phase1.mat',{'labelMLP':out,'labelRBF':Y_test})

{'activation': 'tanh', 'hidden_layer_sizes': (13,), 'max_iter': 1200}


  y = column_or_1d(y, warn=True)


## Phase 2 GA

In [11]:
import random
from deap import base, creator, tools, algorithms
import scipy.io as sio
import numpy as np

mat_data = sio.loadmat('label.mat')
y = mat_data['trainLabel']
labels = y.reshape(550,1)
mat_data = sio.loadmat('X_train_gen.mat')
data = mat_data['X_train_gen']

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

def generate_individual():
    return random.sample(range(0, 600), 60)


def evaluate(individual):
    penalty = len(np.unique(individual)) / 20
    X = data[:, np.unique(individual)]
    x1 = labels == 1
    x2 = labels == -1
    # print(x1.shape)
    X1 = X[x1.squeeze(), :]
    X2 = X[x2.squeeze(), :]

    S1 = np.sum((X1 - np.mean(X1, axis=1)[:, np.newaxis])**2)
    S2 = np.sum((X2 - np.mean(X2, axis=1)[:, np.newaxis])**2)
    fit = S1 + S2 - penalty
    return fit,
    


def modify_individual(individual):
    unique_numbers = list(set(individual))
    while len(unique_numbers) < 60:
        missing_numbers = list(set(range(0, 600)) - set(unique_numbers))
        random.shuffle(missing_numbers)
        unique_numbers.append(missing_numbers.pop())
    return unique_numbers

# Create the toolbox
toolbox = base.Toolbox()
toolbox.register("individual", tools.initIterate, creator.Individual, generate_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)


population_size = 170
generations = 5000
crossover_probability = 0.7
mutation_probability = 0.3
elitism_ratio = 0.05 
population = toolbox.population(n=population_size)

fitness_values = list(map(toolbox.evaluate, population))
for ind, fit in zip(population, fitness_values):
    ind.fitness.values = fit


for gen in range(generations):

    for ind in population:
        ind[:] = modify_individual(ind)

    fitness_values = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitness_values):
        ind.fitness.values = fit

    elites = tools.selBest(population, k=int(elitism_ratio * population_size))
    offspring = algorithms.varAnd(population, toolbox, cxpb=crossover_probability, mutpb=mutation_probability)
    fitness_values = list(map(toolbox.evaluate, offspring))
    for ind, fit in zip(offspring, fitness_values):
        ind.fitness.values = fit

    population = elites + toolbox.select(offspring, k=population_size - len(elites))

# Print the best individual and its fitness value
best_individual = tools.selBest(population, k=1)[0]
print("Best Individual:", best_individual)
print("Best Fitness:", best_individual.fitness.values[0])


Best Individual: [384, 3, 4, 389, 6, 390, 388, 7, 398, 399, 143, 15, 530, 19, 149, 534, 535, 543, 168, 297, 298, 426, 300, 303, 304, 55, 441, 314, 63, 320, 321, 324, 453, 454, 327, 205, 208, 210, 214, 216, 217, 219, 93, 94, 96, 98, 495, 498, 115, 116, 501, 500, 375, 120, 121, 250, 379, 252, 253, 254]
Best Fitness: 82486.85053052753


In [12]:
np.save('best_individual.npy',best_individual)

## Classification PHASE 2

### MLP

In [13]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
import scipy.io as sio
import numpy as np

mat_data = sio.loadmat('X_train_gen.mat')
X = mat_data['X_train_gen']
X = X[:,best_individual]
mat_data = sio.loadmat('label.mat')
y = mat_data['trainLabel']
y = y.reshape(550,1)
param_grid = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'hidden_layer_sizes': [(i,) for i in range(10, 31)],
    'max_iter': [1000],  # Number of epochs
}

mlp = MLPClassifier()

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(mlp, param_grid, scoring='accuracy', cv=cv, n_jobs=-1)
grid_search.fit(X, y)

print("Best Hyperparameters: ", grid_search.best_params_)
print("Best Accuracy: {:.2f}%".format(grid_search.best_score_ * 100))


  y = column_or_1d(y, warn=True)


Best Hyperparameters:  {'activation': 'tanh', 'hidden_layer_sizes': (25,), 'max_iter': 1000}
Best Accuracy: 87.09%


### RBF

In [14]:
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import torch
import scipy.io as sio

import torch.nn as nn
import torch
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.model_selection import ParameterGrid

mat_data = sio.loadmat('X_train_gen.mat')
X = mat_data['X_train_gen']
X = X[:,best_individual]
mat_data = sio.loadmat('label.mat')
y = mat_data['trainLabel']
y = y.reshape(550,1)
for i in range(len(y)):
    if y[i]==-1:
        y[i]=0
X_train_tensor = torch.Tensor(X)
X_train = X
y_train_tensor = torch.LongTensor(y)
y_train = y
# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

class RBFNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, radius):
        super(RBFNet, self).__init__()
        self.centers = nn.Parameter(torch.randn(hidden_dim, input_dim))
        self.beta = nn.Parameter(torch.ones(hidden_dim))
        self.linear = nn.Linear(hidden_dim, output_dim)
        self.radius = radius

    def radial_basis(self, x):
        return torch.exp(-self.radius * ((x.unsqueeze(1) - self.centers) ** 2).sum(dim=2))

    def forward(self, x):
        rbf = self.radial_basis(x)
        out = self.linear(rbf)
        return out

best_accuracy = 0.0
best_params = {}

param_grid = {
    'radius': [0.005,0.0125,0.025,0.05,0.1],
    'hidden_dim': [15,30, 40,60,70,75,80]
}
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for params in ParameterGrid(param_grid):
    rbf_net = RBFNet(input_dim=X_train.shape[1], output_dim=len(np.unique(y_train)), **params)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(rbf_net.parameters(), lr=0.01)
    acc = 0
    for train_idx, val_idx in cv.split(X, y):
        for epoch in range(300):
            optimizer.zero_grad()
            outputs = rbf_net(X_train_tensor[train_idx])
            loss = criterion(outputs, y_train_tensor[train_idx].squeeze()) 
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            rbf_net.eval()
            y_pred = torch.argmax(rbf_net(X_train_tensor[val_idx]), dim=1).numpy()
            accuracy = accuracy_score(y_train[val_idx], y_pred)
        acc+=accuracy
    accuracy = acc/5
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params

print("Best Parameters:", best_params)
print("Best Accuracy on Validation Set:", best_accuracy)


Best Parameters: {'hidden_dim': 80, 'radius': 0.025}
Best Accuracy on Validation Set: 0.889090909090909


### Save Test Labels

In [25]:
mat_data = sio.loadmat('X_train_gen.mat')
X = mat_data['X_train_gen']
X = X[:,best_individual]
mat_data = sio.loadmat('label.mat')
y = mat_data['trainLabel']
y = y.reshape(550,1)
for i in range(len(y)):
    if y[i]==-1:
        y[i]=0
X_train_tensor = torch.Tensor(X)
y_train_tensor = torch.LongTensor(y)
y_train = y

mat_data = sio.loadmat('X_test_gen.mat')
X_test = mat_data['X_test_gen']
X_test = X_test[:,best_individual]
X_test_tensor = torch.Tensor(X_test)

rbf_net = RBFNet(input_dim=X.shape[1], output_dim=len(np.unique(y_train)), hidden_dim=80, radius=0.025)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rbf_net.parameters(), lr=0.01)
for epoch in range(300):
    optimizer.zero_grad()
    outputs = rbf_net(X_train_tensor)
    loss = criterion(outputs, y_train_tensor.squeeze()) 
    loss.backward()
    optimizer.step()
with torch.no_grad():
    rbf_net.eval()
    y_pred = torch.argmax(rbf_net(X_test_tensor), dim=1).numpy()
        
Y_test = np.array(y_pred)
for i in range(len(Y_test)):
    if Y_test[i] == 0:
        Y_test[i] = -1

In [26]:
print(Y_test)

[-1 -1  1  1  1 -1  1 -1  1  1 -1 -1  1  1 -1  1  1  1  1 -1 -1 -1  1 -1
 -1 -1 -1  1 -1  1 -1  1 -1 -1 -1  1 -1  1  1  1  1  1 -1  1 -1 -1 -1 -1
  1  1  1  1  1 -1 -1  1  1  1  1  1 -1  1 -1  1  1 -1 -1  1 -1 -1  1 -1
  1  1 -1 -1  1 -1  1  1 -1 -1 -1 -1  1  1  1 -1 -1 -1  1  1 -1  1 -1 -1
  1 -1  1  1 -1 -1 -1 -1 -1 -1  1  1  1 -1  1  1 -1  1 -1 -1  1 -1 -1 -1
  1 -1  1  1 -1 -1 -1  1 -1 -1  1 -1  1  1  1  1 -1  1  1  1  1  1 -1 -1
  1 -1  1 -1 -1  1  1  1 -1 -1  1  1  1 -1  1]


In [24]:
print(out)

[-1 -1  1  1  1 -1  1 -1  1  1 -1 -1  1  1 -1  1  1  1  1  1 -1 -1  1 -1
 -1 -1  1  1 -1  1 -1  1 -1 -1 -1 -1 -1  1  1  1  1  1 -1  1 -1 -1 -1  1
  1  1  1  1  1 -1 -1  1  1  1  1  1 -1  1 -1  1  1  1 -1  1 -1 -1  1 -1
  1  1 -1  1  1 -1  1  1 -1 -1 -1  1  1  1 -1 -1 -1 -1  1  1 -1 -1 -1 -1
  1 -1  1 -1 -1 -1 -1 -1 -1 -1  1 -1  1 -1  1  1 -1 -1 -1 -1  1 -1 -1 -1
  1 -1  1  1  1 -1 -1  1 -1 -1  1  1 -1  1  1  1 -1  1  1  1  1  1 -1 -1
  1 -1  1  1 -1  1  1  1 -1 -1  1  1  1 -1  1]


In [30]:
#  Here I save test matrix
print(grid_search.best_params_)
model = grid_search.best_estimator_
model.fit(X,y)
out = model.predict(X_test)
for i in range(len(out)):
    if out[i] == 0:
        out[i] =-1
sio.savemat('TestLabels_phase2.mat',{'labelMLP':out,'labelRBF':Y_test})

{'activation': 'tanh', 'hidden_layer_sizes': (25,), 'max_iter': 1000}


  y = column_or_1d(y, warn=True)


In [32]:

print(sum(out == Y_test))

146
