# Dimensionality reduction of the Iris dataset using MDS, Isomap, t-SNE, and PCA

In [1]:
#---------------
# import modules
#---------------

import numpy as np
import joblib as jb
import matplotlib.pyplot as plt

#datasets
from sklearn.datasets import load_iris

from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import Counter

from sklearn.manifold import MDS
from sklearn.manifold import TSNE
from sklearn.manifold import Isomap
from sklearn.preprocessing import StandardScaler # for PCA
from sklearn.decomposition import PCA

from sklearn.neural_network import MLPClassifier
from neuralNetwork.perceptron import perceptron 

In [2]:
#----------------
# Iris Dataset
#----------------

# Fetch the bundled iris data and split it into feature matrix and label vector.
data = load_iris()
X_org, y_org = data.data, data.target

# Report the dimensions of both arrays as a quick sanity check.
print('Shape of X: ', X_org.shape)
print('Shape of y: ', y_org.shape)

Shape of X:  (150, 4)
Shape of y:  (150,)


In [3]:
# Rescale each feature column into the (0, 1) interval.
# Note: X_org is overwritten with its scaled version; the fitted scaler stays
# available as `t`.
t = MinMaxScaler()
X_org = t.fit_transform(X_org)

In [4]:
# Optimiser options handed to nn.train() in every training cell.
# NOTE(review): c1 / c2 / w look like PSO cognitive, social and inertia
# coefficients - confirm against neuralNetwork.perceptron.
opt = dict(c1=0.5, c2=0.3, w=0.9)

n_particulas = 100  # 4th positional argument of nn.train()
max_iter = 100      # 5th positional argument of nn.train()
n_training = 10     # how many independent training runs each cell performs

## MDS reduction

### 3D

In [None]:
# Embed the min-max-scaled iris features into 3 dimensions with MDS.
# random_state pins MDS's random initialisation so the embedding (and every
# number derived from it below) is the same after each Restart & Run All.
embedding = MDS(n_components=3, random_state=100)
X_transformed = embedding.fit_transform(X_org)
print ('Shape of X_train transformed: ', X_transformed.shape)

# NOTE(review): the embedding is fitted on all samples before the split, so
# the test rows influence the coordinates of the training rows.
X_train_bal, X_test_bal, y_train_bal, y_test_bal = train_test_split(
    X_transformed, y_org, test_size=0.2, random_state=100
)
print(f"Training target statistics: {Counter(y_train_bal)}")
print(f"Testing target statistics: {Counter(y_test_bal)}")

# Sizes passed to the perceptron constructor: number of training rows,
# number of embedded features, number of distinct labels.
X_sample, X_input = X_train_bal.shape
X_class = len(np.unique(y_train_bal))

X_train = X_train_bal
y_train = y_train_bal
X_test = X_test_bal
y_test = y_test_bal

gBest_value = []   # nn.best_cost of every run
gBest = []         # value returned by nn.train() for every run
cost_test = []     # MSE between true and predicted test labels, per run
metric_train = []  # nn.h_cost of every run

# Index of the run whose weights are currently stored on disk.
best_run = None

# NOTE(review): the best run is chosen by its cost on the test split, so the
# reported test figures are optimistically biased; a separate validation
# split would avoid that.
for i in range(n_training):
    # Fresh perceptron for every run.
    nn = perceptron(X_sample, X_input, X_class)
    gBest.append(nn.train(X_train, y_train, opt, n_particulas, max_iter))
    gBest_value.append(nn.best_cost)
    metric_train.append(nn.h_cost)

    # Predicted class = index of the largest output per row.
    y_test_pred = np.argmax(nn.forward(X_test, gBest[i]), axis=1)
    cost_test.append(mean_squared_error(y_test, y_test_pred))
    print ('Test prediction cost with training: ', cost_test[i])

    # Save only on a strict improvement. The previous `<=` test overwrote the
    # files on ties, so the weights on disk belonged to the LAST tied run
    # while the summary below reported the FIRST one.
    if best_run is None or cost_test[i] < cost_test[best_run]:
        best_run = i
        np.save("iris_gBest_113_100_100_3_mds.npy", gBest[i])
        np.save("iris_gBestIter_113_100_100_3_mds.npy", nn.h_cost)
        np.save("iris_avgBest_113_100_100_3_mds.npy", nn.avg_best_value)

    min_cost = cost_test[best_run]
    print ("Min test prediction cost: ", min_cost)

print("=====================================================================")
print("=====================================================================")
print("Saving train metric .... ")
np.save("iris_metric_113_100_100_3_mds.npy", metric_train)
# best_run is the first run that reached the minimum test cost, i.e. exactly
# the run whose weights were saved above.
print("The best training is in iteration ", best_run)
print("The golbal best value is: ", gBest_value[best_run])
print("Test prediction cost: ", cost_test[best_run])
print("=====================================================================")
print("=====================================================================")

In [6]:
# Reload the weights saved by the 3D MDS training cell and score them on both
# splits. `nn` is the perceptron instance left over from the last training
# run; only its forward() is used, with the reloaded weights passed explicitly.
model_load = np.load('iris_gBest_113_100_100_3_mds.npy')

print("\n".join(2 * ["====================================================================="]))

# Held-out split: predicted class is the index of the largest output per row.
y_test_pred_load = np.argmax(nn.forward(X_test, model_load), axis=1)
cost_test_load, acc_test_load = (
    mean_squared_error(y_test, y_test_pred_load),
    accuracy_score(y_test, y_test_pred_load),
)

# Training split, scored the same way.
y_train_pred_load = np.argmax(nn.forward(X_train, model_load), axis=1)
cost_train_load, acc_train_load = (
    mean_squared_error(y_train, y_train_pred_load),
    accuracy_score(y_train, y_train_pred_load),
)

print('Training: MSE = ', cost_train_load, ' ACC score = ', acc_train_load)
print('Testing: MSE = ', cost_test_load, ' ACC score = ', acc_test_load)
print("\n".join(2 * ["====================================================================="]))

Training: MSE =  0.016666666666666666  ACC score =  0.9833333333333333
Testing: MSE =  0.03333333333333333  ACC score =  0.9666666666666667


### 2D

In [None]:
# Embed the min-max-scaled iris features into 2 dimensions with MDS.
# random_state pins MDS's random initialisation so the embedding (and every
# number derived from it below) is the same after each Restart & Run All.
embedding = MDS(n_components=2, random_state=100)
X_transformed = embedding.fit_transform(X_org)
print ('Shape of X_train transformed: ', X_transformed.shape)

# NOTE(review): the embedding is fitted on all samples before the split, so
# the test rows influence the coordinates of the training rows.
X_train_bal, X_test_bal, y_train_bal, y_test_bal = train_test_split(
    X_transformed, y_org, test_size=0.2, random_state=100
)
print(f"Training target statistics: {Counter(y_train_bal)}")
print(f"Testing target statistics: {Counter(y_test_bal)}")

# Sizes passed to the perceptron constructor: number of training rows,
# number of embedded features, number of distinct labels.
X_sample, X_input = X_train_bal.shape
X_class = len(np.unique(y_train_bal))

X_train = X_train_bal
y_train = y_train_bal
X_test = X_test_bal
y_test = y_test_bal

gBest_value = []   # nn.best_cost of every run
gBest = []         # value returned by nn.train() for every run
cost_test = []     # MSE between true and predicted test labels, per run
metric_train = []  # nn.h_cost of every run

# Index of the run whose weights are currently stored on disk.
best_run = None

# NOTE(review): the best run is chosen by its cost on the test split, so the
# reported test figures are optimistically biased; a separate validation
# split would avoid that.
for i in range(n_training):
    # Fresh perceptron for every run.
    nn = perceptron(X_sample, X_input, X_class)
    gBest.append(nn.train(X_train, y_train, opt, n_particulas, max_iter))
    gBest_value.append(nn.best_cost)
    metric_train.append(nn.h_cost)

    # Predicted class = index of the largest output per row.
    y_test_pred = np.argmax(nn.forward(X_test, gBest[i]), axis=1)
    cost_test.append(mean_squared_error(y_test, y_test_pred))
    print ('Test prediction cost with training: ', cost_test[i])

    # Save only on a strict improvement. The previous `<=` test overwrote the
    # files on ties, so the weights on disk belonged to the LAST tied run
    # while the summary below reported the FIRST one.
    if best_run is None or cost_test[i] < cost_test[best_run]:
        best_run = i
        np.save("iris_gBest_113_100_100_2_mds.npy", gBest[i])
        np.save("iris_gBestIter_113_100_100_2_mds.npy", nn.h_cost)
        np.save("iris_avgBest_113_100_100_2_mds.npy", nn.avg_best_value)

    min_cost = cost_test[best_run]
    print ("Min test prediction cost: ", min_cost)

print("=====================================================================")
print("=====================================================================")
print("Saving train metric .... ")
np.save("iris_metric_113_100_100_2_mds.npy", metric_train)
# best_run is the first run that reached the minimum test cost, i.e. exactly
# the run whose weights were saved above.
print("The best training is in iteration ", best_run)
print("The golbal best value is: ", gBest_value[best_run])
print("Test prediction cost: ", cost_test[best_run])
print("=====================================================================")
print("=====================================================================")

In [8]:
# Reload the weights saved by the 2D MDS training cell and score them on both
# splits. `nn` is the perceptron instance left over from the last training
# run; only its forward() is used, with the reloaded weights passed explicitly.
model_load = np.load('iris_gBest_113_100_100_2_mds.npy')

print("\n".join(2 * ["====================================================================="]))

# Held-out split: predicted class is the index of the largest output per row.
y_test_pred_load = np.argmax(nn.forward(X_test, model_load), axis=1)
cost_test_load, acc_test_load = (
    mean_squared_error(y_test, y_test_pred_load),
    accuracy_score(y_test, y_test_pred_load),
)

# Training split, scored the same way.
y_train_pred_load = np.argmax(nn.forward(X_train, model_load), axis=1)
cost_train_load, acc_train_load = (
    mean_squared_error(y_train, y_train_pred_load),
    accuracy_score(y_train, y_train_pred_load),
)

print('Training: MSE = ', cost_train_load, ' ACC score = ', acc_train_load)
print('Testing: MSE = ', cost_test_load, ' ACC score = ', acc_test_load)
print("\n".join(2 * ["====================================================================="]))

Training: MSE =  0.0  ACC score =  1.0
Testing: MSE =  0.03333333333333333  ACC score =  0.9666666666666667


### 1D

In [None]:
# Embed the min-max-scaled iris features into 1 dimension with MDS.
# random_state pins MDS's random initialisation so the embedding (and every
# number derived from it below) is the same after each Restart & Run All.
embedding = MDS(n_components=1, random_state=100)
X_transformed = embedding.fit_transform(X_org)
print ('Shape of X_train transformed: ', X_transformed.shape)

# NOTE(review): the embedding is fitted on all samples before the split, so
# the test rows influence the coordinates of the training rows.
X_train_bal, X_test_bal, y_train_bal, y_test_bal = train_test_split(
    X_transformed, y_org, test_size=0.2, random_state=100
)
print(f"Training target statistics: {Counter(y_train_bal)}")
print(f"Testing target statistics: {Counter(y_test_bal)}")

# Sizes passed to the perceptron constructor: number of training rows,
# number of embedded features, number of distinct labels.
X_sample, X_input = X_train_bal.shape
X_class = len(np.unique(y_train_bal))

X_train = X_train_bal
y_train = y_train_bal
X_test = X_test_bal
y_test = y_test_bal

gBest_value = []   # nn.best_cost of every run
gBest = []         # value returned by nn.train() for every run
cost_test = []     # MSE between true and predicted test labels, per run
metric_train = []  # nn.h_cost of every run

# Index of the run whose weights are currently stored on disk.
best_run = None

# NOTE(review): the best run is chosen by its cost on the test split, so the
# reported test figures are optimistically biased; a separate validation
# split would avoid that.
for i in range(n_training):
    # Fresh perceptron for every run.
    nn = perceptron(X_sample, X_input, X_class)
    gBest.append(nn.train(X_train, y_train, opt, n_particulas, max_iter))
    gBest_value.append(nn.best_cost)
    metric_train.append(nn.h_cost)

    # Predicted class = index of the largest output per row.
    y_test_pred = np.argmax(nn.forward(X_test, gBest[i]), axis=1)
    cost_test.append(mean_squared_error(y_test, y_test_pred))
    print ('Test prediction cost with training: ', cost_test[i])

    # Save only on a strict improvement. The previous `<=` test overwrote the
    # files on ties, so the weights on disk belonged to the LAST tied run
    # while the summary below reported the FIRST one.
    if best_run is None or cost_test[i] < cost_test[best_run]:
        best_run = i
        np.save("iris_gBest_113_100_100_1_mds.npy", gBest[i])
        np.save("iris_gBestIter_113_100_100_1_mds.npy", nn.h_cost)
        np.save("iris_avgBest_113_100_100_1_mds.npy", nn.avg_best_value)

    min_cost = cost_test[best_run]
    print ("Min test prediction cost: ", min_cost)

print("=====================================================================")
print("=====================================================================")
print("Saving train metric .... ")
np.save("iris_metric_113_100_100_1_mds.npy", metric_train)
# best_run is the first run that reached the minimum test cost, i.e. exactly
# the run whose weights were saved above.
print("The best training is in iteration ", best_run)
print("The golbal best value is: ", gBest_value[best_run])
print("Test prediction cost: ", cost_test[best_run])
print("=====================================================================")
print("=====================================================================")

In [10]:
# Reload the weights saved by the 1D MDS training cell and score them on both
# splits. `nn` is the perceptron instance left over from the last training
# run; only its forward() is used, with the reloaded weights passed explicitly.
model_load = np.load('iris_gBest_113_100_100_1_mds.npy')

print("\n".join(2 * ["====================================================================="]))

# Held-out split: predicted class is the index of the largest output per row.
y_test_pred_load = np.argmax(nn.forward(X_test, model_load), axis=1)
cost_test_load, acc_test_load = (
    mean_squared_error(y_test, y_test_pred_load),
    accuracy_score(y_test, y_test_pred_load),
)

# Training split, scored the same way.
y_train_pred_load = np.argmax(nn.forward(X_train, model_load), axis=1)
cost_train_load, acc_train_load = (
    mean_squared_error(y_train, y_train_pred_load),
    accuracy_score(y_train, y_train_pred_load),
)

print('Training: MSE = ', cost_train_load, ' ACC score = ', acc_train_load)
print('Testing: MSE = ', cost_test_load, ' ACC score = ', acc_test_load)
print("\n".join(2 * ["====================================================================="]))

Training: MSE =  0.058333333333333334  ACC score =  0.9416666666666667
Testing: MSE =  0.03333333333333333  ACC score =  0.9666666666666667
