# 1. Loading and organizing data

In [1]:
from load_dataset import datasets
from som import SOM

# Print a short summary (name, feature count, sample count) for every loaded dataset.
for dataset in datasets:
    feature_shape = datasets[dataset]["features"].shape
    n_samples, n_features = feature_shape[0], feature_shape[1]
    print(f"Dataset name: {dataset}\nNumber of features: {n_features}\nNumber of samples: {n_samples}\n")

Dataset:  Features.shape:   # of classes:
vc2c      (310, 6)          2
vc3c      (310, 6)          3
wf24f     (5456, 24)        4
wf4f      (5456, 4)         4
wf2f      (5456, 2)         4
pk        (195, 22)         2


Dataset name: vc2c
Number of features: 6
Number of samples: 310

Dataset name: vc3c
Number of features: 6
Number of samples: 310

Dataset name: wf24f
Number of features: 24
Number of samples: 5456

Dataset name: wf4f
Number of features: 4
Number of samples: 5456

Dataset name: wf2f
Number of features: 2
Number of samples: 5456

Dataset name: pk
Number of features: 22
Number of samples: 195



# 2. SOM studies

In [2]:

# NOTE: `plt` is bound to plotly.offline here, not to matplotlib.pyplot (which commonly uses this alias).
import plotly.offline as plt
plt.init_notebook_mode(connected=True) # enable plotly output inside the Jupyter notebook



# Testing consistency

In [3]:
import cupy as cp
# Sanity check that CuPy is usable: build a 2x3 single-precision matrix holding 0..5,
# then print the matrix followed by its per-row sums.
x = cp.arange(6).reshape(2, 3).astype('f')
print(x)
row_sums = x.sum(axis=1)
print(row_sums)

ImportError: CuPy is not correctly installed.

If you are using wheel distribution (cupy-cudaXX), make sure that the version of CuPy you installed matches with the version of CUDA on your host.
Also, confirm that only one CuPy package is installed:
  $ pip freeze

If you are building CuPy from source, please check your environment, uninstall CuPy and reinstall it with:
  $ pip install cupy --no-cache-dir -vvvv

Check the Installation Guide for details:
  https://docs.cupy.dev/en/latest/install.html

original error: DLL load failed while importing driver: Não foi possível encontrar o módulo especificado.

### SSD curves for the Parkinson dataset:

In [14]:
from utils import scale_feat
%%time
from math import ceil
# choosing and preparing dataset

dataset_name='pk'

data = datasets[dataset_name]
N = len(data['features'].index) # number of datapoints
l = ceil((5*N**.5)**.5) # tamanho do lado da grid quadrada de neurônios
X = data['features'].values.copy()
X1, X2 = scale_feat(X,X,scaleType='min-max')
X=X1

params = {
     "X":         X
    ,"alpha0":    0.1
    ,"sigma0":    3 
    ,"nEpochs":   50
    ,"verboses":  0
    #,"saveNeuronsHist": True
}

som_new = SOM(l, l)

som_new.fit(**params)
# som_old.train(**params, batchSize=len(X))

print("SOM_new:"); som_new.plotSSD()
# print("SOM_old:"); som_old.plotSSD()
# print("SOM_old last SSD: {}\nSOM_new last SSD: {}".format(som_old.ssdHist[-1], som_new.ssdHist[-1]))

TypeError: _amin() got an unexpected keyword argument 'dtype'

### SSD curves and neuron evolution for the Wall-Following dataset:

In [10]:
%%time
from math import ceil
import datetime
# choosing and preparing dataset

dataset_name='wf2f'

data = datasets[dataset_name]
N = len(data['features'].index) # number of datapoints
l = ceil((5*N**.5)**.5) # tamanho do lado da grid quadrada de neurônios
X = data['features'].values.copy()
X1, X2 = scale_feat(X,X,scaleType='min-max')
X=X1

params = {
     "X":         X
    ,"alpha0":    0.1
    ,"sigma0":    3 
    ,"nEpochs":   100
    ,"verboses":  1
    #,"saveNeuronsHist": True
}

# som_old = SOM_2D(l, l, X.shape[1]); som_old.init(X)
som_new = SOM(l, l)

print("SOM_new training started at {}".format(datetime.datetime.now()))
som_new.fit(**params,saveNeuronsHist=True)
# nEpochs = 10
# Wall time: 1min 17s

SOM_old training started at 2019-07-04 18:21:47.579465
End of epoch 1
End of epoch 2
End of epoch 3
End of epoch 4
End of epoch 5
End of epoch 6
End of epoch 7
End of epoch 8
End of epoch 9
End of epoch 10
End of epoch 11
End of epoch 12
End of epoch 13
End of epoch 14
End of epoch 15
End of epoch 16
End of epoch 17
End of epoch 18
End of epoch 19
End of epoch 20
End of epoch 21
End of epoch 22
End of epoch 23
End of epoch 24
End of epoch 25
End of epoch 26
End of epoch 27
End of epoch 28
End of epoch 29
End of epoch 30
End of epoch 31
End of epoch 32
End of epoch 33
End of epoch 34
End of epoch 35
End of epoch 36
End of epoch 37
End of epoch 38
End of epoch 39
End of epoch 40
End of epoch 41
End of epoch 42
End of epoch 43
End of epoch 44
End of epoch 45
End of epoch 46
End of epoch 47
End of epoch 48
End of epoch 49
End of epoch 50
End of epoch 51
End of epoch 52
End of epoch 53
End of epoch 54
End of epoch 55
End of epoch 56
End of epoch 57
End of epoch 58
End of epoch 59
End of epo

In [12]:
# Label the output, then plot from the `som_new` object fitted in a previous cell.
# NOTE(review): plotSSD presumably draws the SSD (sum of squared distances) training
# curve -- confirm in som.SOM.
print("SOM_new:")
som_new.plotSSD()

SOM_old:


SOM_new:


## Performance comparison with old SOM:

In [18]:
from som import SOM
%%time
import time
import datetime
from math import ceil

# choosing and preparing dataset
dataset_name='pk'
data = datasets[dataset_name]
N = len(data['features'].index) # number of datapoints
l = ceil((5*N**.5)**.5) # tamanho do lado da grid quadrada de neurônios
X = data['features'].values.copy()

X1, X2 = scale_feat(X,X,scaleType='min-max')
X = X1

n = 100

params = {
     "X":         X
    ,"alpha0":    0.1
    ,"sigma0":    3 
    ,"nEpochs":   5
    ,"verboses":  0
    #,"saveNeuronsHist": False
}

# som_old = SOM_2D(l, l, X.shape[1]); som_old.init(X)
som_new = SOM(l, l)

print("n={}".format(n))

t0 = time.time()
for i in range(n): som_new.fit(**params)
t1 = time.time()
total_new = (t1-t0)/n

print(f"SOM finished at {datetime.datetime.now()}")
print(f"SOM time: {total_new}\n")

n=100
som_old finished at 2019-07-04 20:28:42.488451
som_new finished at 2019-07-04 20:28:49.218428

Old SOM time: 1.6339880013465882
New SOM time: 0.0672989273071289

New SOM 24.27955491607816x faster.

CPU times: user 2min 50s, sys: 33.2 ms, total: 2min 50s
Wall time: 2min 50s


In [19]:
# NOTE(review): `is_over` is a project helper from utils; presumably it signals that the
# notebook has finished running (e.g. an end-of-run notification) -- confirm in utils.
from utils import is_over
is_over()