In [3]:
from self_organizing_map import *
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import pylab
import seaborn as sns
import matplotlib.ticker as mticker
import parse
# Global plotting configuration for the notebook.
# Seaborn theme: small 'paper' context on a white grid with the Dark2 palette.
sns.set_theme(
    context='paper',
    style='whitegrid',
    palette='Dark2',
    font_scale=.75,
)
# Render figures at 150 dpi.
pylab.rcParams['figure.dpi'] = 150

# Scalar tick formatter that uses math text for its labels.
s_fmt = mticker.ScalarFormatter(useMathText=True)

In [2]:
# animals
# Train a 1-D (linear) SOM on the animal attribute vectors and list every
# animal next to the index of the node it is mapped to, sorted by node.

# load the animal names (one quoted name per line)
labels = []
with open('data/animalnames.txt', 'r') as fp:
    for line in fp:
        name = line.strip()
        if not name:
            # skip blank/trailing lines instead of recording an empty label
            continue
        # [1: -1] is to remove extra quotation marks
        labels.append(name[1:-1])

# load data: flat comma-separated values reshaped to 32 rows of 84 attributes
animals_fts = np.fromfile('data/animals.dat', sep=',')
animals_fts = animals_fts.reshape((32, 84))
# fail early (with a readable message) if names and feature rows disagree
assert len(labels) == animals_fts.shape[0], \
    f'{len(labels)} animal names for {animals_fts.shape[0]} feature rows'

som = SelfOrganizingMap(
    topology=LinearSOMTopology(
        nnodes=100,
        starting_neighbor_d=50,
        # Linear decay of 2.5 nodes per epoch. Unclamped, 50 - int(2.5 * epoch)
        # drops below zero from epoch 21 on (training runs for 25 epochs), so
        # the neighbourhood size is floored at 0.
        # NOTE(review): how the topology treats a negative size is not visible
        # from this notebook - confirm the floor matches the intended behaviour.
        neighborhood_decay_fn=lambda d0, d, epoch: max(0, d0 - int(2.5 * epoch))))
som.train(animals_fts, n_epochs=25, eta=0.25)
results = som.map(animals_fts)

animal_df = pd.DataFrame(data={'animal': labels, 'node': results}) \
    .set_index('animal')
animal_df = animal_df.sort_values('node')
animal_df

Unnamed: 0_level_0,node
animal,Unnamed: 1_level_1
beetle,0
grasshopper,1
dragonfly,3
butterfly,5
moskito,9
housefly,12
spider,16
ostrich,23
penguin,25
duck,27


In [11]:
# cyclic tour
# Fit a ring-shaped SOM to the city coordinates and show, for every city,
# the index of the node it is mapped to (sorted by node index).

# load data: keep only lines containing an "x, y;" pair; '%' lines are comments
city_rows = []
with open('data/cities.dat', 'r') as fp:
    for line in fp:
        if not line.startswith('%'):
            match = parse.search('{x_coord:f}, {y_coord:f};', line)
            if match is not None:
                city_rows.append([match['x_coord'], match['y_coord']])

cities = np.array(city_rows)
print('City coords: ', cities, sep='\n')

# ring of 10 nodes; the neighbourhood size drops by one every 10 epochs
tour_topology = CircularSOMTopology(
    nnodes=10,
    starting_neighbor_d=2,
    neighborhood_decay_fn=lambda d0, d, epoch: d0 - (epoch // 10),
)
som = SelfOrganizingMap(topology=tour_topology)
som.train(cities, n_epochs=25, eta=0.25)
results = som.map(cities)

# index by the printed coordinate pair and order the cities by their node
cities_df = (
    pd.DataFrame(data={'city': [f'{c}' for c in cities], 'node': results})
    .set_index('city')
    .sort_values('node')
)
cities_df
            

City coords: 
[[0.4    0.4439]
 [0.2439 0.1463]
 [0.1707 0.2293]
 [0.2293 0.761 ]
 [0.5171 0.9414]
 [0.8732 0.6536]
 [0.6878 0.5219]
 [0.8488 0.3609]
 [0.6683 0.2536]
 [0.6195 0.2634]]


Unnamed: 0_level_0,node
city,Unnamed: 1_level_1
[0.2439 0.1463],1
[0.1707 0.2293],1
[0.4 0.4439],3
[0.2293 0.761 ],4
[0.5171 0.9414],5
[0.8732 0.6536],7
[0.6878 0.5219],7
[0.8488 0.3609],8
[0.6683 0.2536],9
[0.6195 0.2634],9
