# Libraries

In [1]:
import numpy as np
import pandas as pd
import torch

from sklearn.preprocessing import MinMaxScaler

from bib_sensing.TorchSOM.TorchSOM.core import TorchSOM # To modify
from bib_sensing.TorchSOM.TorchSOM.plotting import SOMVisualizer, VisualizationConfig # To modify

In [2]:
# Seed torch's global RNG for repeatable runs, then select the compute device:
# CUDA when torch reports it as available, CPU otherwise.
random_seed = 42
torch.manual_seed(random_seed)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing 

In [3]:
# Load the iris dataset from a CSV file located relative to the notebook.
iris_df = pd.read_csv("../data/iris.csv")

In [4]:
# Every column except the last one is treated as a feature; the last column
# is left untouched by the scaling.
feature_columns = iris_df.columns[:-1]

# Min-max scale the feature columns in place (fit, then transform).
# NOTE(review): the scaler is fit on all rows, i.e. before the train/test split
# performed later in the notebook, so test rows influence the scaling range.
scaler = MinMaxScaler()
scaler.fit(iris_df[feature_columns])
iris_df[feature_columns] = scaler.transform(iris_df[feature_columns])

In [5]:
# Encode the species names as integer class ids.
species_to_id = {
    'Iris-setosa': 1,
    'Iris-versicolor': 2,
    'Iris-virginica': 3,
}

# Only encode while the column still holds the raw labels. Re-running the cell
# on an already-encoded (numeric) column would otherwise turn every label into
# NaN, because Series.map yields NaN for values missing from the dict.
if not pd.api.types.is_numeric_dtype(iris_df['Species']):
    species_ids = iris_df['Species'].map(species_to_id)

    # Fail loudly on labels that are not in the mapping instead of letting
    # NaN propagate into the target tensor built later.
    unknown_labels = iris_df.loc[species_ids.isna(), 'Species'].unique().tolist()
    if unknown_labels:
        raise ValueError(f"Unexpected values in 'Species': {unknown_labels}")

    iris_df['Species'] = species_ids.astype('int64')

In [6]:
# Preview the first five rows of the scaled / encoded frame.
iris_df.head(n=5)

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Species
0,0.222222,0.625,0.067797,0.041667,1
1,0.166667,0.416667,0.067797,0.041667,1
2,0.111111,0.5,0.050847,0.041667,1
3,0.083333,0.458333,0.084746,0.041667,1
4,0.194444,0.666667,0.067797,0.041667,1


In [7]:
# Summary statistics of the frame, used as a sanity check on the scaling and encoding above.
iris_df.describe()

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Species
count,150.0,150.0,150.0,150.0,150.0
mean,0.428704,0.439167,0.467571,0.457778,2.0
std,0.230018,0.180664,0.299054,0.317984,0.819232
min,0.0,0.0,0.0,0.0,1.0
25%,0.222222,0.333333,0.101695,0.083333,1.0
50%,0.416667,0.416667,0.567797,0.5,2.0
75%,0.583333,0.541667,0.694915,0.708333,3.0
max,1.0,1.0,1.0,1.0,3.0


In [8]:
# Plain Python list of the feature column labels (reused later to label plots).
feature_names = list(feature_columns)
feature_names

['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']

In [9]:
# (number of rows, number of columns) of the frame.
iris_df.shape

(150, 5)

In [10]:
"""
1. Create a tensor from the iris df and separate the features and the target
2. Randomly shuffle the data
3. Split the data into training and testing sets
"""
# Derive the column positions from `feature_columns` instead of hard-coding
# them: the features are the leading columns and the target is the column
# right after them.
feature_count = len(feature_columns)
iris_torch = torch.tensor(iris_df.to_numpy(dtype=np.float32))
all_features = iris_torch[:, :feature_count]
all_targets = iris_torch[:, feature_count].long()

# Shuffle with a dedicated, explicitly seeded generator so the split is the
# same every time this cell runs, independent of how much of the global
# torch RNG stream has already been consumed.
split_generator = torch.Generator().manual_seed(random_seed)
shuffled_indices = torch.randperm(len(all_features), generator=split_generator)
all_features, all_targets = all_features[shuffled_indices], all_targets[shuffled_indices]

# First `train_ratio` share of the shuffled rows goes to training, the rest to testing.
train_ratio = 0.8
train_count = int(train_ratio * len(all_features))
train_features, train_targets = all_features[:train_count], all_targets[:train_count]
test_features, test_targets = all_features[train_count:], all_targets[train_count:]

# The two subsets must partition the data, with features and targets aligned.
assert len(train_features) + len(test_features) == len(all_features)
assert len(train_features) == len(train_targets)
assert len(test_features) == len(test_targets)

print(train_features.shape, test_features.shape)
print(train_targets.shape, test_targets.shape)

torch.Size([120, 4]) torch.Size([30, 4])
torch.Size([120]) torch.Size([30])


# TorchSOM

In [11]:
# Build the SOM. Keyword arguments are grouped by theme; their exact semantics
# are defined by TorchSOM, which is not visible in this notebook.
som = TorchSOM(
    # Map grid
    x=25,
    y=15,
    topology="rectangular",
    # Input width, taken from the second dimension of the feature tensor
    num_features=all_features.size(1),
    # Training schedule
    epochs=100,
    batch_size=16,
    learning_rate=0.95,
    lr_decay_function="asymptotic_decay",
    # Neighborhood
    sigma=1.45,
    sigma_decay_function="asymptotic_decay",
    neighborhood_function="gaussian",
    neighborhood_order=3,
    # Distance and weight initialization
    distance_function="euclidean",
    initialization_mode="pca",
    # Reproducibility and device placement
    random_seed=random_seed,
    device=device,
)

In [12]:
# Initialize the SOM weights from the training features.
som.initialize_weights(data=train_features)

In [13]:
# Train on the training features. `fit` returns two values, passed further down
# to the plotting code as quantization errors (QE) and topographic errors (TE).
QE, TE = som.fit(data=train_features)

Training SOM: 100%|██████████| 100/100 [00:01<00:00, 89.88epoch/s]


In [14]:
# Output location handed to every plot call; set to None if you want a direct plot.
save_path = "results/iris"

# Visualizer bound to the trained SOM, with no explicit configuration (config=None).
visualizer = SOMVisualizer(som=som, config=None)

In [15]:
# Plot the two error series returned by `som.fit`.
visualizer.plot_training_errors(quantization_errors=QE, topographic_errors=TE, save_path=save_path)

In [16]:
# Distance map of the trained SOM; `save_path` is the output location defined with the visualizer.
visualizer.plot_distance_map(save_path=save_path)

In [17]:
# Hit map computed from the training features.
visualizer.plot_hit_map(data=train_features, save_path=save_path)

In [18]:
# Classification map built from the training features and their integer class ids.
visualizer.plot_classification_map(data=train_features, target=train_targets, save_path=save_path)

In [19]:
# Component planes, labelled with the feature column names.
visualizer.plot_component_planes(component_names=feature_names, save_path=save_path)