# Libraries

In [1]:
import numpy as np
import pandas as pd
import torch

from sklearn.preprocessing import StandardScaler

from bib_sensing.TorchSOM.TorchSOM.core import TorchSOM # To modify
from bib_sensing.TorchSOM.TorchSOM.plotting import SOMVisualizer, VisualizationConfig # To modify

In [2]:
# Seed PyTorch's global random number generator with a fixed value.
random_seed = 42
torch.manual_seed(random_seed)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Preprocessing 

In [3]:
# Read the energy-efficiency dataset from the CSV file in the sibling data directory.
energy_df = pd.read_csv("../data/energy_efficiency.csv")

In [4]:
# Standardisation is disabled by default, so `energy_df_scaled` holds the raw
# values. Set APPLY_SCALING = True to z-score every column with StandardScaler.
APPLY_SCALING = False

if APPLY_SCALING:
    scaler = StandardScaler()
    energy_df_scaled = pd.DataFrame(
        scaler.fit_transform(energy_df),
        columns=energy_df.columns,
        index=energy_df.index,
    )
else:
    # Copy instead of aliasing so that any later change to the working frame
    # cannot silently modify the raw frame loaded from disk.
    energy_df_scaled = energy_df.copy()

In [5]:
# Display the first rows of the working frame as a quick sanity check.
energy_df_scaled.head()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


In [6]:
# Display summary statistics of the working frame.
energy_df_scaled.describe()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load,Cooling Load
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,0.764167,671.708333,318.5,176.604167,5.25,3.5,0.234375,2.8125,22.307195,24.58776
std,0.105777,88.086116,43.626481,45.16595,1.75114,1.118763,0.133221,1.55096,10.090204,9.513306
min,0.62,514.5,245.0,110.25,3.5,2.0,0.0,0.0,6.01,10.9
25%,0.6825,606.375,294.0,140.875,3.5,2.75,0.1,1.75,12.9925,15.62
50%,0.75,673.75,318.5,183.75,5.25,3.5,0.25,3.0,18.95,22.08
75%,0.83,741.125,343.0,220.5,7.0,4.25,0.4,4.0,31.6675,33.1325
max,0.98,808.5,416.5,220.5,7.0,5.0,0.4,5.0,43.1,48.03


In [7]:
# Keep every column name except the last two; those two columns are split off
# as the targets when the tensor is built.
feature_names = list(energy_df_scaled.columns[:-2])
feature_names

['Relative Compactness',
 'Surface Area',
 'Wall Area',
 'Roof Area',
 'Overall Height',
 'Orientation',
 'Glazing Area',
 'Glazing Area Distribution']

In [8]:
# Display (number of rows, number of columns) of the working frame.
energy_df_scaled.shape

(768, 10)

In [9]:
"""
1. Create a tensor from the energy df and separate the features and the target
2. Randomly shuffle the data
3. Split the data into training and testing sets
"""
# Convert the frame to a float32 tensor. The last two columns are the targets
# (heating, then cooling); every other column is an input feature.
energy_torch = torch.tensor(energy_df_scaled.to_numpy(dtype=np.float32))
all_features = energy_torch[:, :-2]
all_targets_heating, all_targets_cooling = energy_torch[:, -2], energy_torch[:, -1]

# Shuffle with a dedicated generator seeded from `random_seed`. Drawing from the
# global RNG would make the split depend on how many random calls ran before
# this cell, so re-running it (or running cells out of order) would change the
# train/test partition.
split_generator = torch.Generator().manual_seed(random_seed)
shuffled_indices = torch.randperm(len(all_features), generator=split_generator)
all_features = all_features[shuffled_indices]
all_targets_heating, all_targets_cooling = all_targets_heating[shuffled_indices], all_targets_cooling[shuffled_indices]

# The first `train_ratio` share of the shuffled rows is used for training.
train_ratio = 0.8
train_count = int(train_ratio * len(all_features))

train_features = all_features[:train_count]
train_targets_heating, train_targets_cooling = all_targets_heating[:train_count], all_targets_cooling[:train_count]

test_features = all_features[train_count:]
test_targets_heating, test_targets_cooling = all_targets_heating[train_count:], all_targets_cooling[train_count:]

# Sanity check: the two subsets must partition the full data set.
assert len(train_features) + len(test_features) == len(all_features)

print(train_features.shape, test_features.shape)
print(train_targets_heating.shape, train_targets_cooling.shape, test_targets_heating.shape, test_targets_cooling.shape)

torch.Size([614, 8]) torch.Size([154, 8])
torch.Size([614]) torch.Size([614]) torch.Size([154]) torch.Size([154])


# TorchSOM

In [10]:
# Build the self-organising map. Every hyper-parameter is passed by keyword;
# the arguments are grouped by what their names suggest (confirm the exact
# semantics against the TorchSOM documentation).
som = TorchSOM(
    # Map size, topology and input width.
    x=35,
    y=20,
    topology="rectangular",
    num_features=all_features.shape[1],
    # Distance and neighbourhood settings.
    distance_function="euclidean",
    neighborhood_function="gaussian",
    neighborhood_order=3,
    sigma=2.5,
    sigma_decay_function="asymptotic_decay",
    # Training schedule.
    learning_rate=0.95,
    lr_decay_function="asymptotic_decay",
    epochs=125,
    batch_size=16,
    # Initialisation, device and seeding.
    initialization_mode="pca",
    device=device,
    random_seed=random_seed,
)

In [11]:
# Initialise the map's weights from the training features before fitting.
som.initialize_weights(data=train_features)

In [12]:
# Fit the map on the training features. The two returned values are passed to
# plot_training_errors as quantization_errors (QE) and topographic_errors (TE).
QE, TE = som.fit(data=train_features)

Training SOM: 100%|██████████| 125/125 [00:11<00:00, 10.95epoch/s]


In [13]:
# Root directory passed to the plotting calls as save_path.
# Set to None if you want a direct plot
save_path = "results/energy"

# Visualiser bound to the trained map; no explicit configuration is supplied.
visualizer = SOMVisualizer(som=som, config=None)

In [14]:
# Plot the two error series returned by som.fit.
visualizer.plot_training_errors(quantization_errors=QE, topographic_errors=TE, save_path=save_path)

In [15]:
# Plot the distance map of the trained SOM, passing save_path as the output location.
visualizer.plot_distance_map(save_path=save_path)

In [16]:
# Plot the hit map computed from the training features.
visualizer.plot_hit_map(data=train_features, save_path=save_path)

In [17]:
# Plot the component planes, labelled with the feature column names.
visualizer.plot_component_planes(component_names=feature_names, save_path=save_path)

### Heating Target

In [18]:
# Sub-directory for the heating-target figures. When save_path is None (direct
# plotting, no files written) keep None instead of failing on `None + str`.
heating_path = None if save_path is None else save_path + "/heating"

In [19]:
# Metric map of the heating target with the "mean" reduction.
visualizer.plot_metric_map(
    data=train_features, target=train_targets_heating,
    reduction_parameter="mean", save_path=heating_path,
)

In [20]:
# Metric map of the heating target with the "std" reduction.
visualizer.plot_metric_map(
    data=train_features, target=train_targets_heating,
    reduction_parameter="std", save_path=heating_path,
)

In [21]:
# Rank map for the heating target on the training features.
visualizer.plot_rank_map(data=train_features, target=train_targets_heating, save_path=heating_path)

In [22]:
# Score map for the heating target on the training features.
visualizer.plot_score_map(data=train_features, target=train_targets_heating, save_path=heating_path)

### Cooling Target

In [23]:
# Sub-directory for the cooling-target figures. When save_path is None (direct
# plotting, no files written) keep None instead of failing on `None + str`.
cooling_path = None if save_path is None else save_path + "/cooling"

In [24]:
# Metric map of the cooling target with the "mean" reduction.
visualizer.plot_metric_map(
    data=train_features, target=train_targets_cooling,
    reduction_parameter="mean", save_path=cooling_path,
)

In [25]:
# Metric map of the cooling target with the "std" reduction.
visualizer.plot_metric_map(
    data=train_features, target=train_targets_cooling,
    reduction_parameter="std", save_path=cooling_path,
)

In [26]:
# Rank map for the cooling target on the training features.
visualizer.plot_rank_map(data=train_features, target=train_targets_cooling, save_path=cooling_path)

In [27]:
# Score map for the cooling target on the training features.
visualizer.plot_score_map(data=train_features, target=train_targets_cooling, save_path=cooling_path)