In [14]:
import pandas as pd
import pathlib
import numpy as np

prefix_train = pathlib.Path("UCI HAR Dataset split/train")
prefix_test = pathlib.Path("UCI HAR Dataset split/test")

In [15]:
def load_df(glob : str, path : pathlib.Path, subject = -1, avoid = -1) -> pd.DataFrame: # idx must be a int TODO
    _df = pd.DataFrame()
    for idx, fn in enumerate(path.glob(f'**/{glob}.csv')):
            if subject in (-1, idx) and idx != avoid: 
                _df = pd.concat([_df, pd.read_csv(fn, sep=' ',header = None)], ignore_index = True)
    return _df

In [16]:
train_x_df = load_df('Xtrain', prefix_train, subject = -1, avoid = 23).to_numpy()
train_y_df = load_df('ytrain', prefix_train, subject = -1, avoid = 23).to_numpy()

test_x_df = load_df('Xtest', prefix_test, subject = 23).to_numpy()
test_y_df = load_df('ytest', prefix_test, subject = 23).to_numpy()

In [17]:
# Train the SOM
from minisom import MiniSom

SOM_DIM = 20
SOM_NUM_ITER = 100

som = MiniSom(
        SOM_DIM,
        SOM_DIM,
        train_x_df.shape[1], #Number of Features
        sigma = 5,
        learning_rate = 0.1,
        neighborhood_function = "gaussian",
        activation_distance = "manhattan",
    )

som.random_weights_init(train_x_df)
som.train_random(train_x_df, SOM_NUM_ITER, verbose=False)  # random training

# weights = som.get_weights()
# weights = weights.reshape((-1, weights.shape[2]))

In [18]:
# Test the SOM
from sklearn.metrics import classification_report
from pprint import pprint

# Find the association between neurons and class
winmap = som.labels_map(train_x_df , train_y_df.flatten()) 
default_class = np.sum( list (winmap.values())).most_common()[0][0] # The most common class in the dataset (when we are undecided)

# Test
prediction = []
for test_sample in test_x_df:
    win_position = som.winner(test_sample)
    if win_position in winmap:
        prediction.append( winmap [ win_position ].most_common()[0][0])
    else:
        prediction.append(default_class) #FIXME take the neighboring

# Eval Accuracy
pprint ( classification_report(
        test_y_df.flatten(),
        prediction,
        zero_division=0.0,
        output_dict=True,
    )
)

{'1': {'f1-score': 0.9142857142857143,
       'precision': 0.9411764705882353,
       'recall': 0.8888888888888888,
       'support': 18},
 '2': {'f1-score': 0.8749999999999999,
       'precision': 0.9333333333333333,
       'recall': 0.8235294117647058,
       'support': 17},
 '3': {'f1-score': 0.8484848484848485,
       'precision': 0.7777777777777778,
       'recall': 0.9333333333333333,
       'support': 15},
 '4': {'f1-score': 0.6666666666666666,
       'precision': 0.7368421052631579,
       'recall': 0.6086956521739131,
       'support': 23},
 '5': {'f1-score': 0.8085106382978724,
       'precision': 0.76,
       'recall': 0.8636363636363636,
       'support': 22},
 '6': {'f1-score': 0.8936170212765957,
       'precision': 0.875,
       'recall': 0.9130434782608695,
       'support': 23},
 'accuracy': 0.8305084745762712,
 'macro avg': {'f1-score': 0.8344274815019496,
               'precision': 0.837354947827084,
               'recall': 0.838521188009679,
               'suppor