In [None]:
import numpy as np

import torch
import torch.nn as nn
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.models import resnet34
from torch.utils.data import DataLoader

from sklearn.metrics import confusion_matrix, f1_score
from tqdm import tqdm

from numpy.ma.core import ceil
from scipy.spatial import distance #distance calculation
from sklearn.preprocessing import MinMaxScaler #normalisation
from sklearn.metrics import accuracy_score #scoring
import matplotlib.pyplot as plt
from matplotlib import animation, colors

In [None]:
transform = transforms.Compose([
    transforms.Resize((128, 128)), # Resize to 224x224 (height x width)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
])

In [None]:
# loading the train data
batch_size = 50
#drop_last=True
train_data = datasets.CIFAR10('data', train=True,
                              download=True, transform=transform)
train_dataloader = DataLoader(train_data, batch_size=batch_size,shuffle=True )

#loading the test data
test_data = datasets.CIFAR10('data', train=False,
                             download=True, transform=transform)
test_dataloader = DataLoader(test_data,batch_size=batch_size, shuffle=True)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

## Feature Extractor

In [None]:
feature_extractor = resnet34(pretrained=True)
num_features = feature_extractor.fc.in_features

for param in feature_extractor.parameters():
    param.requires_grad = False

feature_extractor.fc = nn.Identity()
feature_extractor.to(device)

## Helper Functions

In [None]:

# Data Normalisation
def minmax_scaler(data):
  scaler = MinMaxScaler()
  scaled = scaler.fit_transform(data)
  return scaled

# Manhattan distance
def manhattan_distance(x, y):
  return distance.cityblock(x,y)

# Euclidean distance
def euclidean_distance(x, y):
    return torch.sqrt(torch.sum((x - y) ** 2))


# Best Matching Unit search
def bmu_search(data, som, num_rows, num_cols):
  winner = [0,0]
  # som = som.to(device)
  # data = data.to(device)
  shortest_distance = 10e7 
  for row in range(num_rows):
    for col in range(num_cols):
      if som[row][col] != None:
        
        distance = euclidean_distance(som[row][col], data)
        if distance < shortest_distance: 
          shortest_distance = distance
          winner = [row,col]
  return winner

# Learning rate and neighbourhood range calculation
def optimizer(step, max_steps, max_learning_rate, max_m_distance):
  coefficient = 1.0 - (np.float64(step)/max_steps)
  learning_rate = coefficient*max_learning_rate
  neighbourhood_range = ceil(coefficient * max_m_distance)
  # neighbourhood_range = max_m_distance
  return learning_rate, neighbourhood_range

#guassian
def distance_func(x):
  sig = 2 
  return np.exp(-np.power(x , 2.) / (2 * np.power(sig, 2.)))
  

### Feature Extracting Train Data

In [None]:
y_data_list = []
data_list = []
for x_train, y_train in train_dataloader:
  x_train, y_train = x_train.to(device), y_train.to(device)

  features = feature_extractor(x_train)
  features = minmax_scaler(features.cpu().numpy())
  features = torch.from_numpy(features)
  data_list.append(features)

  y_data_list.append(y_train)

print(len(y_data_list))
print(len(data_list))

### Feature Extracting Test Data

In [None]:
y_test_list = []
data_test_list = []
for x_test, y_test in test_dataloader:
  x_test, y_test = x_test.to(device), y_test.to(device)

  features = feature_extractor(x_test)
  features = minmax_scaler(features.cpu().numpy())
  features = torch.from_numpy(features)
  data_test_list.append(features)

  y_test_list.append(y_test)

print(len(y_test_list))
print(len(data_test_list))

## Hyperparameters

In [None]:
num_rows = 10
num_cols = 10
max_neighborhood_range = 4
max_learning_rate = 0.6
max_steps = 20
is_2d_10_neuron = False


## Initialising Self Organising Map

In [None]:
num_features = data_list[0].shape[1] # numnber of dimensions in the input data

if is_2d_10_neuron:
  np.random.seed(40)
  som = np.random.random_sample(size=(num_rows, num_cols, num_features)) # map construction
  som[3][0] = None
  som[3][2] = None
  som = torch.from_numpy(som)
else:
  np.random.seed(40)
  som = np.random.random_sample(size=(num_rows, num_cols, num_features)) # map construction
  som = torch.from_numpy(som)

In [None]:
epochs = 20

for epoch in range(epochs):
    for features in tqdm(data_list, desc=f"Epoch {epoch+1}", colour="blue"):
        # features.to(device)
        # som = som.to(device)

        # start training iterations
        for i in range(features.shape[0]):
          learning_rate, neighbourhood_range = optimizer(epoch, epochs, max_learning_rate, max_neighborhood_range)
          bmu = bmu_search(features[i], som, num_rows, num_cols)
          for row in range(num_rows):
            for col in range(num_cols):
              if som[row][col] != None:
                dist = manhattan_distance([row, col], bmu)
                if dist <= neighbourhood_range:
                  som[row][col] += learning_rate * distance_func(dist) * (features[i] - som[row][col]) #update neighbour's weight


## Collecting Labels

In [None]:
map = np.empty(shape=(num_rows, num_cols), dtype=object)
for row in range(num_rows):
  for col in range(num_cols):
    if som[row][col] != None:
      map[row][col] = [] # empty list to store the label

for i, features in enumerate(data_list):

  label_data = y_data_list[i].cpu().numpy()

  for t in range(features.shape[0]):
    bmu = bmu_search(features[t], som, num_rows, num_cols)
    map[bmu[0]][bmu[1]].append(label_data[t]) # label of winning neuron

## Construct Label Map

In [None]:
label_map = np.zeros(shape=(num_rows, num_cols),dtype=np.int64)
for row in range(num_rows):
  for col in range(num_cols):
    if som[row][col] != None:
      label_list = map[row][col]
      if len(label_list)==0:
        label = 11
      else:
        label = max(label_list, key=label_list.count)
      label_map[row][col] = label


## Feature Map

In [None]:
title = ('Feature Map')
# cmap = colors.ListedColormap(['tab:green', 'tab:red', 'tab:orange'])
plt.imshow(label_map, cmap='Blues')
plt.colorbar()
plt.title(title)
plt.show()

## Test Data
using the trained som, search the winning node of corresponding to the test data 

In [None]:
sum_acc = 0
n = 0
for i, features in enumerate(data_test_list):

  winner_labels = []

  for t in range(features.shape[0]):
    bmu = bmu_search(features[t], som, num_rows, num_cols)
    row = bmu[0]
    col = bmu[1]
    predicted = label_map[row][col]
    winner_labels.append(predicted)
  acc = accuracy_score(y_test_list[i].cpu().numpy(), winner_labels)
  sum_acc += acc
  n += 1
  print("Accuracy: ",acc)

print("Total Accuracy: ", sum_acc /n)

In [None]:
num_rows = 10
num_cols = 10
max_m_dsitance = 4
max_learning_rate = 0.6
max_steps = 20
is_2d = False


## Initialising Self Organising Map

In [None]:
num_dims = data_list[0].shape[1] # numnber of dimensions in the input data
if is_2d:
  np.random.seed(40)
  som = np.random.random_sample(size=(num_rows, num_cols, num_dims)) # map construction
  som[3][0] = None
  som[3][2] = None
  som = torch.from_numpy(som)
else:
  np.random.seed(40)
  som = np.random.random_sample(size=(num_rows, num_cols, num_dims)) # map construction
  som = torch.from_numpy(som)

In [None]:
epochs = 20

for epoch in range(epochs):
    for features in tqdm(data_list, desc=f"Epoch {epoch+1}", colour="blue"):
        # features.to(device)
        # som = som.to(device)

        # start training iterations
        for i in range(features.shape[0]):
          learning_rate, neighbourhood_range = decay(epoch, epochs, max_learning_rate, max_m_dsitance)
          winner = winning_neuron(features[i], som, num_rows, num_cols)
          for row in range(num_rows):
            for col in range(num_cols):
              if som[row][col] != None:
                dist = m_distance([row,col],winner)
                if dist <= neighbourhood_range:
                  som[row][col] += learning_rate * dist_func(dist) * (features[i]-som[row][col]) #update neighbour's weight


Epoch 1: 100%|[34m██████████[0m| 1000/1000 [11:33<00:00,  1.44it/s]
Epoch 2: 100%|[34m██████████[0m| 1000/1000 [11:18<00:00,  1.47it/s]
Epoch 3: 100%|[34m██████████[0m| 1000/1000 [10:53<00:00,  1.53it/s]
Epoch 4: 100%|[34m██████████[0m| 1000/1000 [10:48<00:00,  1.54it/s]
Epoch 5: 100%|[34m██████████[0m| 1000/1000 [10:48<00:00,  1.54it/s]
Epoch 6: 100%|[34m██████████[0m| 1000/1000 [10:14<00:00,  1.63it/s]
Epoch 7: 100%|[34m██████████[0m| 1000/1000 [10:17<00:00,  1.62it/s]
Epoch 8: 100%|[34m██████████[0m| 1000/1000 [10:22<00:00,  1.61it/s]
Epoch 9: 100%|[34m██████████[0m| 1000/1000 [10:22<00:00,  1.61it/s]
Epoch 10: 100%|[34m██████████[0m| 1000/1000 [10:23<00:00,  1.60it/s]
Epoch 11: 100%|[34m██████████[0m| 1000/1000 [09:51<00:00,  1.69it/s]
Epoch 12: 100%|[34m██████████[0m| 1000/1000 [09:50<00:00,  1.69it/s]
Epoch 13: 100%|[34m██████████[0m| 1000/1000 [09:52<00:00,  1.69it/s]
Epoch 14: 100%|[34m██████████[0m| 1000/1000 [09:44<00:00,  1.71it/s]
Epoch 15: 100%|

## Collecting Labels

In [None]:
map = np.empty(shape=(num_rows, num_cols), dtype=object)
for row in range(num_rows):
  for col in range(num_cols):
    if som[row][col] != None:
      map[row][col] = [] # empty list to store the label

for i, features in enumerate(data_list):


  # features = feature_extractor(x_train)
  # features = minmax_scaler(features)
  # features = torch.from_numpy(features)
  label_data = y_data_list[i].cpu().numpy()
  
  
  for t in range(features.shape[0]):
    winner = winning_neuron(features[t], som, num_rows, num_cols)
    map[winner[0]][winner[1]].append(label_data[t]) # label of winning neuron

## Construct Label Map

In [None]:
label_map = np.zeros(shape=(num_rows, num_cols),dtype=np.int64)
for row in range(num_rows):
  for col in range(num_cols):
    if som[row][col] != None:
      label_list = map[row][col]
      if len(label_list)==0:
        label = 11
      else:
        label = max(label_list, key=label_list.count)
      label_map[row][col] = label


## Feature Map

In [1]:
title = ('Feature Map')
# cmap = colors.ListedColormap(['tab:green', 'tab:red', 'tab:orange'])
plt.imshow(label_map, cmap='Blues')
plt.colorbar()
plt.title(title)
plt.show()

NameError: ignored

## Test Data
using the trained som, search the winning node of corresponding to the test data 

In [None]:
sum_acc = 0
n = 0
for i, features in enumerate(data_test_list):

  winner_labels = []

  for t in range(features.shape[0]):
    winner = winning_neuron(features[t], som, num_rows, num_cols)
    row = winner[0]
    col = winner[1]
    predicted = label_map[row][col]
    winner_labels.append(predicted)
  acc = accuracy_score(y_test_list[i].cpu().numpy(), winner_labels)
  sum_acc += acc
  n += 1
  print("Accuracy: ",acc)

print("Total Accuracy: ", sum_acc /n)

Accuracy:  0.58
Accuracy:  0.56
Accuracy:  0.5
Accuracy:  0.62
Accuracy:  0.52
Accuracy:  0.6
Accuracy:  0.54
Accuracy:  0.76
Accuracy:  0.72
Accuracy:  0.66
Accuracy:  0.56
Accuracy:  0.54
Accuracy:  0.66
Accuracy:  0.6
Accuracy:  0.66
Accuracy:  0.56
Accuracy:  0.52
Accuracy:  0.64
Accuracy:  0.62
Accuracy:  0.6
Accuracy:  0.6
Accuracy:  0.58
Accuracy:  0.6
Accuracy:  0.56
Accuracy:  0.6
Accuracy:  0.54
Accuracy:  0.64
Accuracy:  0.72
Accuracy:  0.64
Accuracy:  0.64
Accuracy:  0.56
Accuracy:  0.52
Accuracy:  0.52
Accuracy:  0.64
Accuracy:  0.58
Accuracy:  0.62
Accuracy:  0.54
Accuracy:  0.62
Accuracy:  0.64
Accuracy:  0.64
Accuracy:  0.54
Accuracy:  0.56
Accuracy:  0.58
Accuracy:  0.52
Accuracy:  0.6
Accuracy:  0.58
Accuracy:  0.48
Accuracy:  0.68
Accuracy:  0.54
Accuracy:  0.6
Accuracy:  0.64
Accuracy:  0.64
Accuracy:  0.6
Accuracy:  0.52
Accuracy:  0.56
Accuracy:  0.7
Accuracy:  0.64
Accuracy:  0.66
Accuracy:  0.52
Accuracy:  0.6
Accuracy:  0.52
Accuracy:  0.54
Accuracy:  0.64
Accu