In [2]:
pip install minisom

Collecting minisom
  Downloading MiniSom-2.3.2.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: minisom
  Building wheel for minisom (setup.py) ... [?25ldone
[?25h  Created wheel for minisom: filename=MiniSom-2.3.2-py3-none-any.whl size=10650 sha256=2279d3a9c9f02cd5662f6916c28746e612d86207dfcc2c48179ac96c1f347ff4
  Stored in directory: /Users/swetha/Library/Caches/pip/wheels/b4/f6/8a/9daf8831901c3e3805775633404248f10663d1c80b7e5a1314
Successfully built minisom
Installing collected packages: minisom
Successfully installed minisom-2.3.2
Note: you may need to restart the kernel to use updated packages.


In [4]:
import numpy as np
from minisom import MiniSom
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
def feature_selection_som(X, y, map_size, num_epochs=250, sigma=1.0, learning_rate=0.5):
    """
    Feature selection using Self-Organizing Map (SOM).
   
    Parameters:
    - X: Input data (features).
    - y: Target variable.
    - map_size: Tuple representing the size of the SOM map (e.g., (5, 5)).
    - num_epochs: Number of training epochs.
    - sigma: Spread of the neighborhood function.
    - learning_rate: Initial learning rate.

    Returns:
    - Selected features based on SOM.
    """

    # Feature scaling
   

    # Initialize SOM
    som = MiniSom(map_size[0], map_size[1], X.shape[1], sigma=sigma, learning_rate=learning_rate)

    # Training the SOM
    som.train_random(X, num_epochs)

    # Calculate the distance map
    distance_map = som.distance_map()

    # Find the best-matching units (BMUs) for each sample
    bmu_indices = np.array([som.winner(x) for x in X])

    # Calculate the feature importance based on BMU distances
    feature_importance = np.sum(np.exp(-distance_map[bmu_indices[:, 0], bmu_indices[:, 1]])[:, np.newaxis] * X, axis=0)

    # Normalize feature importance
    feature_importance /= np.sum(feature_importance)

    # Select the top features
    print("feature_importance")
    print(feature_importance)
    num_selected_features = int(np.ceil(np.sum(feature_importance > 3.0)))
    selected_feature_indices = np.argsort(feature_importance)[-num_selected_features:]

    # Display the selected features
    print("Selected Features:", selected_feature_indices)
    plt.figure(figsize=(20, 12))

    # Plot the SOM map
    plt.subplot(2, 1, 1)
    plt.title('SOM Map')
    plt.pcolor(distance_map.T, cmap='bone_r', alpha=0.8)
    plt.colorbar()

    # Highlight selected features on the map
    for i in selected_feature_indices:
        plt.text(bmu_indices[i, 0] + 0.5, bmu_indices[i, 1] + 0.5, str(i+1),
                 color='red', fontweight='bold', ha='center', va='center')

    # Plot the feature importance on a logarithmic scale
    plt.subplot(2, 1, 2)
    plt.bar(range(X.shape[1]), feature_importance, align='center')
    plt.yscale('log')  # Set y-axis to logarithmic scale
    plt.xticks(range(X.shape[1]), range(1, X.shape[1] + 1))
    plt.title('Feature Importance (log scale)')
    plt.show()
   
    return selected_feature_indices

# Example usage with the Iris dataset
columns_to_read=['FIT101', 'LIT101', ' MV101', 'P101', 'P102', ' AIT201', 'AIT202',
       'AIT203', 'FIT201', ' MV201', ' P201', ' P202', 'P203', ' P204', 'P205',
       'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', ' MV303',
       'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401',
       'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504',
       'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 'P502', 'PIT501',
       'PIT502', 'PIT503', 'FIT601', 'P601', 'P602', 'P603', 'Label',
       'numeric_timestamp']
data=pd.read_csv("swatcsv.csv",usecols=columns_to_read)

df=pd.DataFrame(data)
y_value=df['Label'].to_numpy()
df=df.drop('Label',axis=1)
X_value=df.to_numpy()
print(X_value)
map_size = (10, 10)
selected_features = feature_selection_som(X_value, y_value, map_size)


ValueError: Usecols do not match columns, columns expected but not found: ['numeric_timestamp', 'Label']

In [3]:
print(data.columns)

NameError: name 'data' is not defined