# Detecting emotions in Spanish lyrics songs: Vanilla SNN 

In [1]:
wget -V

NameError: name 'wget' is not defined

In [99]:
!wget https://pln.inf.um.es/corpora/emospeech/2024/dataset/download/train?api_key=25414fffb668b475f0b5ce01d3b37d25 -O segments_train.zip
!wget https://pln.inf.um.es/corpora/emospeech/2024/dataset/download/test?api_key=25414fffb668b475f0b5ce01d3b37d25 -O segments_test.zip

!wget https://pln.inf.um.es/corpora/emospeech/2024/EmoSPeech_phase_2_train_public.csv
!wget https://pln.inf.um.es/corpora/emospeech/2024/EmoSPeech_phase_2_test_public.csv

zsh:1: no matches found: https://pln.inf.um.es/corpora/emospeech/2024/dataset/download/train?api_key=25414fffb668b475f0b5ce01d3b37d25
zsh:1: no matches found: https://pln.inf.um.es/corpora/emospeech/2024/dataset/download/test?api_key=25414fffb668b475f0b5ce01d3b37d25
zsh:1: command not found: wget
zsh:1: command not found: wget


## Imports

In [1]:
import pandas as pd, numpy as np
import brian2 as b2
from brian2 import *
import matplotlib.pyplot as plt

# Vectorizadores
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split

# descarga las stopwords en español y guardalas en una lista
from nltk.corpus import stopwords
import string 
from nltk.tokenize import word_tokenize
stopwords = set(stopwords.words('spanish'))


# Grafica en formato retina
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

  set_matplotlib_formats('retina')


## Functions

In [3]:
def vectorize_text(train_data, test_data, min_df, ngrams, stop_words_lang):
    # Vectorizar texto
    vectorizer = TfidfVectorizer(min_df=min_df, 
                                ngram_range=ngrams, 
                                stop_words=stop_words_lang)
    tfidf_train = vectorizer.fit_transform(train_data).toarray()
    tfidf_test = [vectorizer.transform(subset).toarray() for subset in test_data]
    
    return vectorizer, tfidf_train, tfidf_test

### Model

In [49]:
from brian2 import *

def initialize_supervised_model(n_features, n_classes=3):
    start_scope()
    
    # Define parameters
    tau_pre = 20*ms
    tau_post = 20*ms
    w_max = 0.3
    w_min = 0.0
    A_pre = 0.01
    A_post = -0.01
    tau = 20*ms
    vr = -70*mV
    vt = -50*mV
    learning_rate = 0.01
    
    eqs = '''
    dv/dt = (0.04*v**2 + 5*v + 140 - u + I) / tau : volt
    du/dt = (a*(b*v - u)) / tau : volt
    I : volt
    a : 1
    b : 1
    c : volt
    d : volt
    '''
    
    # Define neuron groups
    G = NeuronGroup(n_classes, eqs, threshold='v > vt', reset='v = c; u += d', method='euler')
    G.a = 0.02
    G.b = 0.2
    G.c = -65*mV
    G.d = 8*mV

    # Define TimedArray for dynamic rates and PoissonGroup
    rate_array = TimedArray(np.zeros(n_features), dt=1*ms)  # Placeholder array
    P = PoissonGroup(n_features, rates='rate_array(t, i)')

    # Define synapses
    S = Synapses(P, G,
                 '''
                 w : 1
                 dapre/dt = -apre/tau_pre : 1 (event-driven)
                 dapost/dt = -apost/tau_post : 1 (event-driven)
                 ''',
                 on_pre='''
                 I_post += w * mV
                 apre += A_pre
                 w = clip(w + (apost - learning_rate*(target - 1)), w_min, w_max)
                 ''',
                 on_post='''
                 apost += A_post
                 w = clip(w + (apre + learning_rate*(target - 0)), w_min, w_max)
                 ''')
    
    S.connect()
    S.w = 'rand() * 0.1'
    
    model = {
        'neuron_group': G,
        'input_group': P,
        'synapses': S,
        'rate_array': rate_array  # Add rate_array to the model dict
    }
    
    return model

In [50]:
def train_model(model, tfidf_matrix, sim_time=100*ms):
    """
    Train the SNN model on a given TF-IDF matrix where each row is an input document.
    Args:
    - model: The SNN model components.
    - tfidf_matrix: The input TF-IDF matrix.
    - sim_time: The simulation time for each document.

    Returns:
    - Updated model after training.
    """
    for document_vector in tfidf_matrix:
        model['input_group'].rates = document_vector * Hz
        run(sim_time)
    return model

In [51]:
def train_supervised_model(model, tfidf_matrix, labels, sim_time=50*ms):
    """
    Train the SNN model using supervised learning. This includes dynamically updating the input rates and handling the target labels for supervised STDP.
    """
    # Check that the duration of the TimedArray matches or exceeds the simulation time
    duration = len(tfidf_matrix) * sim_time
    timestep = sim_time / len(tfidf_matrix)  # Simplified timestep assumption
    model['rate_array'] = TimedArray(np.zeros((len(tfidf_matrix), model['input_group'].N)) * Hz, dt=timestep)
    
    for idx, (document_vector, label) in enumerate(zip(tfidf_matrix, labels)):
        # Update the rates in the TimedArray for the current step
        model['rate_array'].values[idx, :] = document_vector * Hz
        
        # Define target logic and update the synapses namespace for supervised learning
        target = np.zeros(model['neuron_group'].N)
        target[label] = 1
        model['synapses'].namespace['target'] = target  # This should be reflected in synapse equations if 'target' is used there

        # Run the simulation for the specified time
        run(sim_time)

In [86]:
def predict(model, n_features, tfidf_vector, sim_time=50*ms):
    """
    Predict the class of a new document by temporarily creating a PoissonGroup and Synapses with the provided tfidf_vector.
    """
    # Ensure tfidf_vector is a 1D array matching n_features
    if tfidf_vector.ndim > 1:
        tfidf_vector = tfidf_vector.flatten()

    # Create a temporary PoissonGroup with rates set from the tfidf_vector
    temp_input_group = PoissonGroup(n_features, rates=tfidf_vector * Hz)
    
    # Create temporary synapses with the same parameters as the original model, explicitly stating synaptic model and actions
    temp_synapses = Synapses(temp_input_group, model['neuron_group'],
                             model['synapses'].model,
                             on_pre=model['synapses'].pre.code,  # Use the actual code strings or define explicitly if not available
                             on_post=model['synapses'].post.code)  # Same here
    temp_synapses.connect()
    temp_synapses.w = model['synapses'].w  # Copy weights from the trained model
    
    # Run the simulation with the temporary setup
    run(sim_time)
    
    # Determine the class by the neuron with the highest spike count
    spike_counts = [np.sum(model['neuron_group'].v[i] > model['neuron_group'].vt) for i in range(model['neuron_group'].N)]
    predicted_class = np.argmax(spike_counts)
    
    return predicted_class


In [70]:
def normalize_tfidf_vectors(tfidf_matrix):
    """
    Normaliza los vectores TF-IDF para que estén en una escala adecuada para
    ser utilizados como tasas de disparo en una red de neuronas de Poisson.
    """
    tfidf_matrix = np.array(tfidf_matrix)
    normalized = (tfidf_matrix - np.min(tfidf_matrix)) / (np.max(tfidf_matrix) - np.min(tfidf_matrix)) * 255
    return normalized

## Implementation

### Exploring data

In [71]:
data_path = '../Data/Spanish_songs_lyrics.csv'
data = pd.read_csv(data_path)

In [72]:
data.emocion.value_counts()

emocion
N    2211
P    2140
S    1606
Name: count, dtype: int64

In [73]:
# Divide las canciones en X_train, y_train, X_test y y_test con un 80/20 split
X_train, X_test, y_train, y_test = train_test_split(data['fragmento'], 
                                                    data['emocion'], 
                                                    test_size=0.2, 
                                                    random_state=42)

### Vectorizing the data

In [74]:
stop_words_esp = list(stopwords)

In [75]:
vectorizer_train, tfidf_train, tfidf_test = vectorize_text(train_data=X_train, 
                                                           test_data=[X_test], 
                                                           min_df=2, 
                                                           ngrams=(1, 2), 
                                                           stop_words_lang=stop_words_esp)
train_normalized, test_normalized = [], []
for tfidf in tfidf_train:
    train_normalized.append(normalize_tfidf_vectors(tfidf))

for tfidf in tfidf_test:
    test_normalized.append(normalize_tfidf_vectors(tfidf))

  normalized = (tfidf_matrix - np.min(tfidf_matrix)) / (np.max(tfidf_matrix) - np.min(tfidf_matrix)) * 255


In [76]:
# Example initialization and usage
n_features = tfidf_train.shape[1]
model = initialize_supervised_model(n_features=n_features, 
                                    n_classes=3)

In [77]:
def prepare_labels(labels):
    from sklearn.preprocessing import LabelEncoder
    """Encode text labels into integers."""
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    return le, labels

In [78]:
# Example TF-IDF data and labels
tfidf_matrix = train_normalized
re, labels = prepare_labels(y_train)

In [79]:
# Training the model
train_supervised_model(model, tfidf_matrix, labels)

In [87]:
# Example prediction
predicted_class = predict(model=model, 
                        n_features=n_features,
                        tfidf_vector=test_normalized[0][0])
print("Predicted Class:", predicted_class)

AttributeError: No attribute with name model

In [92]:
from brian2 import *

def initialize_supervised_model(n_features, n_classes=3):
    start_scope()

    # Neuron and synaptic parameters
    tau_pre = 20*ms
    tau_post = 20*ms
    w_max = 0.3
    w_min = 0.0
    A_pre = 0.01
    A_post = -0.01
    tau = 20*ms
    vr = -70*mV
    vt = -50*mV

    # Izhikevich model equations
    eqs = '''
    dv/dt = (0.04*v**2 + 5*v + 140 - u + I) / tau : volt
    du/dt = (a*(b*v - u)) / tau : volt
    I : volt
    a : 1
    b : 1
    c : volt
    d : volt
    '''

    G = NeuronGroup(n_classes, eqs, threshold='v > vt', reset='v = c; u += d', method='euler')
    G.a = 0.02
    G.b = 0.2
    G.c = -65*mV
    G.d = 8*mV

    rate_array = TimedArray(np.zeros((1, n_features)) * Hz, dt=1*ms)
    P = PoissonGroup(n_features, rates='rate_array(t, i)')

    # Synapses with reward-modulated STDP
    S = Synapses(P, G,
                 '''
                 w : 1
                 dapre/dt = -apre/tau_pre : 1 (event-driven)
                 dapost/dt = -apost/tau_post : 1 (event-driven)
                 target : integer (constant over runs)
                 ''',
                 on_pre='''
                 I_post += w * mV
                 apre += A_pre
                 w = clip(w + (target == i_post) * A_post, w_min, w_max)
                 ''',
                 on_post='''
                 apost += A_post
                 w = clip(w + (target == i_post) * A_pre, w_min, w_max)
                 ''')
    S.connect()
    S.w = 'rand() * w_max'
    
    model = {
        'neuron_group': G,
        'input_group': P,
        'synapses': S,
        'rate_array': rate_array
    }
    
    return model

def train_supervised_model(model, tfidf_matrix, labels, sim_time=50*ms):
    for document_vector, label in zip(tfidf_matrix, labels):
        model['rate_array'].values = document_vector.reshape((1, -1)) * Hz
        model['synapses'].target = label
        run(sim_time)

def predict(model, tfidf_vector, sim_time=50*ms):
    model['rate_array'].values = tfidf_vector.reshape((1, -1)) * Hz
    run(sim_time)

    spike_counts = [sum(model['neuron_group'].v[i] > model['neuron_group'].vt) for i in range(model['neuron_group'].N)]
    predicted_class = np.argmax(spike_counts)

    return predicted_class

In [95]:
model = initialize_supervised_model(n_features=n_features, n_classes=3)

ValueError: Equations of type 'parameter' cannot have a flag 'constant over runs', only the following flags are allowed: ['constant', 'shared']

In [93]:
train_supervised_model(model, tfidf_matrix, labels)

KeyError: 'rate_array'