In [33]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

In [45]:
data_path = 'data/training_data_full.csv'
model_path = 'data/emotion_recognition_model.keras'

In [35]:
df = pd.read_csv(data_path, header=0)
df['word_counts'] = df['word_counts'].apply(lambda x: eval(x))
df['emotion_freqs'] = df['emotion_freqs'].apply(lambda x: eval(x))
df

Unnamed: 0,track_id,word_counts,emotion_freqs
0,TRAAAAV128F421A322,"{'like': 2, 'de': 1, 'got': 1, 'would': 1, 'se...","{'angry': 0.05, 'disgust': 0.15, 'fear': 0.15,..."
1,TRAAABD128F429CF47,"{'know': 5, 'time': 3, 'la': 7, 'get': 2, 'got...","{'angry': 0.0, 'disgust': 0.0, 'fear': 0.0, 'h..."
2,TRAAAED128E0783FAB,"{'love': 11, 'like': 1, 'time': 6, 'come': 4, ...","{'angry': 0.0, 'disgust': 0.0, 'fear': 0.02941..."
3,TRAAAEF128F4273421,"{'know': 1, 'got': 3, 'feel': 1, 'let': 1, 'wo...","{'angry': 0.07142857142857142, 'disgust': 0.07..."
4,TRAAAEW128F42930C0,"{'like': 1, 'take': 1, 'would': 1, 'wo': 1, 's...","{'angry': 0.125, 'disgust': 0.5, 'fear': 0.125..."
...,...,...,...
189030,TRZZZUK128F92E3C60,"{'love': 2, 'see': 2, 'heart': 2, 'think': 1, ...","{'angry': 0.0, 'disgust': 0.0, 'fear': 0.0, 'h..."
189031,TRZZZXA128F428ED56,"{'time': 1, 'la': 1, 'get': 1, 'eye': 1, 'thin...","{'angry': 0.14705882352941177, 'disgust': 0.14..."
189032,TRZZZXV128F4289747,"{'know': 1, 'like': 3, 'time': 1, 'get': 3, 'n...","{'angry': 0.6923076923076923, 'disgust': 0.076..."
189033,TRZZZYV128F92E996D,"{'get': 21, 'got': 3, 'let': 6, 'would': 2, 'a...","{'angry': 0.2624113475177305, 'disgust': 0.234..."


In [36]:
vectorizer = TfidfVectorizer()

X = vectorizer.fit_transform(df['word_counts'].apply(str))
y = np.array([list(emotion.values()) for emotion in df['emotion_freqs']])

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(7, activation='sigmoid')
])

In [39]:
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

In [40]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 0.0134 - mae: 0.0741 - val_loss: 0.0037 - val_mae: 0.0388
Epoch 2/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 0.0032 - mae: 0.0365 - val_loss: 0.0032 - val_mae: 0.0344
Epoch 3/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 0.0026 - mae: 0.0321 - val_loss: 0.0031 - val_mae: 0.0342
Epoch 4/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 0.0023 - mae: 0.0299 - val_loss: 0.0030 - val_mae: 0.0329
Epoch 5/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 0.0021 - mae: 0.0284 - val_loss: 0.0030 - val_mae: 0.0326
Epoch 6/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 0.0018 - mae: 0.0270 - val_loss: 0.0031 - val_mae: 0.0331
Epoch 7/10
[1m4726/4726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x1f05252a250>

In [41]:
loss, mae = model.evaluate(X_test, y_test)
mae

[1m1182/1182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0033 - mae: 0.0337


0.033932626247406006

In [46]:
model.save(model_path)