In [15]:
import pandas as pd

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from sklearn.preprocessing import StandardScaler

In [16]:
df_cnn = pd.read_pickle('../../Data/output/df_process.pkl')

In [17]:
df_cnn.info()

<class 'pandas.core.frame.DataFrame'>
Index: 718 entries, 2 to 735
Data columns (total 49 columns):
 #   Column                                                       Non-Null Count  Dtype  
---  ------                                                       --------------  -----  
 0   Age                                                          718 non-null    float64
 1   Hours per day                                                718 non-null    float64
 2   While working                                                718 non-null    int64  
 3   Instrumentalist                                              718 non-null    int64  
 4   Composer                                                     718 non-null    int64  
 5   Exploratory                                                  718 non-null    int64  
 6   Foreign languages                                            718 non-null    int64  
 7   BPM                                                          718 non-null    float64


In [18]:
genres_freq_columns = df_cnn.iloc[:, 8:24]  # Columns 8-23
other_columns = pd.concat([df_cnn.iloc[:, :8], df_cnn.iloc[:, 24:28], df_cnn.iloc[:, 29:49]], axis=1)

target = df_cnn['Music effects']

new_columns = {}

for genre_col in genres_freq_columns.columns:
    for other_col in other_columns.columns:
        new_columns[f'{genre_col}_x_{other_col}'] = genres_freq_columns[genre_col] * other_columns[other_col]

feature_grid = pd.DataFrame(new_columns)

feature_grid['target'] = target

print(feature_grid.head())
print("Total rows in feature grid:", feature_grid.shape[0])

   Frequency [Classical]_x_Age  Frequency [Classical]_x_Hours per day   
2                    -0.000000                               0.000000  \
3                     5.997714                              -0.714973   
4                    -0.000000                               0.000000   
5                    -0.600797                               0.471515   
6                    -1.201595                              -0.383373   

   Frequency [Classical]_x_While working   
2                                      0  \
3                                      2   
4                                      0   
5                                      1   
6                                      2   

   Frequency [Classical]_x_Instrumentalist  Frequency [Classical]_x_Composer   
2                                        0                                 0  \
3                                        0                                 2   
4                                        0              

In [19]:
X = feature_grid.drop('target', axis=1).values
y = feature_grid['target'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_scaled = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [20]:
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1, activation='softmax') 
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [21]:
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.15, verbose=1)

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test Accuracy: 0.2222222238779068


Even changing of the different hyperparameters such as epochs, batch_size, validation_split, there is no change with the test accuracy. 

Other fine-tune can try:
- activation
- loss
- optimizer

Also, check the output layer - the correct output being defined

The changes made from previous (11%) to this new code (22%):
- Use labelencoding insetad of one-hot encoding for Target
- Correctly defined the Target columns
- Adding the variable fav_genre (with 15/16 more columns)