In [43]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import csv
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [44]:
file_path = 'music-mental.csv'
df = pd.read_csv(file_path)

df.head()

Unnamed: 0,Timestamp,Age,Primary streaming service,Hours per day,While working,Instrumentalist,Composer,Fav genre,Exploratory,Foreign languages,...,Frequency [R&B],Frequency [Rap],Frequency [Rock],Frequency [Video game music],Anxiety,Depression,Insomnia,OCD,Music effects,Permissions
0,8/27/2022 19:29:02,18.0,Spotify,3.0,Yes,Yes,Yes,Latin,Yes,Yes,...,Sometimes,Very frequently,Never,Sometimes,3.0,0.0,1.0,0.0,,I understand.
1,8/27/2022 19:57:31,63.0,Pandora,1.5,Yes,No,No,Rock,Yes,No,...,Sometimes,Rarely,Very frequently,Rarely,7.0,2.0,2.0,1.0,,I understand.
2,8/27/2022 21:28:18,18.0,Spotify,4.0,No,No,No,Video game music,No,Yes,...,Never,Rarely,Rarely,Very frequently,7.0,7.0,10.0,2.0,No effect,I understand.
3,8/27/2022 21:40:40,61.0,YouTube Music,2.5,Yes,No,Yes,Jazz,Yes,Yes,...,Sometimes,Never,Never,Never,9.0,7.0,3.0,3.0,Improve,I understand.
4,8/27/2022 21:54:47,18.0,Spotify,4.0,Yes,No,No,R&B,Yes,No,...,Very frequently,Very frequently,Never,Rarely,7.0,2.0,5.0,9.0,Improve,I understand.


In [45]:
label_encoder = LabelEncoder()
df['Fav genre'] = label_encoder.fit_transform(df['Fav genre'])

In [46]:
df = df.drop(columns=['Timestamp', 'Permissions'])

In [51]:
df.dropna(inplace=True)

In [52]:
df.fillna(df.mean(), inplace=True)

  df.fillna(df.mean(), inplace=True)


In [54]:
X = df[['Age', 'Fav genre', 'Hours per day']]
Y = df[['Anxiety', 'Depression', 'Insomnia', 'OCD']]

In [55]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [56]:
# Standardizing the features (scaling)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [58]:
# Building a neural network model
model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),  # Input layer
    layers.Dense(64, activation='relu'),      # Hidden layer with 64 neurons and ReLU activation
    layers.Dense(32, activation='relu'),      # Hidden layer with 32 neurons and ReLU activation
    layers.Dense(4, activation='linear')      # Output layer with 4 neurons (Anxiety, Depression, Insomnia, OCD)
])


In [59]:
# Compiling the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [60]:
# Training the model
history = model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=2)

Epoch 1/100
13/13 - 1s - loss: 29.8600 - val_loss: 24.1787 - 849ms/epoch - 65ms/step
Epoch 2/100
13/13 - 0s - loss: 28.0731 - val_loss: 22.4362 - 57ms/epoch - 4ms/step
Epoch 3/100
13/13 - 0s - loss: 26.0228 - val_loss: 20.3649 - 52ms/epoch - 4ms/step
Epoch 4/100
13/13 - 0s - loss: 23.4894 - val_loss: 17.8230 - 68ms/epoch - 5ms/step
Epoch 5/100
13/13 - 0s - loss: 20.3771 - val_loss: 14.8737 - 67ms/epoch - 5ms/step
Epoch 6/100
13/13 - 0s - loss: 16.9222 - val_loss: 12.0417 - 69ms/epoch - 5ms/step
Epoch 7/100
13/13 - 0s - loss: 13.6045 - val_loss: 10.1504 - 56ms/epoch - 4ms/step
Epoch 8/100
13/13 - 0s - loss: 11.4663 - val_loss: 9.4571 - 79ms/epoch - 6ms/step
Epoch 9/100
13/13 - 0s - loss: 10.4976 - val_loss: 9.4643 - 81ms/epoch - 6ms/step
Epoch 10/100
13/13 - 0s - loss: 10.1660 - val_loss: 9.4421 - 90ms/epoch - 7ms/step
Epoch 11/100
13/13 - 0s - loss: 10.0281 - val_loss: 9.3877 - 87ms/epoch - 7ms/step
Epoch 12/100
13/13 - 0s - loss: 9.9199 - val_loss: 9.2958 - 95ms/epoch - 7ms/step
Epoch

In [61]:
# Evaluating the model on the test data
loss = model.evaluate(X_test, Y_test)
print(f"Mean Squared Error on Test Data: {loss:.2f}")


Mean Squared Error on Test Data: 8.32


In [62]:
# Defining a dictionary to map categories to numerical values
genre_mapping = {'Rock': 0, 'Pop': 1, 'Hip hop': 2, 'R&B': 3, 'Jazz': 4, 'Country': 5, 'EDM': 6, 'K pop': 7, 'Latin': 8, 'Lofi': 9, 'Metal': 10, 'Classical': 11, 'Gospel': 12, 'Folk': 13, 'Video game music': 14}
service_mapping = {'Spotify': 0, 'Pandora': 1, 'YouTube Music': 2, 'I do not use a streaming service.': 3}

In [68]:
# new data point values
age = 30
fav_genre = 'Rock'
hours_per_day = 4

In [70]:
# Transforming the 'Fav genre' and 'Primary streaming service' to numerical values using the dictionaries
fav_genre_encoded = genre_mapping.get(fav_genre, -1)  # -1 is a default value if the genre is not found

In [71]:
# Checking if the values were found in the dictionaries
if fav_genre_encoded == -1:
    print(f"'{fav_genre}' is not found in the genre mapping.")

In [None]:
new_data_point = np.array([[age, fav_genre_encoded, hours_per_day]])

# Standardizing the new data point using the same scaler
new_data_point = scaler.transform(new_data_point)


In [73]:
# Making predictions
predictions = model.predict(new_data_point)

print("Predictions (Anxiety, Depression, Insomnia, OCD):", predictions)


Predictions (Anxiety, Depression, Insomnia, OCD): [[6.5294333 5.850461  4.724788  3.23068  ]]
