# Abstract

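This notebook uses PyTorch to build three neural networks that predict the genre of a music track from a dataset of songs released between 1950 and 2019: one model uses the lyrics, one uses engineered metadata features, and one uses both.

**TODO:** summarize the results and conclusions here once the models have been trained and evaluated.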

# Data

In [11]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from torchinfo import summary

import pandas as pd
import numpy as np
import time

# for train-test split
from sklearn.model_selection import train_test_split

# for suppressing bugged warnings from torchinfo
import warnings
warnings.filterwarnings("ignore", category = UserWarning)

# tokenizers from HuggingFace
from transformers import BertTokenizer

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


We load a [Kaggle dataset](https://www.kaggle.com/datasets/saurabhshahane/music-dataset-1950-to-2019) containing information about music released between 1950 and 2019, collected through Spotify. The dataset contains lyrics, artist info, track names, etc. Importantly, it also includes music metadata such as sadness, danceability, loudness, and acousticness. The cell below reads a copy of the CSV that is hosted on GitHub.

In [None]:
# Location of the raw CSV (hosted on GitHub), read straight into a DataFrame.
url = (
    "https://raw.githubusercontent.com/PhilChodrow/PIC16B/"
    "master/datasets/tcc_ceds_music.csv"
)
df = pd.read_csv(url)

Let's have a look at some of the raw data!

In [2]:
# Preview the first five rows of the raw table.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,release_date,genre,lyrics,len,dating,violence,world/life,...,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,topic,age
0,0,mukesh,mohabbat bhi jhoothi,1950,pop,hold time feel break feel untrue convince spea...,95,0.000598,0.063746,0.000598,...,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,sadness,1.0
1,4,frankie laine,i believe,1950,pop,believe drop rain fall grow believe darkest ni...,51,0.035537,0.096777,0.443435,...,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,world/life,1.0
2,6,johnnie ray,cry,1950,pop,sweetheart send letter goodbye secret feel bet...,24,0.00277,0.00277,0.00277,...,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,music,1.0
3,10,pérez prado,patricia,1950,pop,kiss lips want stroll charm mambo chacha merin...,54,0.048249,0.001548,0.001548,...,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,romantic,1.0
4,12,giorgos papadopoulos,apopse eida oneiro,1950,pop,till darling till matter know till dream live ...,48,0.00135,0.00135,0.417772,...,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,romantic,1.0


To predict the genre of a track, we will use the lyrics alongside a set of engineered features (metadata), which we define below.

In [3]:
# Names of the numeric metadata columns used as engineered model inputs.
engineered_features = [
    'dating',
    'violence',
    'world/life',
    'night/time',
    'shake the audience',
    'family/gospel',
    'romantic',
    'communication',
    'obscene',
    'music',
    'movement/places',
    'light/visual perceptions',
    'family/spiritual',
    'like/girls',
    'sadness',
    'feelings',
    'danceability',
    'loudness',
    'acousticness',
    'instrumentalness',
    'valence',
    'energy',
]

# Model Building 

We are going to define **three** neural networks for our classification task, so we will need three subsets of the data, one per model:

- Using Lyrics to Classify
- Using Engineered Features (Metadata) to Classify
- Using Lyrics and Metadata to Classify

In [13]:
# One input table per model. Each is an independent copy, so later edits to
# one of them do not touch `df` or the others.
df_lyrics = df.loc[:, 'lyrics'].copy()                       # lyrics only (a Series)
df_engineered = df.loc[:, engineered_features].copy()        # metadata only
df_both = df.loc[:, [*engineered_features, 'lyrics']].copy() # metadata, then lyrics

# Show the first rows of the combined table.
df_both.head()

Unnamed: 0,dating,violence,world/life,night/time,shake the audience,family/gospel,romantic,communication,obscene,music,...,like/girls,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,lyrics
0,0.000598,0.063746,0.000598,0.000598,0.000598,0.048857,0.017104,0.263751,0.000598,0.039288,...,0.000598,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,hold time feel break feel untrue convince spea...
1,0.035537,0.096777,0.443435,0.001284,0.001284,0.027007,0.001284,0.001284,0.001284,0.118034,...,0.001284,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,believe drop rain fall grow believe darkest ni...
2,0.00277,0.00277,0.00277,0.00277,0.00277,0.00277,0.158564,0.250668,0.00277,0.323794,...,0.00277,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,sweetheart send letter goodbye secret feel bet...
3,0.048249,0.001548,0.001548,0.001548,0.0215,0.001548,0.411536,0.001548,0.001548,0.001548,...,0.081132,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,kiss lips want stroll charm mambo chacha merin...
4,0.00135,0.00135,0.417772,0.00135,0.00135,0.00135,0.46343,0.00135,0.00135,0.00135,...,0.00135,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,till darling till matter know till dream live ...
