# Classify Song Genres from Audio Data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sn

### Loading the `echonest-metrics.json` dataset

In [6]:
# Read the Echo Nest audio metrics from JSON and preview the first five rows.
em_data = pd.read_json("echonest-metrics.json")
em_data.head(5)

Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence
0,2,0.416675,0.675894,0.634476,0.010628,0.177647,0.15931,165.922,0.576661
1,3,0.374408,0.528643,0.817461,0.001851,0.10588,0.461818,126.957,0.26924
2,5,0.043567,0.745566,0.70147,0.000697,0.373143,0.124595,100.26,0.621661
3,10,0.95167,0.658179,0.924525,0.965427,0.115474,0.032985,111.562,0.96359
4,134,0.452217,0.513238,0.56041,0.019443,0.096567,0.525519,114.29,0.894072


### Loading the `fma-rock-vs-hiphop.csv` dataset

In [5]:
# Read the rock-vs-hip-hop track metadata from CSV and preview the first five rows.
rh_data = pd.read_csv("fma-rock-vs-hiphop.csv")
rh_data.head(5)

Unnamed: 0,track_id,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
0,135,256000,1,,2008-11-26 01:43:26,2008-11-26 00:00:00,837,0,Rock,"[45, 58]",...,,2484,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1832,,0,,[],Father's Day
1,136,256000,1,,2008-11-26 01:43:35,2008-11-26 00:00:00,509,0,Rock,"[45, 58]",...,,1948,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1498,,0,,[],Peel Back The Mountain Sky
2,151,192000,0,,2008-11-26 01:44:55,,192,0,Rock,[25],...,,701,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,148,,4,,[],Untitled 04
3,152,192000,0,,2008-11-26 01:44:58,,193,0,Rock,[25],...,,637,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,98,,11,,[],Untitled 11
4,153,256000,0,Arc and Sender,2008-11-26 01:45:00,2008-11-26 00:00:00,405,5,Rock,[26],...,,354,en,Attribution-NonCommercial-NoDerivatives (aka M...,424,,2,,[],Hundred-Year Flood


In [8]:
# Summarise the metrics frame: row count, per-column non-null counts and dtypes.
em_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13129 entries, 0 to 13128
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track_id          13129 non-null  int64  
 1   acousticness      13129 non-null  float64
 2   danceability      13129 non-null  float64
 3   energy            13129 non-null  float64
 4   instrumentalness  13129 non-null  float64
 5   liveness          13129 non-null  float64
 6   speechiness       13129 non-null  float64
 7   tempo             13129 non-null  float64
 8   valence           13129 non-null  float64
dtypes: float64(8), int64(1)
memory usage: 1.0 MB


In [9]:
# Summarise the track-metadata frame: row count, per-column non-null counts and dtypes.
rh_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17734 entries, 0 to 17733
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   track_id       17734 non-null  int64 
 1   bit_rate       17734 non-null  int64 
 2   comments       17734 non-null  int64 
 3   composer       166 non-null    object
 4   date_created   17734 non-null  object
 5   date_recorded  1898 non-null   object
 6   duration       17734 non-null  int64 
 7   favorites      17734 non-null  int64 
 8   genre_top      17734 non-null  object
 9   genres         17734 non-null  object
 10  genres_all     17734 non-null  object
 11  information    482 non-null    object
 12  interest       17734 non-null  int64 
 13  language_code  4089 non-null   object
 14  license        17714 non-null  object
 15  listens        17734 non-null  int64 
 16  lyricist       53 non-null     object
 17  number         17734 non-null  int64 
 18  publisher      52 non-null

In [11]:
# Keep only the rows of the track metadata that are missing a value in at least one column.
nan_df = rh_data.loc[rh_data.isna().any(axis="columns")]
nan_df

Unnamed: 0,track_id,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
0,135,256000,1,,2008-11-26 01:43:26,2008-11-26 00:00:00,837,0,Rock,"[45, 58]",...,,2484,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1832,,0,,[],Father's Day
1,136,256000,1,,2008-11-26 01:43:35,2008-11-26 00:00:00,509,0,Rock,"[45, 58]",...,,1948,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1498,,0,,[],Peel Back The Mountain Sky
2,151,192000,0,,2008-11-26 01:44:55,,192,0,Rock,[25],...,,701,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,148,,4,,[],Untitled 04
3,152,192000,0,,2008-11-26 01:44:58,,193,0,Rock,[25],...,,637,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,98,,11,,[],Untitled 11
4,153,256000,0,Arc and Sender,2008-11-26 01:45:00,2008-11-26 00:00:00,405,5,Rock,[26],...,,354,en,Attribution-NonCommercial-NoDerivatives (aka M...,424,,2,,[],Hundred-Year Flood
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17729,155063,320000,0,,2017-03-24 19:40:43,,283,3,Hip-Hop,"[21, 811]",...,,1283,,Attribution,1050,,4,,"['old school beats', '2017 free instrumentals'...",Been On
17730,155064,320000,0,,2017-03-24 19:40:44,,250,2,Hip-Hop,"[21, 811]",...,,1077,,Attribution,858,,2,,"['old school beats', '2017 free instrumentals'...",Send Me
17731,155065,320000,0,,2017-03-24 19:40:45,,219,3,Hip-Hop,"[21, 811]",...,,1340,,Attribution,1142,,1,,"['old school beats', '2017 free instrumentals'...",The Question
17732,155066,320000,0,,2017-03-24 19:40:47,,252,6,Hip-Hop,"[21, 811]",...,,2065,,Attribution,1474,,3,,"['old school beats', '2017 free instrumentals'...",Roy


In [12]:
# Count the missing values in each column of the track metadata.
# The bare expression is left as the cell's last line so the notebook displays
# it as the cell result instead of writing plain text to stdout via print();
# isna() matches the spelling used when the rows with missing values were selected.
rh_data.isna().sum()

track_id             0
bit_rate             0
comments             0
composer         17568
date_created         0
date_recorded    15836
duration             0
favorites            0
genre_top            0
genres               0
genres_all           0
information      17252
interest             0
language_code    13645
license             20
listens              0
lyricist         17681
number               0
publisher        17682
tags                 0
title                0
dtype: int64


In [14]:
# Narrow the rows with missing values down to six of the columns that
# contain nulls, so the gaps can be inspected side by side.
nan_df[
    [
        "composer",
        "date_recorded",
        "information",
        "language_code",
        "lyricist",
        "publisher",
    ]
]

Unnamed: 0,composer,date_recorded,information,language_code,lyricist,publisher
0,,2008-11-26 00:00:00,,en,,
1,,2008-11-26 00:00:00,,en,,
2,,,,en,,
3,,,,en,,
4,Arc and Sender,2008-11-26 00:00:00,,en,,
...,...,...,...,...,...,...
17729,,,,,,
17730,,,,,,
17731,,,,,,
17732,,,,,,
