## Spotify Tracks (By Genre)

A few columns, such as "Valence" and "Danceability", are [defined here](https://www.spotify-song-stats.com/about).

In [1]:
# First-time setup: uncomment the next line to install the `datasets` package
# that the following cell imports.
#!pip install datasets

In [2]:
from datasets import load_dataset
import pandas as pd
import numpy as np

# Fetch the single CSV file of the Spotify tracks dataset as its "train" split.
dataset = load_dataset(
    "maharshipandya/spotify-tracks-dataset",
    data_files="dataset.csv",
    split="train",
)

In [3]:
# Convert the loaded dataset to a pandas DataFrame with the library's own
# converter instead of letting pandas iterate over it one row (dict) at a time.
df = dataset.to_pandas()

In [4]:
# Dropping and renaming columns.
# errors='ignore' skips any listed column that is not present, so the cell can be
# re-run (after the columns were already dropped / upper-cased) and does not fail
# when only one of the two columns exists.
df = df.drop(columns=['Unnamed: 0', 'track_id'], errors='ignore')

# Columns to uppercase.
df.columns = df.columns.str.upper()

# Renaming to "DURATION" since I'll be converting the ms to HH:MM:SS (or just MM:SS if there aren't any over an hour).
# Reassigning (rather than inplace=True) keeps every step of this cell in the same style.
df = df.rename(columns={'DURATION_MS': 'DURATION'})

In [5]:
# Preview the first five rows after the column clean-up above.
df.head(n=5)

Unnamed: 0,ARTISTS,ALBUM_NAME,TRACK_NAME,POPULARITY,DURATION,EXPLICIT,DANCEABILITY,ENERGY,KEY,LOUDNESS,MODE,SPEECHINESS,ACOUSTICNESS,INSTRUMENTALNESS,LIVENESS,VALENCE,TEMPO,TIME_SIGNATURE,TRACK_GENRE
0,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [6]:
%%html
<style>
  /* Remove the left margin from every rendered table so tables sit flush left. */
  table {margin-left: 0 !important;}
</style>

#### Pitch Class Notation for the `KEY` Column

Number | Pitch  | Number | Pitch  
------ | ------ | ------ | ------
0      | C      | 6      | F♯/G♭
1      | C♯/D♭  | 7      | G    
2      | D      | 8      | A♭/G♯
3      | E♭/D♯  | 9      | A    
4      | E      | 10     | B♭/A♯
5      | F      | 11     | B   

In [7]:
# Create a function to map valence to the mood.
def map_valence_to_mood(valence):
    """Return the mood label whose valence bin contains ``valence``.

    Bins are read from the module-level ``valence_ranges`` (a list of
    ``(lower, upper)`` pairs, assumed to be listed in ascending order) and the
    labels from the parallel ``moods`` list.

    Every bin except the last is treated as the half-open interval
    ``[lower, next bin's lower)``. This closes the gaps that purely inclusive
    bounds leave between neighbouring bins (e.g. 0.0995 lies between an upper
    bound of 0.099 and the next lower bound of 0.1) while giving the same label
    as before for every value that was already inside a bin. The last bin keeps
    its own inclusive upper bound.

    Parameters
    ----------
    valence : float
        The valence value to classify.

    Returns
    -------
    str or None
        The matching mood, or ``None`` when ``valence`` is below the first
        lower bound, above the last upper bound, or NaN.
    """
    last = len(valence_ranges) - 1
    for i, (lower, upper) in enumerate(valence_ranges):
        if i < last:
            # Upper limit is the start of the next bin (exclusive).
            below_limit = valence < valence_ranges[i + 1][0]
        else:
            below_limit = valence <= upper
        if lower <= valence and below_limit:
            return moods[i]
    return None

# Create a function to map energy to the energy level.
def map_energy_to_energy_lvl(energy):
    """Return the energy-level label whose bin contains ``energy``.

    Bins are read from the module-level ``energy_ranges`` (a list of
    ``(lower, upper)`` pairs, assumed to be listed in ascending order) and the
    labels from the parallel ``energy_levels`` list.

    Every bin except the last is treated as the half-open interval
    ``[lower, next bin's lower)``, so a value that falls between one bin's
    upper bound and the next bin's lower bound (e.g. 0.0995) is still
    labelled. Values that were already inside a bin get the same label as
    before. The last bin keeps its own inclusive upper bound.

    Parameters
    ----------
    energy : float
        The energy value to classify.

    Returns
    -------
    str or None
        The matching energy level, or ``None`` when ``energy`` is below the
        first lower bound, above the last upper bound, or NaN.
    """
    last = len(energy_ranges) - 1
    for i, (lower, upper) in enumerate(energy_ranges):
        if i < last:
            # Upper limit is the start of the next bin (exclusive).
            below_limit = energy < energy_ranges[i + 1][0]
        else:
            below_limit = energy <= upper
        if lower <= energy and below_limit:
            return energy_levels[i]
    return None
            
# Durations under an hour are shown as MM:SS; anything else keeps its HH part.
def trim_hours(value):
    """Strip a leading ``'00:'`` from ``value``; return it unchanged otherwise.

    Parameters
    ----------
    value : str
        A formatted duration such as ``'00:03:50'`` or ``'01:02:03'``.

    Returns
    -------
    str
        ``value`` without its first three characters when it starts with
        ``'00:'``, else ``value`` itself.
    """
    zero_hours = '00:'
    return value[len(zero_hours):] if value.startswith(zero_hours) else value

# Lookup from the numeric KEY value (as a string) to its pitch name.
# The position of each name in the list is its key number, 0 through 11.
key_dict = {
    str(number): pitch
    for number, pitch in enumerate([
        'C', 'C♯/D♭', 'D', 'E♭/D♯', 'E', 'F',
        'F♯/G♭', 'G', 'A♭/G♯', 'A', 'B♭/A♯', 'B',
    ])
}

# Lookup from the numeric MODE flag (as a string) to its name.
mode_dict = dict(zip(('0', '1'), ('Minor', 'Major')))

# Defining a range of valence and energy for every mood / energy level.
# Each entry is a (lower, upper) pair; position i pairs with label i in the
# matching list further down.
# The top valence bin ends at 1.0, the same as the top energy bin, so a valence
# above 0.995 still gets a label instead of being left unmapped.
valence_ranges = [(0.0, 0.099), (0.1, 0.199), (0.2, 0.299), (0.3, 0.399), (0.4, 0.499),
                  (0.5, 0.599), (0.6, 0.699), (0.7, 0.799), (0.8, 0.899), (0.9, 1.0)]

energy_ranges = [(0.0, 0.099), (0.1, 0.199), (0.2, 0.299), (0.3, 0.399), (0.4, 0.499),
                 (0.5, 0.599), (0.6, 0.699), (0.7, 0.799), (0.8, 0.899), (0.9, 1.0)]

# This isn't too necessary, but wanted to try something like this by turning numerical values into more categorical elements.
# NOTE: These may not be 100% accurate.
moods = ['Dark', 'Melancholic', 'Calm', 'Neutral', 'Positive',
         'Energetic', 'Upbeat', 'Cheerful', 'Uplifting', 'Blissful']

energy_levels = ['Low', 'Low-Mid', 'Mid', 'Mid-High', 'High-Mid',
                 'High', 'Very High', 'Extremely High', 'Maximum', 'Maximum+']

# Apply the mapping functions to the 'VALENCE' / 'ENERGY' cols, store the labels in
# the new 'MOOD' / 'ENERGY_LVL' columns and drop the numeric source columns.
# The `in df` guards make this safe to re-run: once a source column has been
# dropped, its step is skipped instead of raising a KeyError.
if 'VALENCE' in df:
    # astype(str) keeps the column purely textual; a value the mapper could not
    # place (None) therefore ends up as the string 'None'.
    df['MOOD'] = df['VALENCE'].apply(map_valence_to_mood).astype(str)
    df = df.drop(columns='VALENCE')

if 'ENERGY' in df:
    df['ENERGY_LVL'] = df['ENERGY'].apply(map_energy_to_energy_lvl).astype(str)
    df = df.drop(columns='ENERGY')

# Replacing the 0 - 11 value in the KEY col with its actual musical pitch notation,
# and the 0 / 1 value in the MODE col with 'Minor' / 'Major'.
# Each lookup also maps every label to itself, so running this again leaves
# already converted values untouched instead of turning them into NaN.
key_lookup = {**key_dict, **{pitch: pitch for pitch in key_dict.values()}}
mode_lookup = {**mode_dict, **{label: label for label in mode_dict.values()}}

df['KEY'] = df['KEY'].astype(str).map(key_lookup)
df['MODE'] = df['MODE'].astype(str).map(mode_lookup)

In [8]:
# Converting the duration column from milliseconds to minutes and seconds
# (MM:SS), keeping the hour part (HH:MM:SS) only when it is not '00'.
# The conversion runs only while the column is still numeric, so re-running this
# cell does not try to parse the already formatted strings as milliseconds.
# NOTE(review): assumes DURATION arrives with a numeric dtype — confirm if the
# data source changes.
# NOTE: '%H' is the hour of the day, so a duration of 24 hours or more would wrap.
if pd.api.types.is_numeric_dtype(df['DURATION']):
    df['DURATION'] = (
        pd.to_datetime(df['DURATION'], unit='ms')
        .dt.strftime('%H:%M:%S')
        .astype(str)
        .apply(trim_hours)
    )

In [10]:
# Preview the first five rows after the category and duration conversions.
df.head(n=5)

Unnamed: 0,ARTISTS,ALBUM_NAME,TRACK_NAME,POPULARITY,DURATION,EXPLICIT,DANCEABILITY,KEY,LOUDNESS,MODE,SPEECHINESS,ACOUSTICNESS,INSTRUMENTALNESS,LIVENESS,TEMPO,TIME_SIGNATURE,TRACK_GENRE,MOOD,ENERGY_LVL
0,Gen Hoshino,Comedy,Comedy,73,03:50,False,0.676,C♯/D♭,-6.746,Minor,0.143,0.0322,1e-06,0.358,87.917,4,acoustic,Cheerful,High-Mid
1,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,02:29,False,0.42,C♯/D♭,-17.235,Major,0.0763,0.924,6e-06,0.101,77.489,4,acoustic,Calm,Low-Mid
2,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,03:30,False,0.438,C,-9.734,Major,0.0557,0.21,0.0,0.117,76.332,4,acoustic,Melancholic,Mid-High
3,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,03:21,False,0.266,C,-18.515,Major,0.0363,0.905,7.1e-05,0.132,181.74,3,acoustic,Melancholic,Low
4,Chord Overstreet,Hold On,Hold On,82,03:18,False,0.618,D,-9.681,Major,0.0526,0.469,0.0,0.0829,119.949,4,acoustic,Melancholic,High-Mid
