In [1]:
import pandas as pd
import math
from matplotlib import pyplot as plt 
import seaborn as sns
import csv


In [2]:
# Chord labels that getKeyMap() uses as dictionary keys, one row per root note:
# the bare root, its sharp (where listed), and the "dim" / "m" variants of each
# (presumably diminished and minor chords).
listKeys = [
    'C', 'C#', 'C#dim', 'C#m', 'Cdim', 'Cm',
    'D', 'D#', 'D#dim', 'D#m', 'Ddim', 'Dm',
    'E', 'Edim', 'Em',
    'F', 'F#', 'F#dim', 'F#m', 'Fdim', 'Fm',
    'G', 'G#', 'G#dim', 'G#m', 'Gdim', 'Gm',
    'A', 'A#', 'A#dim', 'A#m', 'Adim', 'Am',
    'B', 'Bdim', 'Bm',
]

# Mood label -> integer code. The codes follow the order in which setMoodLabel()
# tests its valence bands, and increasing energy within each band.
MOOD_CATEGORIES = {
    # valence < 0.33
    'sad': 1, 'bored': 2, 'nervous': 3, 'angry': 4,
    # 0.33 <= valence < 0.66
    'sleepy': 5, 'calm': 6, 'excited': 7,
    # valence >= 0.66
    'peaceful': 8, 'relaxed': 9, 'pleased': 10, 'happy': 11,
}


def getKeyMap():
    """Return a new dict that maps every label in listKeys to 0.

    A fresh dict is built on each call, so the caller may mutate the result
    without affecting later calls. Keys appear in listKeys order; a label that
    occurs more than once in listKeys still yields a single key.
    """
    return dict.fromkeys(listKeys, 0)

In [3]:
# Pitch-class table: integer -> note names (enharmonic spellings) and solfege syllable.
#   0   C        (also B♯, D double-flat)                 do
#   1   C♯, D♭   (also B double-sharp)
#   2   D        (also C double-sharp, E double-flat)     re
#   3   D♯, E♭   (also F double-flat)
#   4   E        (also D double-sharp, F♭)                mi
#   5   F        (also E♯, G double-flat)                 fa
#   6   F♯, G♭   (also E double-sharp)
#   7   G        (also F double-sharp, A double-flat)     sol
#   8   G♯, A♭
#   9   A        (also G double-sharp, B double-flat)     la
#  10   A♯, B♭   (also C double-flat); 10 is also written "t" or "A"
#  11   B        (also A double-sharp, C♭); 11 is also written "e" or "B"   si
#
# spotifyKeys[i] is the sharp spelling of pitch class i from the table above.
# NOTE(review): presumably indexed by the integer `key` value that Spotify
# reports for a track -- confirm against the code that consumes this list.
spotifyKeys = [
    'C', 'C#', 'D', 'D#',
    'E', 'F', 'F#', 'G',
    'G#', 'A', 'A#', 'B',
]

In [4]:
# Sanity check: expect 12 entries, one per pitch class.
len(spotifyKeys)

12

### Loading DataFrames in a clean way

In [5]:
# Data-loading helpers for this notebook.
# loadCoverGroups() reads the cover groups listed in FinalCoverList.csv.
# loadDataFrames() joins the Spotify features with the transition matrices and
# cleans up their indexes.
# Usage: spotifyFeatures, transitionMatrices, df = loadDataFrames()
def loadCoverGroups(path='./data/unified/FinalCoverList.csv'):
    """Read the cover-groups CSV and return its rows as lists of ints.

    The first row of the file is treated as a header and skipped. Every
    remaining non-empty row becomes one list, with each cell converted by
    int() (presumably song ids -- confirm against the file).

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to './data/unified/FinalCoverList.csv'.

    Returns
    -------
    list of list of int
        One inner list per non-empty data row, in file order. A file that is
        empty or holds only the header row yields [].

    Raises
    ------
    ValueError
        If a cell cannot be converted by int().
    """
    with open(path, newline='') as f:
        reader = csv.reader(f)
        # next() with a default skips the header without failing on an empty file.
        next(reader, None)
        return [[int(cell) for cell in row] for row in reader if row]
def loadDataFrames(featuresPath='./data/unified/03-spotifyFeaturesDirty.csv',
                   matricesPath='./data/unified/transitionMatrices.csv'):
    """Load the Spotify features and transition matrices, and merge them on id.

    Parameters
    ----------
    featuresPath : str or path-like, optional
        CSV with the Spotify features; must contain an 'id_chord' column.
    matricesPath : str or path-like, optional
        CSV with the transition matrices; must contain an 'id' column.

    Returns
    -------
    spotifyFeatures : pandas.DataFrame
        Features sorted by and indexed on 'id' (renamed from 'id_chord').
    transitionMatrices : pandas.DataFrame
        Transition matrices sorted by and indexed on 'id' (cast to int64).
    df : pandas.DataFrame
        Inner merge of the two frames on 'id', with every column whose value
        is the same in all rows removed. 'id' stays a regular column here.
        When no ids match, the empty merged frame is returned as is.
    """
    # The first column of each file is dropped by position -- presumably the
    # row index that was written out when the CSV was saved (TODO confirm).
    spotifyFeatures = pd.read_csv(featuresPath).iloc[:, 1:]
    transitionMatrices = pd.read_csv(matricesPath).iloc[:, 1:]

    spotifyFeatures = (
        spotifyFeatures
        .sort_values('id_chord')
        .rename(columns={"id_chord": "id"})
    )
    transitionMatrices = transitionMatrices.sort_values('id')
    # Cast so both frames carry an integer 'id' as the merge key.
    transitionMatrices['id'] = transitionMatrices['id'].astype('int64')

    df = pd.merge(spotifyFeatures, transitionMatrices, on='id')

    # Remove constant columns by comparing every row with the first one.
    # An empty merge has no first row, so there is nothing to compare.
    if not df.empty:
        df = df.loc[:, (df != df.iloc[0]).any()]

    spotifyFeatures = spotifyFeatures.set_index('id')
    transitionMatrices = transitionMatrices.set_index('id')

    return spotifyFeatures, transitionMatrices, df

## Plotting Distribution Graphs

In [6]:
from matplotlib import pyplot as plt 

def createKDeplot(df, features, numColumns = 2, figsize=(15, 18)):
    """Draw one KDE plot per feature on a grid of subplots in a new figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Data passed to seaborn as ``data``.
    features : sequence of str
        Column names to plot, one subplot each, in the given order.
    numColumns : int, optional
        Number of subplot columns; the row count is derived from it.
    figsize : tuple of (float, float), optional
        Figure size handed to ``plt.figure``. Defaults to (15, 18).

    Returns
    -------
    None
        The plots are drawn on the current matplotlib figure.
    """
    numRows = math.ceil(len(features) / numColumns)

    plt.figure(figsize=figsize)

    # Subplot positions are 1-based, hence start=1.
    for position, feature in enumerate(features, start=1):
        plt.subplot(numRows, numColumns, position)
        sns.kdeplot(data=df, x=feature)

## Mood Calculation

In [7]:
def setMoodNumber(cat):
    """Return the MOOD_CATEGORIES code for the mood label stored on ``cat``.

    Parameters
    ----------
    cat : object with a ``mood`` attribute
        Presumably a DataFrame row supplied by ``DataFrame.apply(..., axis=1)``
        -- confirm against the caller.

    Returns
    -------
    int or None
        The integer code registered for ``cat.mood`` in MOOD_CATEGORIES, or
        None when the label is not in the table.
    """
    # Look the label up in the table itself so this function cannot drift out
    # of sync with MOOD_CATEGORIES when labels are added or renumbered.
    return MOOD_CATEGORIES.get(cat.mood)
    

def setMoodLabel(row):
    """Classify a row into a mood label from its valence and energy.

    Valence picks one of three bands (< 0.33, < 0.66, >= 0.66) and energy
    picks a quarter (< 0.25, < 0.5, < 0.75, >= 0.75) within that band:

        valence band      energy quarters, lowest to highest
        < 0.33            sad, bored, nervous, angry
        0.33 to < 0.66    sleepy, calm, calm, excited
        >= 0.66           peaceful, relaxed, pleased, happy

    Returns None when no comparison holds, e.g. when valence or energy is NaN.
    """
    valence = row.valence
    energy = row.energy

    # Each tuple lists the labels for the four energy quarters, lowest first.
    if valence < 0.33:
        byEnergy = ('sad', 'bored', 'nervous', 'angry')
    elif valence < .66:
        # The middle band uses 'calm' for both central energy quarters.
        byEnergy = ('sleepy', 'calm', 'calm', 'excited')
    elif valence >= 0.66:
        byEnergy = ('peaceful', 'relaxed', 'pleased', 'happy')
    else:
        # Only reachable when valence compares False everywhere (NaN).
        return None

    if energy < 0.25:
        return byEnergy[0]
    if energy < 0.5:
        return byEnergy[1]
    if energy < 0.75:
        return byEnergy[2]
    if energy >= 0.75:
        return byEnergy[3]
    # NaN energy falls through every comparison above.
    return None


## Drop features with no variance

In [8]:
from sklearn.feature_selection import VarianceThreshold

def dropFeaturesWithNoVariance(df):
    """Return df restricted to the columns a VarianceThreshold selector keeps.

    Each column is divided by its own mean before the selector is fitted, and
    the selector is built with a threshold of 0.001. The returned frame holds
    the original (unscaled) values of the columns the selector supports. A
    one-line summary of the column counts before and after is printed.

    NOTE(review): a column whose mean is 0 produces NaN or infinite values
    after the division -- check how the selector treats such columns before
    relying on this for zero-centred data.
    """
    # Scale by the column means so the threshold applies to relative spread.
    scaled = df / df.mean()

    selector = VarianceThreshold(threshold=0.001)
    selector.fit(scaled)

    # Boolean mask with one entry per column; True means the column is kept.
    keep = selector.get_support()
    reduced_df = df.loc[:, keep]

    columnsBefore = df.shape[1]
    columnsAfter = reduced_df.shape[1]
    print("Dimensionality reduced from {} to {}.".format(columnsBefore, columnsAfter))

    return reduced_df