In [1]:
%matplotlib inline

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Configure libraries
# sns.set() is called with no arguments, i.e. seaborn's default styling is
# applied to the matplotlib figures drawn later in the notebook.
sns.set()

# Round decimals when displaying DataFrames.
# The fully qualified option name is required: the bare 'precision' pattern is
# ambiguous in current pandas (it also matches 'styler.format.precision') and
# raises OptionError.
pd.set_option('display.precision', 4)

In [3]:
# Read the weekly Billboard chart CSV (expected next to the notebook) into a DataFrame
billboardWeekly = pd.read_csv(filepath_or_buffer='billboardHotWeekly.csv')

In [4]:
# Single pass over the weekly chart rows that builds two lookups:
#   dictSortIndex: {SongID: {Instance: largest WeeksOnChart seen for that instance}}
#   peakList:      SongIDs that have at least one row with PeakPosition == 1,
#                  in order of first appearance
dictSortIndex = {}
peakList = []
# Set mirror of peakList so the "already recorded?" check is constant-time
# instead of rescanning the list for every #1 row.
seenPeakSongs = set()
for row in billboardWeekly.itertuples():
    # Record each #1 song once
    if row.PeakPosition == 1 and row.SongID not in seenPeakSongs:
        seenPeakSongs.add(row.SongID)
        peakList.append(row.SongID)
    # Keep the maximum WeeksOnChart observed per (SongID, Instance)
    weeksByInstance = dictSortIndex.setdefault(row.SongID, {})
    if (row.Instance not in weeksByInstance
            or weeksByInstance[row.Instance] < row.WeeksOnChart):
        weeksByInstance[row.Instance] = row.WeeksOnChart

In [5]:
# Minimum total weeks on chart (summed over all of a song's instances) for a
# song to be counted as long-running.
# NOTE(review): the analysis is described as "more than a year", but the
# cut-off is 51 weeks inclusive, i.e. just under 52 -- confirm the intended value.
minWeeksPopular = 51

# Songs whose per-instance maximum weeks add up to at least minWeeksPopular,
# in dictSortIndex order
popularSongs = [
    songID
    for songID, weeksByInstance in dictSortIndex.items()
    if sum(weeksByInstance.values()) >= minWeeksPopular
]

# Share of all songs that are long-running, as a fraction in [0, 1] (not a
# percentage). Falls back to 0.0 for an empty dataset instead of raising
# ZeroDivisionError.
popularMoreThan1Yr = len(popularSongs) / len(dictSortIndex) if dictSortIndex else 0.0

# Songs that reached #1 AND are long-running, kept in peakList order.
# A set is used for the membership test so peakList is not compared against
# the whole popularSongs list element by element.
popularSongLookup = set(popularSongs)
superPopular = [song for song in peakList if song in popularSongLookup]

In [6]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Authenticate against the Spotify Web API with the client-credentials flow.
# The client id and secret must never be written into the notebook: when called
# without arguments, SpotifyClientCredentials reads them from the
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment variables, so export
# both before starting the kernel. Any key pair that has been committed to
# version control should be revoked in the Spotify developer dashboard.
client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [7]:
# Build one row per #1 song (SongID in peakList) and clean the title/artist
# text so it can be used as a Spotify search query.
df = billboardWeekly[billboardWeekly['SongID'].isin(peakList)]
df = df.drop_duplicates(subset='SongID')
# .copy() belongs on the DataFrame selection (not on the column-name list) so
# the assignments below modify an independent frame rather than a slice of df.
sp_df = df[['Song', 'Performer', 'WeekID']].copy()
sp_df = sp_df.reset_index(drop=True)

# Strip literal punctuation from titles. regex=False makes the literal intent
# explicit and independent of the pandas version's default.
for literal in ("'", '"', ',', '&'):
    sp_df['Song'] = sp_df['Song'].str.replace(literal, '', regex=False)
# Keep only the text before the first '/' and then before the first '-'
sp_df['Song'] = sp_df['Song'].str.split('/').str[0]
sp_df['Song'] = sp_df['Song'].str.split('-').str[0]
# Remove parenthesised text. regex=True is required: pandas >= 2.0 treats the
# pattern as a literal string by default, which would silently match nothing.
sp_df['Song'] = sp_df['Song'].str.replace(r"\(.*\)", "", regex=True)

sp_df['Performer'] = sp_df['Performer'].str.replace('&', '', regex=False)
# Drop the joining words between credited artists. The \b word boundaries stop
# the replacement from eating letters inside names (e.g. "Withers", "Anderson").
sp_df['Performer'] = sp_df['Performer'].str.replace(
    r"\b(?:Featuring|With|And)\b", "", regex=True
)
sp_df['Performer'] = sp_df['Performer'].str.replace(r"\(.*\)", "", regex=True)

In [8]:
# Query Spotify for every cleaned (Performer, Song) pair in sp_df and collect
# six audio features per song. Exactly one entry is appended to each list per
# row of sp_df, so the lists stay aligned with sp_df's rows. Songs without a
# search hit or without audio features get the string placeholder 'NaN' in all
# six lists.
dance = []
energy = []
speech = []
acoustic = []
valence = []
tempo = []
# Spotify audio-feature key -> list that collects it
featureLists = {
    'danceability': dance,
    'energy': energy,
    'speechiness': speech,
    'acousticness': acoustic,
    'valence': valence,
    'tempo': tempo,
}
for i in sp_df.index:
    artist = sp_df.loc[i, 'Performer']
    track = sp_df.loc[i, 'Song']
    track_search = sp.search(q='artist:' + artist + ' track:' + track, limit=1, type='track')
    matches = track_search['tracks']['items']
    if track_search['tracks']['total'] == 0 or not matches:
        # No search hit: skip the audio-features request instead of sending
        # the dummy id 'none' to the API.
        track_id = 'none'
        atts = [None]
    else:
        track_id = matches[0]['id']
        atts = sp.audio_features(track_id)
    # audio_features answers with [None] when the track has no features
    hasFeatures = bool(atts) and atts[0] is not None
    for featureKey, collected in featureLists.items():
        # The same 'NaN' placeholder is used for every feature
        collected.append(atts[0][featureKey] if hasFeatures else 'NaN')

In [None]:
# Attach the collected Spotify features to sp_df as numeric columns.
# pd.to_numeric(..., errors='coerce') turns any non-numeric placeholder
# (e.g. the 'NaN' strings used for songs without features) into a real NaN, so
# the columns end up float-typed instead of object-typed.
featureColumns = {
    'danceability': dance,
    'energy': energy,
    'speechiness': speech,
    'acousticness': acoustic,
    'valence': valence,
    'tempo': tempo,
}
for column, collected in featureColumns.items():
    sp_df[column] = pd.to_numeric(collected, errors='coerce')

# Drop songs without Spotify features. dropna returns a new frame, so the
# WeekID assignment below does not write into a slice of the unfiltered data.
sp_df = sp_df.dropna(subset=['danceability'])

# Reduce WeekID to the first three of its last four characters.
# NOTE(review): assumes WeekID ends in a four-digit year (e.g. '...1958' ->
# '195'), giving a decade key -- confirm against the CSV. This step is not
# idempotent: re-running the cell shortens the value again.
sp_df['WeekID'] = sp_df['WeekID'].str[-4:-1]

# Preview only the first rows rather than printing the whole frame
sp_df.head()

In [None]:
# Split the songs into one frame per decade, keyed on the three-character
# decade prefix stored in WeekID ('195' -> 1950s, ..., '201' -> 2010s).
decadeKey = sp_df['WeekID']
fifties = sp_df[decadeKey == '195']
sixties = sp_df[decadeKey == '196']
seventies = sp_df[decadeKey == '197']
eighties = sp_df[decadeKey == '198']
nineties = sp_df[decadeKey == '199']
oughts = sp_df[decadeKey == '200']
tens = sp_df[decadeKey == '201']

In [None]:
# Mean of each audio feature over the 1950s songs, unpacked in column order
dance50, energy50, speech50, acoustic50, valence50, tempo50 = (
    fifties[feature].mean()
    for feature in ('danceability', 'energy', 'speechiness',
                    'acousticness', 'valence', 'tempo')
)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 1950s songs.
dance50 = fifties['danceability'].mean()
energy50 = fifties['energy'].mean()
speech50 = fifties['speechiness'].mean()
acoustic50 = fifties['acousticness'].mean()
valence50 = fifties['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance50, energy50, speech50, acoustic50, valence50]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('1950s', y=1.08)

# Draw one spoke per category and label it
plt.xticks(angles[:-1], categories, color='grey', size=8)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 1960s songs.
dance60 = sixties['danceability'].mean()
energy60 = sixties['energy'].mean()
speech60 = sixties['speechiness'].mean()
acoustic60 = sixties['acousticness'].mean()
valence60 = sixties['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance60, energy60, speech60, acoustic60, valence60]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('1960s', y=1.08)

# Draw one spoke per category and label it
plt.xticks(angles[:-1], categories, color='grey', size=8)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 1970s songs.
dance70 = seventies['danceability'].mean()
energy70 = seventies['energy'].mean()
speech70 = seventies['speechiness'].mean()
acoustic70 = seventies['acousticness'].mean()
valence70 = seventies['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance70, energy70, speech70, acoustic70, valence70]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('1970s', y=1.08)

# Draw one spoke per category and label it
plt.xticks(angles[:-1], categories, color='grey', size=8)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 1980s songs.
dance80 = eighties['danceability'].mean()
energy80 = eighties['energy'].mean()
speech80 = eighties['speechiness'].mean()
acoustic80 = eighties['acousticness'].mean()
valence80 = eighties['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance80, energy80, speech80, acoustic80, valence80]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('1980s', y=1.08)

# Draw one spoke per category and label it
plt.xticks(angles[:-1], categories, color='grey', size=8)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 1990s songs.
dance90 = nineties['danceability'].mean()
energy90 = nineties['energy'].mean()
speech90 = nineties['speechiness'].mean()
acoustic90 = nineties['acousticness'].mean()
valence90 = nineties['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance90, energy90, speech90, acoustic90, valence90]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('1990s', y=1.08)

# Draw one spoke per category and label it. The label size was left empty
# ("size=)"), which is a SyntaxError; 8 is the value most of the other decade
# charts use.
plt.xticks(angles[:-1], categories, color='grey', size=8)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 2000s songs.
dance00 = oughts['danceability'].mean()
energy00 = oughts['energy'].mean()
speech00 = oughts['speechiness'].mean()
acoustic00 = oughts['acousticness'].mean()
valence00 = oughts['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance00, energy00, speech00, acoustic00, valence00]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('2000s', y=1.08)

# Draw one spoke per category and label it
plt.xticks(angles[:-1], categories, color='grey', size=10)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
 
# Radar (spider) chart of the mean audio features of the 2010s songs.
dance10 = tens['danceability'].mean()
energy10 = tens['energy'].mean()
speech10 = tens['speechiness'].mean()
acoustic10 = tens['acousticness'].mean()
valence10 = tens['valence'].mean()
categories = ['danceability', 'energy', 'speechiness', 'acousticness', 'valence']
values = [dance10, energy10, speech10, acoustic10, valence10]
# Repeat the first value so the plotted outline closes on itself
values += values[:1]

# One evenly spaced angle per category (derived from the category list rather
# than a hard-coded count), closed the same way as the values
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

# Initialise the polar axes and label the chart so it can be told apart from
# the other decades
ax = plt.subplot(111, polar=True)
ax.set_title('2010s', y=1.08)

# Draw one spoke per category and label it
plt.xticks(angles[:-1], categories, color='grey', size=10)

# Outline of the feature means
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Shade the enclosed area; the trailing semicolon keeps the returned artist
# list out of the cell output
ax.fill(angles, values, 'b', alpha=0.1);