<h1>Imports and API setup</h1>

In [1]:
from __future__ import print_function    # (at top of module)
import warnings
warnings.filterwarnings('always')
from spotipy.oauth2 import SpotifyClientCredentials
import json
import spotipy
import time
import sys
import csv
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
% matplotlib inline
import math
import seaborn as sns
import config


# Spotify API setup: build a client-credentials manager from the id/secret
# pair held in the imported config module, then create the API client with it
client_credentials_manager = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Verbose JSON request tracing is left switched off here; set to True to enable it
sp.trace = False

  from .qhull import *
  from .lbfgsb import _minimize_lbfgsb


## Getting the data

In [2]:
# Spotify URI of my public user profile
userId = "spotify:user:217unxkx4en4irnq4nkvgax6y"

# Spotify URI of the playlist the data is pulled from
playlistID = "spotify:user:217unxkx4en4irnq4nkvgax6y:playlist:1v3tLpVLqty2FMPm4oY2rs"

# Name of the CSV file the collected data is written to
fileName = "mydata_new_stuff.csv"

# Column names of the pandas DataFrame that will hold the data (one per row field)
columns = [
    "song_id",
    "song_title",
    "artist",
    "popularity",
    "energy",
    "liveness",
    "tempo",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "time_signature",
    "danceability",
    "key",
    "duration",
    "loudness",
    "valence",
    "mode",
]

# Rows are accumulated here, one list per song
myData = list()

# Number of playlist tracks requested per call (the original code pages in 100s)
PAGE_SIZE = 100

# Audio-feature fields copied into every row, listed in the same order as the
# feature columns of the DataFrame (from "energy" through "mode")
FEATURE_KEYS = ["energy", "liveness", "tempo", "speechiness", "acousticness",
                "instrumentalness", "time_signature", "danceability", "key",
                "duration_ms", "loudness", "valence", "mode"]

# Get the first page here so we can read the total number of songs
playlist = sp.user_playlist_tracks(userId, playlistID, "", PAGE_SIZE, 0)

# Kept as the true playlist size (it is no longer rounded up for the loop)
totalNoSongs = playlist['total']

# Walk the playlist one page at a time. Stepping from 0 by PAGE_SIZE visits every
# page exactly once, including the last one when the total is an exact multiple
# of PAGE_SIZE, and needs no division (so it behaves the same on Python 2 and 3).
for offset in range(0, totalNoSongs, PAGE_SIZE):

    print("Getting songs from ", offset, " to ", offset + PAGE_SIZE)

    # Add a delay because we are querying for a lot of data - don't want to flood
    time.sleep(10)

    # The first page was already fetched above; every later page is requested
    # once per page (not once per track)
    if offset > 0:
        playlist = sp.user_playlist_tracks(userId, playlistID, "", PAGE_SIZE, offset)

    for item in playlist['items']:

        # Now for each song in the playlist we want to get certain features
        track = item['track']

        # Skip entries that carry no track object instead of crashing on them
        if track is None:
            continue

        trackId = track['uri']
        songTitle = track['name']
        popularity = track['popularity']

        # A song might have more than one artist so we make a list of all of them
        artistName = [artist['name'] for artist in track['artists']]

        # Get features for the track
        features = sp.audio_features([trackId])
        trackFeatures = features[0] if features else None

        # When no features come back, fill the feature columns with None rather
        # than reusing the values left over from the previous track
        if trackFeatures is not None:
            featureValues = [trackFeatures[featureName] for featureName in FEATURE_KEYS]
        else:
            featureValues = [None] * len(FEATURE_KEYS)

        # Create a new row of data for the song and add it to our existing data
        newRow = [trackId, songTitle, artistName, popularity] + featureValues
        myData.append(newRow)

print("DONE!")

# Assemble the collected rows into a DataFrame with the chosen column names
df = pd.DataFrame(data=myData, columns=columns)

# Persist the Spotify data to disk as a UTF-8 CSV with a header row and no index column
print("Writing file to CSV...")
df.to_csv(path_or_buf=fileName, index=False, header=True, encoding="utf-8")

print("ALL DONE!")

# Load the CSV back in and show the first rows as a sanity check
data = pd.read_csv(fileName)
data.head()

Getting songs from  0  to  100
Getting songs from  100  to  200
Getting songs from  200  to  300
Getting songs from  300  to  400
Getting songs from  400  to  500
Getting songs from  500  to  600
Getting songs from  600  to  700
DONE!
Writing file to CSV...
ALL DONE!


Unnamed: 0,song_id,song_title,artist,popularity,energy,liveness,tempo,speechiness,acousticness,instrumentalness,time_signature,danceability,key,duration,loudness,valence,mode
0,spotify:track:01IQ4aQgOf0Kkq3a273hmO,Funky Friday,"[Dave, Fredo]",82,0.535,0.107,125.066,0.196,0.0783,0.0,4,0.975,1,182741,-7.227,0.671,1
1,spotify:track:4iZC3zbvagtT4UbvuwH7fQ,Mia Khalifa,[iLOVEFRiDAY],71,0.517,0.315,145.026,0.113,0.23,0.0,4,0.817,1,177188,-9.613,0.541,1
2,spotify:track:2Y73798F4zbbbvKT02fbtE,Finalidade Era Ficar em Casa,[MC Kevin o Chris],77,0.978,0.0846,149.888,0.198,0.571,0.212,4,0.81,6,185652,-3.949,0.703,1
3,spotify:track:5iwz1NiezX7WWjnCgY5TH4,Calma - Remix,"[Pedro Capó, Farruko]",79,0.773,0.143,126.899,0.0524,0.323,0.0,4,0.826,11,238200,-4.218,0.761,0
4,spotify:track:4ej94zqr9xYLbWphCH3TcD,Beauty & Essex (feat. Daniel Caesar & Unknown ...,"[Free Nationals, Daniel Caesar, Unknown Mortal...",65,0.741,0.334,69.709,0.0542,0.284,8.2e-05,4,0.552,11,277046,-5.419,0.718,0


## Feature description: 
![Description of the popularity feature](https://github.com/pawKer/predicting-music-popularity/blob/master/features0.PNG?raw=true "Description of popularity")
![Description of the audio features, part 1](https://github.com/pawKer/predicting-music-popularity/blob/master/features1.PNG?raw=true "Description of audio features part 1")
![Description of the audio features, part 2](https://github.com/pawKer/predicting-music-popularity/blob/master/features2.PNG?raw=true "Description of audio features part 2")