<h1>Imports and API setups</h1>

In [1]:
from __future__ import print_function    # (at top of module)
import warnings
warnings.filterwarnings('always')
from spotipy.oauth2 import SpotifyClientCredentials
import json
import spotipy
import time
import sys
import csv
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
% matplotlib inline
import math
import seaborn as sns
import config


# Spotify API setup: build a credentials manager from the id/secret pair
# stored in the local config module and hand it to the client.
client_credentials_manager = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Verbose request tracing flag: False keeps tracing switched off
sp.trace = False

  from .qhull import *
  from .lbfgsb import _minimize_lbfgsb


## Getting the data

In [None]:
# The id of the playlist where the data will come from
playlistID = "spotify:user:217unxkx4en4irnq4nkvgax6y:playlist:1v3tLpVLqty2FMPm4oY2rs"

# File name to write to
fileName = "mydata.csv"

# These will be the table headings in our data
columnNames = ["Song Title", "Artist", "Popularity", "Energy", "Liveness", "Tempo",
               "Speechiness", "Acousticness", "Instrumentalness", "Time Signature",
               "Danceability", "Key", "Duration in ms", "Loudness", "Valence", "Mode"]

# (label used when printing, key in the audio-features response), listed in
# the same order as the feature columns above
featureFields = [
    ("Energy", "energy"),
    ("Liveness", "liveness"),
    ("Tempo", "tempo"),
    ("Speechiness", "speechiness"),
    ("Acousticness", "acousticness"),
    ("Instrumentalness", "instrumentalness"),
    ("Time signature", "time_signature"),
    ("Danceability", "danceability"),
    ("Key", "key"),
    ("Duration ms", "duration_ms"),
    ("Loudness", "loudness"),
    ("Valence", "valence"),
    ("Mode", "mode"),
]

# Rows are collected in a plain list and converted once at the end; stacking
# onto a numpy array inside the loop would copy the whole matrix per track.
rows = []

# Can only get 100 tracks at a time so we use an offset
for offset in range(0, 500, 100):

    # Get 100 tracks in the playlist
    playlist = sp.user_playlist_tracks("spotify:user:217unxkx4en4irnq4nkvgax6y", playlistID,
                                       fields="", limit=100, offset=offset)

    # An empty page means the playlist is exhausted: stop instead of issuing
    # (and waiting on) further requests.
    items = playlist['items']
    if not items:
        break

    # Add a delay because we are querying for a lot of data
    time.sleep(10)

    for item in items:

        # Now for each song in the playlist we want to get certain features
        track = item['track']

        # Skip entries that carry no track object instead of crashing on them
        if track is None:
            continue

        trackId = track['uri']
        songTitle = track['name']

        # A song might have more than one artist, so all names are joined into
        # one comma-separated string (no trailing comma) to fit a single cell.
        artistName = ",".join(artist['name'] for artist in track['artists'])

        # Print to check
        print("Song title: ", songTitle)
        print("Artist: ", artistName)

        # Get features for the track
        features = sp.audio_features([trackId])
        popularity = track['popularity']

        # If the feature array is empty (or its entry is None) this usually means
        # something has gone wrong with the request. Fall back to empty strings
        # for this track so the program keeps going and the previous track's
        # values never leak into this row.
        trackFeatures = features[0] if features else None
        if trackFeatures is not None:
            featureValues = [trackFeatures[key] for _, key in featureFields]
        else:
            featureValues = [""] * len(featureFields)

        # Create a new row of data using the features above
        rows.append([songTitle, artistName, popularity] + featureValues)

        print("Popularity: ", popularity)
        for (label, _), value in zip(featureFields, featureValues):
            print(label + ": ", value)
        print("============================================================")

# Build the matrix in one go: the heading row first, then one row per track
myData = np.array([columnNames] + rows)

# Writing the data from the Spotify API to a CSV
# (the headings are already the first row of myData, so pandas must not add its own)
df = pd.DataFrame(myData)
df.to_csv(fileName, encoding="utf-8", header=False, index=False)

# Read the data from the CSV to make sure everything is fine
data = pd.read_csv(fileName)
data.head()

## Feature description: 
![alt text](https://github.com/pawKer/predicting-music-popularity/blob/master/features0.PNG?raw=true "Description of popularity")
![alt text](https://github.com/pawKer/predicting-music-popularity/blob/master/features1.PNG?raw=true "Description of audio features part 1")
![alt text](https://github.com/pawKer/predicting-music-popularity/blob/master/features2.PNG?raw=true "Description of audio features part 2")