This project addresses the classification of musical tracks by energy level. Specifically, a classification model determines whether a track is low-energy, moderately energetic, or highly energetic. The model is trained on attributes of each track: key, loudness, speechiness, acousticness, instrumentalness, valence, danceability, and tempo, as well as the musical genre to which the track belongs.

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import csv  
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from keys import CLIENT_ID, CLIENT_SECRET

<h5>Collecting data</h5>

To acquire data using the Spotify API, the following functions are needed.

In [None]:
def acquire_data(playlist_link, filename):
    """Append the audio features and genre of each playlist track to a CSV file.

    The file is opened in append mode so several playlists can be collected
    into the same CSV. The header row is written only when the file is empty;
    otherwise every additional call would insert another header line in the
    middle of the data.

    Args:
        playlist_link: URL of a Spotify playlist. The playlist id is taken
            from the last path segment, ignoring any ``?query`` suffix.
        filename: Path of the CSV file to append to (created if missing).

    Tracks for which a lookup raises ``IndexError`` or ``TypeError`` (for
    example when the artist has an empty genre list) are skipped.
    """
    # authorization and creation of spotify object
    client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager, requests_timeout=10, retries=10)

    playlist_URI = playlist_link.split("/")[-1].split("?")[0]
    tracks = get_all_tracks(playlist_URI, sp)  # collecting all songs from given playlist

    fieldnames = ['key', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
                  'valence', 'genre', 'energy', 'danceability', 'tempo']

    with open(filename, 'a', encoding='UTF8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)

        # Move to the end explicitly (whence=2) so tell() reports the current
        # file size; position 0 means the file is new or empty and still
        # needs its header row.
        f.seek(0, 2)
        if f.tell() == 0:
            writer.writeheader()

        for track in tracks:
            try:
                track_uri = track["track"]["uri"]

                artist_uri = track["track"]["artists"][0]["uri"]
                artist_info = sp.artist(artist_uri)
                artist_genres = artist_info["genres"]

                audio_features = sp.audio_features(track_uri)[0]
                # saving audio features and genre of each track;
                # we take the primary (first listed) genre of the artist
                row = create_row(audio_features, artist_genres[0])
                writer.writerow(row)

            except (IndexError, TypeError):  # tracks with missing data will be skipped
                pass

In [None]:
def get_all_tracks(playlist_URI, sp):
    """Return the items of every result page of a playlist.

    Args:
        playlist_URI: Playlist identifier passed to ``sp.playlist_tracks``.
        sp: Client object providing ``playlist_tracks`` and ``next``.

    Returns:
        The ``'items'`` list of the first page, extended in place with the
        ``'items'`` of each following page.
    """
    page = sp.playlist_tracks(playlist_URI)
    collected = page['items']
    while True:
        # A falsy 'next' entry marks the last page.
        if not page['next']:
            return collected
        page = sp.next(page)
        collected.extend(page['items'])

In [None]:
def create_row(audio_features, genre):
    """Build one CSV row from a track's audio features and a genre.

    Args:
        audio_features: Mapping that contains at least the keys copied below.
        genre: Value stored under the ``'genre'`` key of the row.

    Returns:
        A new dict holding the selected audio features plus ``'genre'``.

    Raises:
        KeyError: If ``audio_features`` lacks one of the copied keys.
    """
    copied_keys = (
        'key',
        'loudness',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'valence',
        'energy',
        'danceability',
        'tempo',
    )
    row = {name: audio_features[name] for name in copied_keys}
    row['genre'] = genre
    return row