# Audio Features Data Crawling

### Libraries import

In [47]:
import spotipy
import csv
import os
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from dotenv import load_dotenv

### Authentication to Spotify Developer

In [48]:
# Load variables from a .env file (if one is found) into the process
# environment so the credentials below do not have to be hard-coded.
load_dotenv()

client_id = os.getenv('SPOTIPY_CLIENT_ID')
client_secret = os.getenv('SPOTIPY_CLIENT_SECRET')
redirect_uri = os.getenv('SPOTIPY_REDIRECT_URI')

# Only one auth manager is passed. The original call also supplied
# client_credentials_manager=SpotifyClientCredentials(), but spotipy uses
# auth_manager when both are given, so that object was built and never used.
spotify = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
    ),
)

### Getting 1000 tracks of each year from 2020 to 2022 and saving the received data into a list

In [15]:
# Crawl up to 1000 tracks per release year: 20 pages of 50 results each.
PAGE_SIZE = 50
PAGES_PER_YEAR = 20

tracks = []
for year in range(2020, 2023):
    search_query = 'year:' + str(year)
    # With the default search type the result only consists of tracks.
    search_result = spotify.search(q=search_query, limit=PAGE_SIZE)
    tracks.extend(search_result['tracks']['items'])

    # The first page is already stored, so fetch the remaining pages.
    for _ in range(PAGES_PER_YEAR - 1):
        search_result = spotify.next(search_result['tracks'])
        # spotipy returns None when there is no further page; stop paging
        # for this year instead of failing on None['tracks'].
        if search_result is None:
            break
        tracks.extend(search_result['tracks']['items'])

In [49]:
# Display how many track objects were collected by the crawl.
len(tracks)

3000

### Getting each track's ID and its first artist's ID from the list of tracks

In [50]:
# One [track ID, first artist's ID] pair per collected track, in crawl order.
tracks_id = [
    [track['id'], track['artists'][0]['id']]
    for track in tracks
]

# tracks_id = list(dict.fromkeys(tracks_id))

In [51]:
# Display how many [track ID, artist ID] pairs were built.
len(tracks_id)

3000

## Tracks' audio features and genres initialization

### For each track ID, use the API to get the track's audio features and the first genre of its artist (or of its first artist, if there is more than one), and save all received data into a list

*Some tracks don't have audio features; for each of those, we keep only its ID and genre.*

In [52]:
# Single artist lookup for the first artist of the second collected track.
spotify.artist(tracks_id[1][1])

KeyboardInterrupt: 

In [None]:
# Build one record per track: its numeric audio features, its ID, and the
# first genre of its first artist ('' when the artist has no genres).
#
# The batch endpoints are used instead of one request per track and one per
# artist; Spotify accepts up to 100 track IDs per audio-features request and
# up to 50 artist IDs per artists request.
AUDIO_FEATURES_BATCH_SIZE = 100
ARTISTS_BATCH_SIZE = 50

# Many tracks share an artist, so look each distinct artist up only once.
# dict.fromkeys de-duplicates while keeping first-seen order.
artist_ids = list(dict.fromkeys(artist_id for _, artist_id in tracks_id))
first_genre_by_artist = {}
for start in range(0, len(artist_ids), ARTISTS_BATCH_SIZE):
    artist_batch = artist_ids[start:start + ARTISTS_BATCH_SIZE]
    artist_objects = spotify.artists(artist_batch)['artists']
    # Results come back in request order, so pair them positionally.
    for artist_id, artist in zip(artist_batch, artist_objects):
        genres = artist['genres'] if artist else []
        first_genre_by_artist[artist_id] = genres[0] if genres else ''

track_features_list = []
for start in range(0, len(tracks_id), AUDIO_FEATURES_BATCH_SIZE):
    track_batch = tracks_id[start:start + AUDIO_FEATURES_BATCH_SIZE]
    features_batch = spotify.audio_features(
        [track_id for track_id, _ in track_batch]
    )
    for (track_id, artist_id), features in zip(track_batch, features_batch):
        if features is None:
            # No audio features available for this track: keep only its ID.
            record = {'id': track_id}
        else:
            # Keep the ID plus every non-string field; this drops the string
            # metadata entries returned alongside the numeric features.
            record = {
                key: value
                for key, value in features.items()
                if key == 'id' or not isinstance(value, str)
            }
        record['genre'] = first_genre_by_artist[artist_id]
        track_features_list.append(record)

In [20]:
# Display how many feature records have been collected so far.
len(track_features_list)

0

#### Bringing the key "id" to the front

In [None]:
# Rebuild each record so that 'id' is its first key; the remaining keys keep
# their existing relative order.
for position, record in enumerate(track_features_list):
    track_id = record.pop('id')
    track_features_list[position] = {'id': track_id, **record}

#### Saving to csv file

In [None]:
# Write every feature record to a tab-separated file, one row per track.

# Fail before opening the file so an existing export is not truncated when
# there is nothing to write.
if not track_features_list:
    raise ValueError(
        'track_features_list is empty; collect the audio features first'
    )

# Records for tracks without audio features only carry 'id' and 'genre', so
# gather the columns from all records (first-seen order) rather than from the
# first record alone. Missing values are written as empty cells by DictWriter.
fieldnames = list(
    dict.fromkeys(key for record in track_features_list for key in record)
)

# newline='' lets the csv module control line endings (avoids blank rows on
# Windows); an explicit encoding keeps non-ASCII genre names portable.
with open('audio_features.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(track_features_list)