# Artists Data Crawling

### Libraries import

In [1]:
import spotipy
import csv
import os
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from dotenv import load_dotenv

### Authentication to Spotify Developer

In [2]:
# Load the SPOTIPY_* settings from a local .env file into the process
# environment so they can be read with os.getenv below.
load_dotenv()

client_id = os.getenv('SPOTIPY_CLIENT_ID')
client_secret = os.getenv('SPOTIPY_CLIENT_SECRET')
redirect_uri = os.getenv('SPOTIPY_REDIRECT_URI')

# Build the API client with a single auth manager. spotipy documents
# `client_credentials_manager` as a deprecated alias of `auth_manager`, so
# passing both only constructed an extra, unused credentials object.
spotify = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
    ),
)

### Getting 1000 tracks of each year from 2020 to 2022 and saving the received data into a list

In [4]:
# Collect up to 1000 tracks per release year: 20 pages of 50 results each.
PAGE_SIZE = 50
PAGES_PER_YEAR = 20

tracks = []
for year in range(2020, 2023):
    search_query = 'year:' + str(year)
    # The result only consists of tracks, so keep just the 'tracks' page object.
    page = spotify.search(q=search_query, limit=PAGE_SIZE)['tracks']
    tracks.extend(page['items'])

    # The first page is already stored; follow the 'next' links for the rest.
    for _ in range(PAGES_PER_YEAR - 1):
        next_result = spotify.next(page)
        if next_result is None:
            # spotify.next() returns None when there is no further page;
            # stop here instead of failing on a year with fewer results.
            break
        page = next_result['tracks']
        tracks.extend(page['items'])

### Getting the artist IDs from the list of tracks

In [5]:
# Flatten the artist lists of all tracks into a single list of artist IDs,
# keeping the order in which the tracks (and their artists) were collected.
artists_id = [
    track_artist['id']
    for track in tracks
    for track_artist in track['artists']
]

# NOTE: the IDs are not deduplicated here, so an artist credited on several
# tracks appears once per credit. An order-preserving dedup would be:
# artists_id = list(dict.fromkeys(artists_id))

### For each artist ID, use the API to get the artist's information and save all received data into a list

In [6]:
# Spotify's "Get Several Artists" endpoint accepts at most 50 IDs per request.
ARTISTS_PER_REQUEST = 50

# Fetch every distinct artist only once, in batches, instead of issuing one
# HTTP request per entry of artists_id (which may contain many repeats).
unique_ids = list(dict.fromkeys(artists_id))
artist_by_id = {}
for start in range(0, len(unique_ids), ARTISTS_PER_REQUEST):
    batch = unique_ids[start:start + ARTISTS_PER_REQUEST]
    batch_info = spotify.artists(batch)['artists']
    # Results come back in request order, so pair each with the requested ID.
    for requested_id, artist_info in zip(batch, batch_info):
        artist_by_id[requested_id] = {
            'id': artist_info['id'],
            'name': artist_info['name'],
            # Genres arrive as a list; store them as one comma-separated string.
            'genres': ', '.join(artist_info['genres']),
            # Followers is a nested object; only the total count is kept.
            'followers': artist_info['followers']['total'],
            'popularity': artist_info['popularity'],
        }

# Emit one row per entry of artists_id, in the original order. Each row is a
# separate dict so that editing one row never affects another.
artists = [dict(artist_by_id[artist_id]) for artist_id in artists_id]

### Saving the list of artists' information into the "artists_data.csv" file

In [7]:
# Column order of the output file; these are the keys of every artist row.
# Naming them explicitly means the header is still written when `artists`
# is empty, instead of failing on artists[0].
fieldnames = ['id', 'name', 'genres', 'followers', 'popularity']

# newline='' lets the csv module control line endings (otherwise blank rows
# appear on Windows). encoding='utf-8' keeps non-ASCII artist names intact
# regardless of the platform's default encoding.
with open('../../data/artists_data.csv', 'w', newline='', encoding='utf-8') as file:
    # The file is tab-delimited despite its .csv extension.
    writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(artists)