# <b>Albums Data Crawling</b>

### <b>Libraries Import</b>

In [1]:
import spotipy
import os
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from dotenv import load_dotenv

### <b>Authentication to Spotify Developer</b>

Get Client ID, Client Secret and Redirect URI of to connect to Spotify Developer.

In [2]:
load_dotenv()

client_id = os.getenv('SPOTIPY_CLIENT_ID')
client_secret = os.getenv('SPOTIPY_CLIENT_SECRET')
redirect_uri = os.getenv('SPOTIPY_REDIRECT_URI')

In [3]:
client_id, client_secret, redirect_uri

('3677a750517046759ee088f90023f8f8',
 'a47c50d9b87d482e8bc72aca283f265b',
 'https://localhost:3000/callback')

Authentication - without user

In [4]:
client_credentials_manager = SpotifyClientCredentials(client_id = client_id, client_secret = client_secret)
sp = spotipy.Spotify(auth_manager = SpotifyOAuth(client_id = client_id, client_secret = client_secret, redirect_uri = redirect_uri),
                    client_credentials_manager = SpotifyClientCredentials())

### <b>Call API To Get Data From Last 3 Years</b>

Get 1000 songs of year 2022

In [5]:
result = sp.search(q='2022', limit=50)
tracks = result['tracks']['items']

for _ in range(19):
    result = sp.next(result['tracks'])
    tracks.extend(result['tracks']['items'])

Get 1000 songs of year 2021

In [6]:
for i in range(20):
    result = sp.search(q = '2021', limit = 50, offset = i*50)
    tracks.extend(result['tracks']['items'])

Get 1000 songs of year 2020

In [7]:
for i in range(20):
    result = sp.search(q = '2020', limit = 50, offset = i*50)
    tracks.extend(result['tracks']['items'])

Total songs we have got

In [8]:
len(tracks)

3000

Get albums' uri from 3000 songs above

In [10]:
album_uri = []

for i in range(len(tracks)):
    album_uri.append(tracks[i]['album']['uri'])

spotify:album:7lPoGKpCGgdKFAxpudhAH5
spotify:album:0gr5OmB74UhoANEXwYT3gE
spotify:album:6al2VdKbb6FIz9d7lU7WRB
spotify:album:6oSxSPOg7Kuitjt2zwP7sU
spotify:album:4uDaEWj8W206qyyyndHri9
spotify:album:1Ll6pBUUA0uZttCi95oXY5
spotify:album:3Qj2vsFzmaB8jcH6Q60WIG
spotify:album:2emh7ZBzZLeQJwzAfeX1CK
spotify:album:6mPSjHXiuf2zHA0ssVp8iU
spotify:album:0gX9tkL5njRax8ymWcXARi
spotify:album:5bKmRG1QsggSXoHxYUnPIY
spotify:album:0Gmf4pfe0POEQq2FgGAj2q
spotify:album:4OZGRo7rvIxPHeioQcKhH7
spotify:album:4kJhEHkg1ggweXBa45GhPf
spotify:album:4PPV8jiPVXJdyVjSCjXEb7
spotify:album:2UUGG4Y3bmV8BBbs8DCgmF
spotify:album:5J8MNLLViH5zqM6VoGErz8
spotify:album:2pqdSWeJVsXAhHFuVLzuA8
spotify:album:6lmFOcDD9Ojs3FxdEAEJqd
spotify:album:3NZ94nQbqimcu2i71qhc4f
spotify:album:1ulbeAv0gZQqzxyPzp6pst
spotify:album:6PMZEFLrGOUBBn4BtLBp0F
spotify:album:4lfFgz2rD1irxf7dZhNJht
spotify:album:4cAcTMGFjTBufC7Eu0FizU
spotify:album:17l09k7ZDb4GYwmsIVGcRZ
spotify:album:7r2UDe86W9yNHVymWu02xG
spotify:album:4r815m6eq8OXYzfqtk3FST
s

### <b>Call API To Get Albums' Data</b>

Get albums' data by calling api through albums' uri we have got last shell.

In [None]:
album_data = []

for uri in album_uri:
    album_data.append(sp.album(uri))    

Check features of each data to decide which are necessary.

In [12]:
album_data[0].keys()

dict_keys(['album_type', 'artists', 'available_markets', 'copyrights', 'external_ids', 'external_urls', 'genres', 'href', 'id', 'images', 'label', 'name', 'popularity', 'release_date', 'release_date_precision', 'total_tracks', 'tracks', 'type', 'uri'])

### <b>Data Export</b>

Store necessary albums' information to an array following 6 columns of neccessary features.

In [13]:
album_info = {k: [] for k in album_data[0].keys() if k in {'id', 'name', 'popularity', 'available_markets', 'release_date', 'total_tracks'}}

for i in album_data:
    album_info['id'].append(i['id'])
    album_info['name'].append(i['name'])
    album_info['popularity'].append(str(i['popularity']))
    album_info['available_markets'].append(str(len(i['available_markets'])))
    album_info['release_date'].append(i['release_date'])
    album_info['total_tracks'].append(str(i['total_tracks']))

Check if albums' information is stored in array successfully or not.

In [None]:
#for i in range(len(album_info['id'])):
print(album_info['name'])

Write albums' information above into a tsv file.

In [21]:
with open('../../data/albums_data.tsv', 'w', encoding = "utf-8") as f:
    f.write('id\tname\tpopularity\tavailable_markets\trelease_date\ttotal_tracks\n')

    for i in range(len(album_info['id'])):
        f.write(album_info['id'][i] + '\t' + album_info['name'][i] + '\t' + album_info['popularity'][i] + '\t' + album_info['available_markets'][i] + '\t' + album_info['release_date'][i] + '\t' + album_info['total_tracks'][i] + '\n')