# Spotify API Access, Data Retrieval, and Graph Creation

## Load the libraries

In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from IPython.display import clear_output
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pickle

## Access to Spotify API
This part sets up the Spotify API access by providing the client ID and client secret. It uses the SpotifyClientCredentials class from Spotipy to authenticate and create a Spotify client.

In [2]:
client_id = "f5e3a31fa56a478baa39be2677673773"
client_secret = "f73770724a174477950672de71ea99ae"

credmanager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=credmanager)


## Demonstrating some features of Spotify API

This part demonstrates how to use the Spotify API to search for the artist, retrieve their information, and find related artists.

In [3]:
artist_search = sp.search('ed sheeran', type='artist')['artists']['items'][0]
print(artist_search)

{'external_urls': {'spotify': 'https://open.spotify.com/artist/6eUKZXaKkcviH0Ku9w2n3V'},
 'followers': {'href': None, 'total': 112147503},
 'genres': ['pop', 'uk pop'],
 'href': 'https://api.spotify.com/v1/artists/6eUKZXaKkcviH0Ku9w2n3V',
 'id': '6eUKZXaKkcviH0Ku9w2n3V',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/ab6761610000e5eb9e690225ad4445530612ccc9',
   'width': 640},
  {'height': 320,
   'url': 'https://i.scdn.co/image/ab676161000051749e690225ad4445530612ccc9',
   'width': 320},
  {'height': 160,
   'url': 'https://i.scdn.co/image/ab6761610000f1789e690225ad4445530612ccc9',
   'width': 160}],
 'name': 'Ed Sheeran',
 'popularity': 92,
 'type': 'artist',
 'uri': 'spotify:artist:6eUKZXaKkcviH0Ku9w2n3V'}

### Searching Artist

Performs a search for the artist using the sp.search function and retrieves the search results. It then extracts important features of the artist, such as name, ID, popularity, genre, and number of followers.

In [4]:
def extract_artist_features(spotify_search_result):
    result = {
        'artist_name': spotify_search_result.get('name', 'artist_name_not_available'),
        'artist_id': spotify_search_result.get('id', 'artist_id_not_available'),
        'artist_popularity': spotify_search_result.get('popularity', 0),
        'artist_first_genre': (spotify_search_result.get('genres', ['genre_not_available']) + ['genre_not_available'])[0],
        'artist_n_followers': spotify_search_result.get('followers', {}).get('total', 0)
    }
    return result

In [5]:
artist_features = extract_artist_features(artist_search)
print(artist_features)

{'artist_name': 'Ed Sheeran',
 'artist_id': '6eUKZXaKkcviH0Ku9w2n3V',
 'artist_popularity': 92,
 'artist_first_genre': 'pop',
 'artist_n_followers': 112147503}

### Retrieving related artists

It retrieves the related artists of the artist using the sp.artist_related_artists function. It prints the number of related artists and displays some information about the first 20 related artists.

In [6]:
artist_related_artists = sp.artist_related_artists(artist_features['artist_id'])['artists']

print('Ed Sheeran has', len(artist_related_artists), 'related artists. The first one is', artist_related_artists[0]['name'], '\n')

for i in range(20):
    print(artist_related_artists[i]['name'])

Ed Sheeran has 20 related artists. The first one is James Arthur 

James Arthur
Shawn Mendes
James TW
Sam Smith
Charlie Puth
Hailee Steinfeld
Calum Scott
Liam Payne
Niall Horan
James Bay
Lewis Capaldi
Lukas Graham
Nick Jonas
Alessia Cara
The Vamps
DNCE
Meghan Trainor
Cheat Codes
Camila Cabello
The Script


Every artist on Spotify has only 20 related artists.

## Create a graph

This part demonstrates how to create a graph representation using the networkx library. It creates an empty undirected graph and sets a popularity threshold. It adds initial nodes to the graph based on a list of artist names stored in a file.

In [7]:
G = nx.Graph()  # create an empty graph
popularity_threshold = 50  # if an artist has a lower popularity, it won't be in our graph.

### Add some initial nodes to the graph

It reads the artist names from a file and searches for each artist using the Spotify API. If the artist is sufficiently popular (above the popularity threshold), it adds the artist as a node to the graph, with attributes representing the artist's features.

In [8]:
with open('foreign_new.txt', 'r', encoding='utf-8') as file:
    artists_name_list = file.read().splitlines()

print('There are', len(artists_name_list), 'artists in the initial list.')

There are 18963 artists in the initial list.


In [9]:
not_popular = 0
for name in artists_name_list:
    if name.strip() == "":
        continue
    try:
        search_results = sp.search(name, type='artist')['artists']['items']
        if search_results:
            search = search_results[0]
            this_artist = extract_artist_features(search)
            if this_artist['artist_popularity'] >= popularity_threshold:
                G.add_node(this_artist['artist_name'], **this_artist, related_found=False)
            else:
                not_popular += 1
                clear_output(wait=True)
                print(name, 'is not a popular artist, we do not add it to our graph.')
                print(f"there are {not_popular} nonpopular artists")
        else:
            print(name, 'could not be found.')
    except spotipy.SpotifyException as e:
        print("An error occurred while searching for", name)
        print("Error message:", str(e))
        print(f"there are {not_popular} nonpopular artists")

The Yossarians is not a popular artist, we do not add it to our graph.
there are 13868 nonpopular artists


In [None]:
nodes_to_remove = []

for node in G:
    if "$" in node:
        nodes_to_remove.append(node)

for node in nodes_to_remove:
    G.remove_node(node)

### Adding edges and more nodes to the graph

This part iterates through the nodes in the graph and finds their related artists, adding them as nodes and creating edges between the artists. The process continues until there are no new artists to add or the number of artists in the graph exceeds 1000.

In [10]:
dummy = 0

while dummy == 0:
    l = len(G)  # number of nodes in the graph currently
    num_edges = len(G.edges)  # number of edges in the graph currently
    
    for x in list(G):  # iterate each node
        if G.nodes[x]['related_found'] == False:  # then we need to find its related artists
            relateds = sp.artist_related_artists(G.nodes[x]['artist_id'])['artists']
            relateds = [extract_artist_features(r) for r in relateds]  # Updated line
            relateds_names = [r['artist_name'] for r in relateds]
            G.nodes[x]['related_found'] = True  # it was False, but now we found its related artists
            
            for rname, rdict in zip(relateds_names, relateds):
                if rdict['artist_popularity'] >= popularity_threshold:
                    
                    if rname in G:  # node already in G
                        pass  # do nothing
                    
                    else:
                        G.add_node(rname, **rdict, related_found=False)  # we added a new node, we don't know its relateds yet
                        clear_output(wait=True)
                        print('The graph has', len(G), 'nodes now.')
                        
                    G.add_edge(x, rname)  # we add an edge between x and its related rname
    
    if len(G) == l or len(G) > 10000:  # number of nodes didn't change or graph grew too large
        dummy = 1  # break the while loop
        print('Done.')
    
num_new_edges = len(G.edges) - num_edges
print('Number of edges:', num_new_edges)

The graph has 10505 nodes now.
Done.
Number of edges: 34858


In [18]:
with open('graph.pickle', 'wb') as file:
    pickle.dump(G, file)