# API assignment (Spotify API)

In [177]:
import requests

# Data science imports
import pandas as pd
import numpy as np

# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py
import plotly.figure_factory as ff

# Cufflinks wrapper on plotly
import cufflinks as cf

%matplotlib inline

# Display all cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)
cf.go_offline(connected=True)

# Set global theme
cf.set_config_file(world_readable=True, theme='pearl')

In [456]:
client_id=''
client_secret=''
client_auth=requests.auth.HTTPBasicAuth(client_id,client_secret)
post_data = {'grant_type': 'client_credentials'}
response = requests.post('https://accounts.spotify.com/api/token', auth=client_auth, data=post_data)
response_info=response.json()

# Token (Reset every 3600s)

In [454]:
headers = {"Authorization": "Bearer " +response_info['access_token']}

## Getting data for a particular artist.

In [455]:
artist_name="Taylor Swift"
uri="https://api.spotify.com/v1/search?q="+artist_name+"&type=artist&market=IN"
search_artist_results=requests.get(uri,headers=headers)
artist_info=search_artist_results.json()
artist_info

{'artists': {'href': 'https://api.spotify.com/v1/search?query=Taylor+Swift&type=artist&market=IN&offset=0&limit=20',
  'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/06HL4z0CvFAxyc27GXpf02'},
    'followers': {'href': None, 'total': 30267160},
    'genres': ['dance pop', 'pop', 'post-teen pop'],
    'href': 'https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02',
    'id': '06HL4z0CvFAxyc27GXpf02',
    'images': [{'height': 640,
      'url': 'https://i.scdn.co/image/62b33d12e2b9a033cf77585f6e3d4b2c6b3a63a1',
      'width': 640},
     {'height': 320,
      'url': 'https://i.scdn.co/image/9ab124f3323d161f87fc9b7f8b82ab1717483b3a',
      'width': 320},
     {'height': 160,
      'url': 'https://i.scdn.co/image/d28b46cd7e1b7336d1688d462085535963a77311',
      'width': 160}],
    'name': 'Taylor Swift',
    'popularity': 93,
    'type': 'artist',
    'uri': 'spotify:artist:06HL4z0CvFAxyc27GXpf02'},
   {'external_urls': {'spotify': 'https://open.spotify.com/arti

## Finding the artist id.

In [212]:
artist_id=artist_info['artists']['items'][0]['id']
artist_id

'06HL4z0CvFAxyc27GXpf02'

## Getting the artist image details

In [181]:
artist_image=artist_info['artists']['items'][0]['images'][1]['url']
artist_image

'https://i.scdn.co/image/9ab124f3323d161f87fc9b7f8b82ab1717483b3a'

## Artist Image

![](https://i.scdn.co/image/9ab124f3323d161f87fc9b7f8b82ab1717483b3a)

# Finding all albums of Artist using artist id.

In [182]:
# artist_albums_url="https://api.spotify.com/v1/artists/"+artist_id+"/albums?limit=50&offset=0"

# artist_albums_results=requests.get(artist_albums_url,headers=headers)
# artist_albums_info=artist_albums_results.json()
# artist_albums_info["items"][0]["id"]

In [247]:
# https://api.spotify.com/v1/artists/1/albums?include_groups=single%2Calbum&limit=50&offset=0

albums=[]
albums_url="https://api.spotify.com/v1/artists/"+artist_id+"/albums?include_groups=single%2Calbum&limit=50&offset=0"
while True:
    albums_results=requests.get(albums_url,headers=headers)
    albums_info=albums_results.json()
    albums=albums+albums_info["items"]    
    if albums_info['next']==None:
        break
    albums_url=albums_info['next']    

## Checking number of albums.

In [184]:
len(albums)

76

## Getting the album id.

In [185]:
albums[0]["id"]

'1NAmidJlEaVgA3MpcPFYGq'

## Getting the list of album ids in albums.

In [187]:
album_ids=[album["id"] for album in albums]
album_ids

['1NAmidJlEaVgA3MpcPFYGq',
 '6DEjYFkNZh67HP7R9PSZvv',
 '1Hrs3jLGexOvBoaPMoOQYJ',
 '1MPAXuTVL2Ej5x0JHiSPq8',
 '0bEySlRAkuPxV9KVWhXXBr',
 '4uUAUqIfSomFTbbjGp3TYp',
 '2QJmrSgbdM35R67eoGQo4j',
 '6EsTJnpahwW6xX20zvqQgZ',
 '7N3xz2HFNVH5BEkm8ZVmoR',
 '0L3oaZUj3loxosjvugCLGG',
 '02H4kc9YLgorpUIREOwa0q',
 '34OkZVpuzBa9y40DCy0LPR',
 '5LVuISYu2g4YbyxYhFPIbK',
 '4R3hKmiJWEjRe6l03DoV9t',
 '5fy0X0JmZRZnVa2UEicIOl',
 '1yGbNOtRIgdIiGHOEBaZWf',
 '1EoDsNmgTLtmwe1BDAVxV5',
 '1KVKqWeRuXsJDLTW0VuD29',
 '1KlU96Hw9nlvqpBPlSqcTV',
 '4jTYApZPMapg56gRycOn0D',
 '7daMnnffzVSbNJj8Dy75Ev',
 '63lVCnv8B30qedCiTlAc9J',
 '11gfxXxJPd3j6sdWUyEA5S',
 '4ErTrymYK8VIBQR8J8Hjy1',
 '6fyR4wBPwLHKcRtxgd4sGh',
 '5MfAxS5zz8MlfROjGQVXhy',
 '75N0Z60SNMQbAPYZuxKgWd',
 '3QXlUpSDgakWZK2WqQv0pF',
 '1BdjHo5IR6twMhJDxzlpLt',
 '5EpMjweRD573ASl7uNiHym',
 '6Ar2o9KCqcyYF9J0aQP3au',
 '6GPyXXND6hIZpd9bRhCsFv',
 '6S6JQWzUrJVcJLK4fi74Fw',
 '3Mvk2LKxfhc2KVSnDYC40I',
 '6tgMb6LEwb3yj7BdYy462y',
 '08CWGiv27MVQhYpuTtvx83',
 '2dqn5yOQWdyGwOpOIi9O4x',
 

# Getting the tracks of artist using album ids.

In [328]:
#https://api.spotify.com/v1/albums/1/tracks?limit=10&offset=5

tracks=[]
for album_id in album_ids:
    album_tracks_url="https://api.spotify.com/v1/albums/"+album_id+"/tracks?limit=50&offset=0"
    while True:
        tracks_results=requests.get(album_tracks_url,headers=headers)
        try:
            tracks_info=tracks_results.json()
        except:
            print("Error fetching: "+ album_tracks_url)
        for item in tracks_info["items"]:
            item['album_id']=album_id
        tracks=tracks+tracks_info["items"] 
        if tracks_info['next']==None:
            break
        album_tracks_url=tracks_info['next']

## Checking the type and length.

In [326]:
type(tracks)

list

In [327]:
len(tracks)

918

## Coverting the tracks list to a dataframe.

In [324]:
df_tracks=pd.DataFrame(tracks)
df_tracks.head()

Unnamed: 0,album_id,artists,available_markets,disc_number,duration_ms,explicit,external_urls,href,id,is_local,name,preview_url,track_number,type,uri
0,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,170640,False,{'spotify': 'https://open.spotify.com/track/43...,https://api.spotify.com/v1/tracks/43rA71bccXFG...,43rA71bccXFGD4C8GOpIlN,False,I Forgot That You Existed,,1,track,spotify:track:43rA71bccXFGD4C8GOpIlN
1,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,178426,False,{'spotify': 'https://open.spotify.com/track/1B...,https://api.spotify.com/v1/tracks/1BxfuPKGuaTg...,1BxfuPKGuaTgP7aM0Bbdwr,False,Cruel Summer,,2,track,spotify:track:1BxfuPKGuaTgP7aM0Bbdwr
2,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,221306,False,{'spotify': 'https://open.spotify.com/track/1d...,https://api.spotify.com/v1/tracks/1dGr1c8CrMLD...,1dGr1c8CrMLDpV6mPbImSI,False,Lover,,3,track,spotify:track:1dGr1c8CrMLDpV6mPbImSI
3,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,190360,False,{'spotify': 'https://open.spotify.com/track/3R...,https://api.spotify.com/v1/tracks/3RauEVgRgj1I...,3RauEVgRgj1IuWdJ9fDs70,False,The Man,,4,track,spotify:track:3RauEVgRgj1IuWdJ9fDs70
4,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,211240,False,{'spotify': 'https://open.spotify.com/track/3p...,https://api.spotify.com/v1/tracks/3pHkh7d0lzM2...,3pHkh7d0lzM2AldUtz2x37,False,The Archer,,5,track,spotify:track:3pHkh7d0lzM2AldUtz2x37


## Getting the columns of tracks dataframe.

In [329]:
df_tracks.columns

Index(['album_id', 'artists', 'available_markets', 'disc_number',
       'duration_ms', 'explicit', 'external_urls', 'href', 'id', 'is_local',
       'name', 'preview_url', 'track_number', 'type', 'uri'],
      dtype='object')

## Renaming two columns.

In [330]:
df_tracks=df_tracks.rename({'id':'track_id','name':'track_name'},axis=1)
df_tracks.head()

Unnamed: 0,album_id,artists,available_markets,disc_number,duration_ms,explicit,external_urls,href,track_id,is_local,track_name,preview_url,track_number,type,uri
0,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,170640,False,{'spotify': 'https://open.spotify.com/track/43...,https://api.spotify.com/v1/tracks/43rA71bccXFG...,43rA71bccXFGD4C8GOpIlN,False,I Forgot That You Existed,,1,track,spotify:track:43rA71bccXFGD4C8GOpIlN
1,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,178426,False,{'spotify': 'https://open.spotify.com/track/1B...,https://api.spotify.com/v1/tracks/1BxfuPKGuaTg...,1BxfuPKGuaTgP7aM0Bbdwr,False,Cruel Summer,,2,track,spotify:track:1BxfuPKGuaTgP7aM0Bbdwr
2,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,221306,False,{'spotify': 'https://open.spotify.com/track/1d...,https://api.spotify.com/v1/tracks/1dGr1c8CrMLD...,1dGr1c8CrMLDpV6mPbImSI,False,Lover,,3,track,spotify:track:1dGr1c8CrMLDpV6mPbImSI
3,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,190360,False,{'spotify': 'https://open.spotify.com/track/3R...,https://api.spotify.com/v1/tracks/3RauEVgRgj1I...,3RauEVgRgj1IuWdJ9fDs70,False,The Man,,4,track,spotify:track:3RauEVgRgj1IuWdJ9fDs70
4,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",1,211240,False,{'spotify': 'https://open.spotify.com/track/3p...,https://api.spotify.com/v1/tracks/3pHkh7d0lzM2...,3pHkh7d0lzM2AldUtz2x37,False,The Archer,,5,track,spotify:track:3pHkh7d0lzM2AldUtz2x37


## Finding artist id from tracks.

In [331]:
tracks[0]["artists"][0]['id']

'06HL4z0CvFAxyc27GXpf02'

## Finding all track ids.

In [332]:
artist_track_ids=[]
for track in tracks:
    for artist in track['artists']:
        if artist['id']==artist_id:
            artist_track_ids.append(track['id'])    

## Getting the total number of track ids.

In [333]:
len(artist_track_ids)

917

## To check if there is any duplicate track id.

In [334]:
artist_track_ids=list(set(artist_track_ids))
len(artist_track_ids)

917

# Audio Features of artist tracks.

In [335]:
audio_features=[]
for track_id in artist_track_ids:
    audio_feature_url="https://api.spotify.com/v1/audio-features/"+track_id
    audio_feature_results=requests.get(audio_feature_url,headers=headers)
    audio_feature_info=audio_feature_results.json()
    audio_features.append(audio_feature_info)

In [336]:
audio_features

[{'danceability': 0.624,
  'energy': 0.34,
  'key': 7,
  'loudness': -12.411,
  'mode': 1,
  'speechiness': 0.029,
  'acousticness': 0.632,
  'instrumentalness': 0.0337,
  'liveness': 0.121,
  'valence': 0.261,
  'tempo': 129.987,
  'type': 'audio_features',
  'id': '6LKjHhOW1az75pCQ9XJJtF',
  'uri': 'spotify:track:6LKjHhOW1az75pCQ9XJJtF',
  'track_href': 'https://api.spotify.com/v1/tracks/6LKjHhOW1az75pCQ9XJJtF',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6LKjHhOW1az75pCQ9XJJtF',
  'duration_ms': 283680,
  'time_signature': 3},
 {'danceability': 0.897,
  'energy': 0.366,
  'key': 0,
  'loudness': -8.029,
  'mode': 1,
  'speechiness': 0.0569,
  'acousticness': 0.00889,
  'instrumentalness': 0.000353,
  'liveness': 0.0715,
  'valence': 0.416,
  'tempo': 100.003,
  'type': 'audio_features',
  'id': '2YWtcWi3a83pdEg3Gif4Pd',
  'uri': 'spotify:track:2YWtcWi3a83pdEg3Gif4Pd',
  'track_href': 'https://api.spotify.com/v1/tracks/2YWtcWi3a83pdEg3Gif4Pd',
  'analysis_url': 'http

## Checking the type.

In [337]:
type(audio_features)

list

## Converting the audio features list to a dataframe.

In [338]:
df_audio_features=pd.DataFrame(audio_features)
df_audio_features.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0.632,https://api.spotify.com/v1/audio-analysis/6LKj...,0.624,283680,0.34,6LKjHhOW1az75pCQ9XJJtF,0.0337,7,0.121,-12.411,1,0.029,129.987,3,https://api.spotify.com/v1/tracks/6LKjHhOW1az7...,audio_features,spotify:track:6LKjHhOW1az75pCQ9XJJtF,0.261
1,0.00889,https://api.spotify.com/v1/audio-analysis/2YWt...,0.897,173387,0.366,2YWtcWi3a83pdEg3Gif4Pd,0.000353,0,0.0715,-8.029,1,0.0569,100.003,4,https://api.spotify.com/v1/tracks/2YWtcWi3a83p...,audio_features,spotify:track:2YWtcWi3a83pdEg3Gif4Pd,0.416
2,0.202,https://api.spotify.com/v1/audio-analysis/1nuk...,0.662,175280,0.864,1nuk1yTZO3zR8XB8Ofk9q3,0.0,7,0.0997,-2.08,1,0.0322,105.607,4,https://api.spotify.com/v1/tracks/1nuk1yTZO3zR...,audio_features,spotify:track:1nuk1yTZO3zR8XB8Ofk9q3,0.791
3,0.0348,https://api.spotify.com/v1/audio-analysis/7aMf...,0.789,212600,0.634,7aMftpNNJiQj1c2rllc8Dk,2e-06,7,0.302,-4.762,1,0.0323,116.992,4,https://api.spotify.com/v1/tracks/7aMftpNNJiQj...,audio_features,spotify:track:7aMftpNNJiQj1c2rllc8Dk,0.658
4,0.241,https://api.spotify.com/v1/audio-analysis/7AiI...,0.856,269207,0.371,7AiIXVMIrTqHlGn7bS3f31,0.819,4,0.111,-9.921,1,0.0425,103.999,4,https://api.spotify.com/v1/tracks/7AiIXVMIrTqH...,audio_features,spotify:track:7AiIXVMIrTqHlGn7bS3f31,0.384


## Checking the columns in audio features data frame.

In [339]:
df_audio_features.columns

Index(['acousticness', 'analysis_url', 'danceability', 'duration_ms', 'energy',
       'id', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
       'speechiness', 'tempo', 'time_signature', 'track_href', 'type', 'uri',
       'valence'],
      dtype='object')

## Renaming id column to track_id.

In [340]:
df_audio_features=df_audio_features.rename({'id':'track_id'},axis=1)
df_audio_features.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,track_id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0.632,https://api.spotify.com/v1/audio-analysis/6LKj...,0.624,283680,0.34,6LKjHhOW1az75pCQ9XJJtF,0.0337,7,0.121,-12.411,1,0.029,129.987,3,https://api.spotify.com/v1/tracks/6LKjHhOW1az7...,audio_features,spotify:track:6LKjHhOW1az75pCQ9XJJtF,0.261
1,0.00889,https://api.spotify.com/v1/audio-analysis/2YWt...,0.897,173387,0.366,2YWtcWi3a83pdEg3Gif4Pd,0.000353,0,0.0715,-8.029,1,0.0569,100.003,4,https://api.spotify.com/v1/tracks/2YWtcWi3a83p...,audio_features,spotify:track:2YWtcWi3a83pdEg3Gif4Pd,0.416
2,0.202,https://api.spotify.com/v1/audio-analysis/1nuk...,0.662,175280,0.864,1nuk1yTZO3zR8XB8Ofk9q3,0.0,7,0.0997,-2.08,1,0.0322,105.607,4,https://api.spotify.com/v1/tracks/1nuk1yTZO3zR...,audio_features,spotify:track:1nuk1yTZO3zR8XB8Ofk9q3,0.791
3,0.0348,https://api.spotify.com/v1/audio-analysis/7aMf...,0.789,212600,0.634,7aMftpNNJiQj1c2rllc8Dk,2e-06,7,0.302,-4.762,1,0.0323,116.992,4,https://api.spotify.com/v1/tracks/7aMftpNNJiQj...,audio_features,spotify:track:7aMftpNNJiQj1c2rllc8Dk,0.658
4,0.241,https://api.spotify.com/v1/audio-analysis/7AiI...,0.856,269207,0.371,7AiIXVMIrTqHlGn7bS3f31,0.819,4,0.111,-9.921,1,0.0425,103.999,4,https://api.spotify.com/v1/tracks/7AiIXVMIrTqH...,audio_features,spotify:track:7AiIXVMIrTqHlGn7bS3f31,0.384


## Checking the shape and missing values.

In [341]:
df_audio_features.shape

(917, 18)

In [366]:
df_audio_features.isnull().sum().sum()

0

## Creating another dataframe with specific columns.

In [367]:
df_features_radar=df_audio_features[['acousticness', 'danceability', 'energy',
        'instrumentalness', 'liveness','speechiness', 'valence']].copy()
df_features_radar.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,valence
0,0.632,0.624,0.34,0.0337,0.121,0.029,0.261
1,0.00889,0.897,0.366,0.000353,0.0715,0.0569,0.416
2,0.202,0.662,0.864,0.0,0.0997,0.0322,0.791
3,0.0348,0.789,0.634,2e-06,0.302,0.0323,0.658
4,0.241,0.856,0.371,0.819,0.111,0.0425,0.384


## Describing the audio features.

[features](https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/)
* **acousticness:** A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic.
* **danceability:** Danceability describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable.
* **energy:** Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy.
* **instrumentalness:** Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”.
* **liveness:** Detects the presence of an audience in the recording.
* **loudness:** The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track 
* **speechiness:** Speechiness detects the presence of spoken words in a track.
* **valence:** Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry). A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track.


## Showing audio feature distribution.

In [415]:
df_features_radar.iplot(
        kind='box', colorscale='set2',
        #xTitle='audio feature',
        yTitle='Audio features',
        title='Audio features of tracks')

## Making a dataframe from the mean values and resetting the index.

In [368]:
df_mean=df_features_radar.mean().to_frame().reset_index()
df_mean

Unnamed: 0,index,0
0,acousticness,0.227978
1,danceability,0.603522
2,energy,0.602208
3,instrumentalness,0.117097
4,liveness,0.180799
5,speechiness,0.135555
6,valence,0.437994


## Renaming two columns.

In [369]:
df_mean=df_mean.rename({'index':'features',0:'mean'},axis=1)
df_mean

Unnamed: 0,features,mean
0,acousticness,0.227978
1,danceability,0.603522
2,energy,0.602208
3,instrumentalness,0.117097
4,liveness,0.180799
5,speechiness,0.135555
6,valence,0.437994


## Showing the radar plot.

In [370]:
import plotly.express as px
fig = px.line_polar(df_mean, r='mean', theta='features', line_close=True)
fig.update_traces(fill='toself')
#fig.show()

## Creating another dataframe from mean values.

In [371]:
df_mean2=df_features_radar.mean().to_frame()
df_mean2

Unnamed: 0,0
acousticness,0.227978
danceability,0.603522
energy,0.602208
instrumentalness,0.117097
liveness,0.180799
speechiness,0.135555
valence,0.437994


## Showing the bar plot for the audio features.

In [372]:
df_mean2.iplot(kind ='bar',colors = 'mediumvioletred')

# Getting album details of our artist using album ids.

In [373]:
# https://api.spotify.com/v1/albums/4aawyAB9vmqN3uQ7FjRGTy

albums_artist=[]
for album_id in album_ids:
    album_url="https://api.spotify.com/v1/albums/"+album_id
    albums_artist_results=requests.get(album_url,headers=headers)
    albums_artist_info=albums_artist_results.json()
    albums_artist.append(albums_artist_info)

In [374]:
albums_artist

[{'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/06HL4z0CvFAxyc27GXpf02'},
    'href': 'https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02',
    'id': '06HL4z0CvFAxyc27GXpf02',
    'name': 'Taylor Swift',
    'type': 'artist',
    'uri': 'spotify:artist:06HL4z0CvFAxyc27GXpf02'}],
  'available_markets': ['AD',
   'AE',
   'AR',
   'AT',
   'AU',
   'BE',
   'BG',
   'BH',
   'BO',
   'BR',
   'CA',
   'CH',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DE',
   'DK',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'FI',
   'FR',
   'GB',
   'GR',
   'GT',
   'HK',
   'HN',
   'HU',
   'ID',
   'IE',
   'IL',
   'IN',
   'IS',
   'IT',
   'JO',
   'JP',
   'KW',
   'LB',
   'LI',
   'LT',
   'LU',
   'LV',
   'MA',
   'MC',
   'MT',
   'MX',
   'MY',
   'NI',
   'NL',
   'NO',
   'NZ',
   'OM',
   'PA',
   'PE',
   'PH',
   'PL',
   'PS',
   'PT',
   'PY',
   'QA',
   'RO',
   'SA',
   'SE',
   'SG',
   'SK',
   'SV',

## Checking the type.

In [375]:
type(albums_artist)

list

## Creating a dataframe for albums of artist.

In [376]:
df_albums_artist=pd.DataFrame(albums_artist)
df_albums_artist.head()

Unnamed: 0,album_type,artists,available_markets,copyrights,external_ids,external_urls,genres,href,id,images,label,name,popularity,release_date,release_date_precision,total_tracks,tracks,type,uri
0,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2019 Taylor Swift', 'type': 'C'},...",{'upc': '00602508009211'},{'spotify': 'https://open.spotify.com/album/1N...,[],https://api.spotify.com/v1/albums/1NAmidJlEaVg...,1NAmidJlEaVgA3MpcPFYGq,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Taylor Swift,Lover,92,2019-08-23,day,18,{'href': 'https://api.spotify.com/v1/albums/1N...,album,spotify:album:1NAmidJlEaVgA3MpcPFYGq
1,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2017 Big Machine Label Group, LLC...",{'upc': '00843930033140'},{'spotify': 'https://open.spotify.com/album/6D...,[],https://api.spotify.com/v1/albums/6DEjYFkNZh67...,6DEjYFkNZh67HP7R9PSZvv,"[{'height': 640, 'url': 'https://i.scdn.co/ima...","Big Machine Records, LLC",reputation,84,2017-11-10,day,15,{'href': 'https://api.spotify.com/v1/albums/6D...,album,spotify:album:6DEjYFkNZh67HP7R9PSZvv
2,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2018 Big Machine Label Group, LLC...",{'upc': '00843930039579'},{'spotify': 'https://open.spotify.com/album/1H...,[],https://api.spotify.com/v1/albums/1Hrs3jLGexOv...,1Hrs3jLGexOvBoaPMoOQYJ,"[{'height': 640, 'url': 'https://i.scdn.co/ima...","Big Machine Records, LLC",reputation (Big Machine Radio Release Special),66,2017-11-10,day,31,{'href': 'https://api.spotify.com/v1/albums/1H...,album,spotify:album:1Hrs3jLGexOvBoaPMoOQYJ
3,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2018 Big Machine Label Group, LLC...",{'upc': '00843930039371'},{'spotify': 'https://open.spotify.com/album/1M...,[],https://api.spotify.com/v1/albums/1MPAXuTVL2Ej...,1MPAXuTVL2Ej5x0JHiSPq8,"[{'height': 640, 'url': 'https://i.scdn.co/ima...","Big Machine Records, LLC",reputation Stadium Tour Surprise Song Playlist,75,2017-11-09,day,46,{'href': 'https://api.spotify.com/v1/albums/1M...,album,spotify:album:1MPAXuTVL2Ej5x0JHiSPq8
4,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CH, C...","[{'text': '© 2015 Big Machine Records, LLC', '...",{'upc': '00602547235305'},{'spotify': 'https://open.spotify.com/album/0b...,[],https://api.spotify.com/v1/albums/0bEySlRAkuPx...,0bEySlRAkuPxV9KVWhXXBr,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Universal Music,Taylor Swift Karaoke: 1989 (Deluxe),32,2015-02-01,day,16,{'href': 'https://api.spotify.com/v1/albums/0b...,album,spotify:album:0bEySlRAkuPxV9KVWhXXBr


## Checking info.

In [377]:
df_albums_artist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 19 columns):
album_type                76 non-null object
artists                   76 non-null object
available_markets         76 non-null object
copyrights                76 non-null object
external_ids              76 non-null object
external_urls             76 non-null object
genres                    76 non-null object
href                      76 non-null object
id                        76 non-null object
images                    76 non-null object
label                     76 non-null object
name                      76 non-null object
popularity                76 non-null int64
release_date              76 non-null object
release_date_precision    76 non-null object
total_tracks              76 non-null int64
tracks                    76 non-null object
type                      76 non-null object
uri                       76 non-null object
dtypes: int64(2), object(17)
memory usage: 1

In [401]:
df_albums_artist['release_date']=pd.to_datetime(df_albums_artist['release_date'])
df_albums_artist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 19 columns):
album_type                76 non-null object
artists                   76 non-null object
available_markets         76 non-null object
copyrights                76 non-null object
external_ids              76 non-null object
external_urls             76 non-null object
genres                    76 non-null object
href                      76 non-null object
album_id                  76 non-null object
images                    76 non-null object
label                     76 non-null object
album_name                76 non-null object
popularity                76 non-null int64
release_date              76 non-null datetime64[ns]
release_date_precision    76 non-null object
total_tracks              76 non-null int64
tracks                    76 non-null object
type                      76 non-null object
uri                       76 non-null object
dtypes: datetime64[ns](1), int64(2),

## Checking the columns.

In [400]:
df_albums_artist.columns

Index(['album_type', 'artists', 'available_markets', 'copyrights',
       'external_ids', 'external_urls', 'genres', 'href', 'album_id', 'images',
       'label', 'album_name', 'popularity', 'release_date',
       'release_date_precision', 'total_tracks', 'tracks', 'type', 'uri'],
      dtype='object')

## Sorting the dataframe by release date.

In [404]:
df_albums_artist.sort_values('release_date',inplace=True)

## Popularity distribution with time for album type.

In [407]:
df_albums_artist.iplot(
    x='release_date',
    y='popularity',
    xTitle='Release date',
    yTitle='Popularity',
    categories='album_type',
    mode='lines+markers',
    title='Popularity with date')

## Box plot showing popularity distribution.

In [391]:
df_albums_artist['popularity'].iplot(
    kind='hist',
    bins=30,
    xTitle='popularity',
    linecolor='black',
    yTitle='count',
    title='Popularity Distribution')

## Popularity of each album.

In [413]:
df_albums_artist.iplot(
    x='album_name',
    y='popularity',
    #xTitle='Album_name',
    yTitle='Popularity',
    kind='bar',
    title='Popularity album wise')

## Renaming id and name columns.

In [381]:
df_albums_artist=df_albums_artist.rename({'id':'album_id','name':'album_name'},axis=1)
df_albums_artist.head()

Unnamed: 0,album_type,artists,available_markets,copyrights,external_ids,external_urls,genres,href,album_id,images,label,album_name,popularity,release_date,release_date_precision,total_tracks,tracks,type,uri
0,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2019 Taylor Swift', 'type': 'C'},...",{'upc': '00602508009211'},{'spotify': 'https://open.spotify.com/album/1N...,[],https://api.spotify.com/v1/albums/1NAmidJlEaVg...,1NAmidJlEaVgA3MpcPFYGq,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Taylor Swift,Lover,92,2019-08-23,day,18,{'href': 'https://api.spotify.com/v1/albums/1N...,album,spotify:album:1NAmidJlEaVgA3MpcPFYGq
1,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2017 Big Machine Label Group, LLC...",{'upc': '00843930033140'},{'spotify': 'https://open.spotify.com/album/6D...,[],https://api.spotify.com/v1/albums/6DEjYFkNZh67...,6DEjYFkNZh67HP7R9PSZvv,"[{'height': 640, 'url': 'https://i.scdn.co/ima...","Big Machine Records, LLC",reputation,84,2017-11-10,day,15,{'href': 'https://api.spotify.com/v1/albums/6D...,album,spotify:album:6DEjYFkNZh67HP7R9PSZvv
2,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2018 Big Machine Label Group, LLC...",{'upc': '00843930039579'},{'spotify': 'https://open.spotify.com/album/1H...,[],https://api.spotify.com/v1/albums/1Hrs3jLGexOv...,1Hrs3jLGexOvBoaPMoOQYJ,"[{'height': 640, 'url': 'https://i.scdn.co/ima...","Big Machine Records, LLC",reputation (Big Machine Radio Release Special),66,2017-11-10,day,31,{'href': 'https://api.spotify.com/v1/albums/1H...,album,spotify:album:1Hrs3jLGexOvBoaPMoOQYJ
3,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...","[{'text': '© 2018 Big Machine Label Group, LLC...",{'upc': '00843930039371'},{'spotify': 'https://open.spotify.com/album/1M...,[],https://api.spotify.com/v1/albums/1MPAXuTVL2Ej...,1MPAXuTVL2Ej5x0JHiSPq8,"[{'height': 640, 'url': 'https://i.scdn.co/ima...","Big Machine Records, LLC",reputation Stadium Tour Surprise Song Playlist,75,2017-11-09,day,46,{'href': 'https://api.spotify.com/v1/albums/1M...,album,spotify:album:1MPAXuTVL2Ej5x0JHiSPq8
4,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CH, C...","[{'text': '© 2015 Big Machine Records, LLC', '...",{'upc': '00602547235305'},{'spotify': 'https://open.spotify.com/album/0b...,[],https://api.spotify.com/v1/albums/0bEySlRAkuPx...,0bEySlRAkuPxV9KVWhXXBr,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Universal Music,Taylor Swift Karaoke: 1989 (Deluxe),32,2015-02-01,day,16,{'href': 'https://api.spotify.com/v1/albums/0b...,album,spotify:album:0bEySlRAkuPxV9KVWhXXBr


## Checking the shape.

In [382]:
df_albums_artist.shape

(76, 19)

# Merging track df with albums df on the basis of album id (keeping required columns).

In [383]:
df_tracks_albums_artist = pd.merge(df_tracks[['album_id', 'artists', 'available_markets',
       'duration_ms', 'explicit', 'track_id',
       'track_name']],df_albums_artist[['album_type','album_id', 'album_name', 'popularity', 'release_date', 'release_date_precision',
       'total_tracks']],on='album_id', how='left')

df_tracks_albums_artist.head()

Unnamed: 0,album_id,artists,available_markets,duration_ms,explicit,track_id,track_name,album_type,album_name,popularity,release_date,release_date_precision,total_tracks
0,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",170640,False,43rA71bccXFGD4C8GOpIlN,I Forgot That You Existed,album,Lover,92,2019-08-23,day,18
1,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",178426,False,1BxfuPKGuaTgP7aM0Bbdwr,Cruel Summer,album,Lover,92,2019-08-23,day,18
2,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",221306,False,1dGr1c8CrMLDpV6mPbImSI,Lover,album,Lover,92,2019-08-23,day,18
3,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",190360,False,3RauEVgRgj1IuWdJ9fDs70,The Man,album,Lover,92,2019-08-23,day,18
4,1NAmidJlEaVgA3MpcPFYGq,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",211240,False,3pHkh7d0lzM2AldUtz2x37,The Archer,album,Lover,92,2019-08-23,day,18


## Converting the release date to datetime64 [ns] format.

In [384]:
df_tracks_albums_artist['release_date']=pd.to_datetime(df_tracks_albums_artist['release_date'])
df_tracks_albums_artist.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 918 entries, 0 to 917
Data columns (total 13 columns):
album_id                  918 non-null object
artists                   918 non-null object
available_markets         918 non-null object
duration_ms               918 non-null int64
explicit                  918 non-null bool
track_id                  918 non-null object
track_name                918 non-null object
album_type                918 non-null object
album_name                918 non-null object
popularity                918 non-null int64
release_date              918 non-null datetime64[ns]
release_date_precision    918 non-null object
total_tracks              918 non-null int64
dtypes: bool(1), datetime64[ns](1), int64(3), object(8)
memory usage: 94.1+ KB


## Checking the correlation between popularity and total number of tracks.

In [385]:
corr_mat = df_tracks_albums_artist[['popularity','total_tracks']].corr()

figure = ff.create_annotated_heatmap(
    z=corr_mat.values,
    x=list(corr_mat.columns),
    y=list(corr_mat.index),
    annotation_text=corr_mat.round(2).values,
    showscale=True)
figure

## Checking the shape

In [386]:
df_tracks_albums_artist.shape

(918, 13)

# Merging the tracks and album dataframes with audio feature dataframe.

In [387]:
df_tracks_albums_artist_features=pd.merge(df_audio_features[['track_id','acousticness', 'danceability', 'energy',
        'instrumentalness', 'liveness', 'loudness','speechiness', 'valence']],df_tracks_albums_artist,on='track_id', how='left')
df_tracks_albums_artist_features.head()

Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,album_id,...,available_markets,duration_ms,explicit,track_name,album_type,album_name,popularity,release_date,release_date_precision,total_tracks
0,6LKjHhOW1az75pCQ9XJJtF,0.632,0.624,0.34,0.0337,0.121,-12.411,0.029,0.261,1EoDsNmgTLtmwe1BDAVxV5,...,"[CA, US]",283680,False,Sad Beautiful Tragic,album,Red,71,2012-10-22,day,16
1,2YWtcWi3a83pdEg3Gif4Pd,0.00889,0.897,0.366,0.000353,0.0715,-8.029,0.0569,0.416,1NAmidJlEaVgA3MpcPFYGq,...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",173386,False,I Think He Knows,album,Lover,92,2019-08-23,day,18
2,1nuk1yTZO3zR8XB8Ofk9q3,0.202,0.662,0.864,0.0,0.0997,-2.08,0.0322,0.791,2rU7u7C2v5i45MFVxx7xG1,...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",175280,False,Picture To Burn,album,Taylor Swift (Big Machine Radio Release Special),65,2006-10-24,day,30
3,7aMftpNNJiQj1c2rllc8Dk,0.0348,0.789,0.634,2e-06,0.302,-4.762,0.0323,0.658,6EsTJnpahwW6xX20zvqQgZ,...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CA, C...",212600,False,Welcome To New York,album,1989 (Big Machine Radio Release Special),66,2014-10-27,day,26
4,7AiIXVMIrTqHlGn7bS3f31,0.241,0.856,0.371,0.819,0.111,-9.921,0.0425,0.384,0bEySlRAkuPxV9KVWhXXBr,...,"[AD, AE, AR, AT, AU, BE, BG, BH, BO, BR, CH, C...",269207,False,Clean - Karaoke Version,album,Taylor Swift Karaoke: 1989 (Deluxe),32,2015-02-01,day,16


## Checking shape

In [388]:
df_tracks_albums_artist_features.shape

(917, 21)

## Checking info

In [389]:
df_tracks_albums_artist_features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 917 entries, 0 to 916
Data columns (total 21 columns):
track_id                  917 non-null object
acousticness              917 non-null float64
danceability              917 non-null float64
energy                    917 non-null float64
instrumentalness          917 non-null float64
liveness                  917 non-null float64
loudness                  917 non-null float64
speechiness               917 non-null float64
valence                   917 non-null float64
album_id                  917 non-null object
artists                   917 non-null object
available_markets         917 non-null object
duration_ms               917 non-null int64
explicit                  917 non-null bool
track_name                917 non-null object
album_type                917 non-null object
album_name                917 non-null object
popularity                917 non-null int64
release_date              917 non-null datetime64[ns]
release_d

## Audio features and popularity distribution in one frame.

In [416]:
df_tracks_albums_artist_features[['acousticness', 'danceability', 'energy',
        'instrumentalness', 'liveness','speechiness', 'valence', 'popularity']].iplot(secondary_y='popularity', secondary_y_title='popularity',
    kind='box', yTitle='Features', title='Box Plot of features and popularity')

# Creating the final dataframe for analysis.

In [439]:
df_final=df_tracks_albums_artist_features[['album_id','acousticness', 'danceability', 'energy',
        'instrumentalness', 'liveness','speechiness', 'valence', 'popularity']].copy()
df_final.head()

Unnamed: 0,album_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,valence,popularity
0,1EoDsNmgTLtmwe1BDAVxV5,0.632,0.624,0.34,0.0337,0.121,0.029,0.261,71
1,1NAmidJlEaVgA3MpcPFYGq,0.00889,0.897,0.366,0.000353,0.0715,0.0569,0.416,92
2,2rU7u7C2v5i45MFVxx7xG1,0.202,0.662,0.864,0.0,0.0997,0.0322,0.791,65
3,6EsTJnpahwW6xX20zvqQgZ,0.0348,0.789,0.634,2e-06,0.302,0.0323,0.658,66
4,0bEySlRAkuPxV9KVWhXXBr,0.241,0.856,0.371,0.819,0.111,0.0425,0.384,32


## Finding correlation matrix.

In [442]:
corr_mat = df_final.corr()
figure = ff.create_annotated_heatmap(
    z=corr_mat.values,
    x=list(corr_mat.columns),
    y=list(corr_mat.index),
    annotation_text=corr_mat.round(2).values,
    showscale=True)
figure