# API Project

## 1. Import libraries and set  options for visualization:

In [1]:
import spotipy
import spotipy.util as util
import json
from pandas.io.json import json_normalize
import numpy as np
import pandas as pd
import os

# Setting options for DataFrame visualization:
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 25)

## 2. Set constants:

In [2]:
CLIENT_ID = '50f3b0028641407db383394bce5bdab2'
CLIENT_SECRET = '102abdda8d97489ca12c32c510af6bd9'
SCOPE = "user-read-private user-read-email user-read-playback-state user-read-currently-playing user-library-read"
REDIRECT_URI = 'http://localhost:8888/callback/'
USERNAME = 'dannig012'
PATH = os.getcwd()

## 3. Request Spotify token and access to the API:

In [3]:
token = util.prompt_for_user_token(username=USERNAME, scope=SCOPE, client_id=CLIENT_ID, client_secret=CLIENT_SECRET, redirect_uri=REDIRECT_URI)

In [4]:
sp = spotipy.Spotify(auth=token)

## 4. Request user information and add it to a df: 

In [5]:
# List of selected users and empty dataframe:
friends = ['angelus_', 'amane88', 'aechos', 'alequijada',  'dannig012', 'popersea', 'marc.vallvebernal',
           'nestorviolin', 'pau_ik', 'jluisite', 'elyp', 'elsapato', 'chicareversible', 'brunocossio', '42416345k']
friends_df = pd.DataFrame()

# Loop to request user informaton and adding it to friends_df:
for friend in friends:
    # Uses spotipy's user function to request user info.
    user_df = json_normalize(sp.user(friend))
    friends_df = friends_df.append(user_df)

### friends_df information and export:

In [6]:
friends_df.head()

Unnamed: 0,display_name,external_urls.spotify,followers.href,followers.total,href,id,images,type,uri
0,angelus_,https://open.spotify.com/user/angelus_,,14,https://api.spotify.com/v1/users/angelus_,angelus_,[],user,spotify:user:angelus_
0,Amanda Batista Sosa,https://open.spotify.com/user/amane88,,34,https://api.spotify.com/v1/users/amane88,amane88,"[{'height': None, 'url': 'https://scontent.xx....",user,spotify:user:amane88
0,Alejandro De La Barreda Heusser,https://open.spotify.com/user/aechos,,66,https://api.spotify.com/v1/users/aechos,aechos,"[{'height': None, 'url': 'https://scontent.xx....",user,spotify:user:aechos
0,Alejandro Quijada Fumero,https://open.spotify.com/user/alequijada,,16,https://api.spotify.com/v1/users/alequijada,alequijada,"[{'height': None, 'url': 'https://scontent.xx....",user,spotify:user:alequijada
0,dannig012,https://open.spotify.com/user/dannig012,,24,https://api.spotify.com/v1/users/dannig012,dannig012,[],user,spotify:user:dannig012


In [7]:
friends_df.shape

(15, 9)

In [8]:
friends_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15 entries, 0 to 0
Data columns (total 9 columns):
display_name             15 non-null object
external_urls.spotify    15 non-null object
followers.href           0 non-null object
followers.total          15 non-null int64
href                     15 non-null object
id                       15 non-null object
images                   15 non-null object
type                     15 non-null object
uri                      15 non-null object
dtypes: int64(1), object(8)
memory usage: 1.2+ KB


In [9]:
#Exporting (uses PATH as constant previously defined):
friends_export = friends_df.to_csv('%s/output/spotify_friends.csv'%PATH, index=False)

## 5. Extract playlist information

### Defining a function to extract (and flatten) the public playlists for a given user:

In [10]:
def playlist_extractor(user):
    # Uses spotipy's user_playlists function.
    playlists = json_normalize(sp.user_playlists(user))
    playlists_flat = json_normalize(playlists['items'][0])
    return playlists_flat

### Looping the function over the 'id' column of friends_df to create a new dataframe containing every playlist:

In [11]:
# Creating the empty dataframe:
playlists_df = pd.DataFrame()

# Loop to get the playlists for every user in friends_df:
for i in friends_df['id']:
    user_playlists = playlist_extractor(i)
    playlists_df = playlists_df.append(user_playlists)

# Reseting playlists_df index:
playlists_df.reset_index(drop=True, inplace=True)

### playlists_df information:

In [12]:
playlists_df.head()

Unnamed: 0,collaborative,external_urls.spotify,href,id,images,name,owner.display_name,owner.external_urls.spotify,owner.href,owner.id,owner.type,owner.uri,primary_color,public,snapshot_id,tracks.href,tracks.total,type,uri
0,False,https://open.spotify.com/playlist/37i9dQZF1DWX...,https://api.spotify.com/v1/playlists/37i9dQZF1...,37i9dQZF1DWXe9gFZP0gtP,"[{'height': None, 'url': 'https://pl.scdn.co/i...",Stress Relief,Spotify,https://open.spotify.com/user/spotify,https://api.spotify.com/v1/users/spotify,spotify,user,spotify:user:spotify,,True,MTU2NDA2NjEzNiwwMDAwMDAwMGQ0MWQ4Y2Q5OGYwMGIyMD...,https://api.spotify.com/v1/playlists/37i9dQZF1...,131,playlist,spotify:playlist:37i9dQZF1DWXe9gFZP0gtP
1,False,https://open.spotify.com/playlist/5oiKq5lfsNvr...,https://api.spotify.com/v1/playlists/5oiKq5lfs...,5oiKq5lfsNvrdN7xAy6XsK,"[{'height': 640, 'url': 'https://mosaic.scdn.c...",Místico All time collection,angelus_,https://open.spotify.com/user/angelus_,https://api.spotify.com/v1/users/angelus_,angelus_,user,spotify:user:angelus_,,True,MTQxLGZhNTJkYmU2ZGYxZDk0MTVmYzZjNzJhNzk4YzU2Zm...,https://api.spotify.com/v1/playlists/5oiKq5lfs...,150,playlist,spotify:playlist:5oiKq5lfsNvrdN7xAy6XsK
2,False,https://open.spotify.com/playlist/4UBx2ZfAjPbh...,https://api.spotify.com/v1/playlists/4UBx2ZfAj...,4UBx2ZfAjPbh0jNdvuVhss,"[{'height': 640, 'url': 'https://mosaic.scdn.c...",Chunda chunda guay discotequero,angelus_,https://open.spotify.com/user/angelus_,https://api.spotify.com/v1/users/angelus_,angelus_,user,spotify:user:angelus_,,True,MTMzLGFiNTMzYWU3NGYyMzAxY2RkMjRiNWExMWEzZTFiYj...,https://api.spotify.com/v1/playlists/4UBx2ZfAj...,135,playlist,spotify:playlist:4UBx2ZfAjPbh0jNdvuVhss
3,False,https://open.spotify.com/playlist/3tPFZzhjsRsJ...,https://api.spotify.com/v1/playlists/3tPFZzhjs...,3tPFZzhjsRsJWavnevNwER,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",New Life,angelus_,https://open.spotify.com/user/angelus_,https://api.spotify.com/v1/users/angelus_,angelus_,user,spotify:user:angelus_,,True,MyxkMWNiZWZjZTYyYmM4Yzc1MTNlMmY5YjkwYjY0ZWNlN2...,https://api.spotify.com/v1/playlists/3tPFZzhjs...,2,playlist,spotify:playlist:3tPFZzhjsRsJWavnevNwER
4,False,https://open.spotify.com/playlist/6xa4DYxZ5IO0...,https://api.spotify.com/v1/playlists/6xa4DYxZ5...,6xa4DYxZ5IO09eyKt5be52,"[{'height': 640, 'url': 'https://mosaic.scdn.c...",MBM,angelus_,https://open.spotify.com/user/angelus_,https://api.spotify.com/v1/users/angelus_,angelus_,user,spotify:user:angelus_,,True,NDksZDhmMWRiZDgxNDRjYjRlYzBhMjE3NGU2ZTFiNDQ4YT...,https://api.spotify.com/v1/playlists/6xa4DYxZ5...,186,playlist,spotify:playlist:6xa4DYxZ5IO09eyKt5be52


In [13]:
playlists_df.shape

(493, 19)

In [14]:
playlists_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 493 entries, 0 to 492
Data columns (total 19 columns):
collaborative                  493 non-null bool
external_urls.spotify          493 non-null object
href                           493 non-null object
id                             493 non-null object
images                         493 non-null object
name                           493 non-null object
owner.display_name             434 non-null object
owner.external_urls.spotify    493 non-null object
owner.href                     493 non-null object
owner.id                       493 non-null object
owner.type                     493 non-null object
owner.uri                      493 non-null object
primary_color                  0 non-null object
public                         493 non-null bool
snapshot_id                    493 non-null object
tracks.href                    493 non-null object
tracks.total                   493 non-null int64
type                           493 

In [15]:
#Exporting (uses PATH as constant previously defined):
playlists_export = playlists_df.to_csv('%s/output/spotify_playlists.csv'%PATH, index=False)

## 6. Extract track information

### Defining a function to extract (and clean and flatten) the songs of a playlist:

In [16]:
def track_extractor(df):
    final_df = pd.DataFrame()
    for i in range(len(df)):
        # Uses spotipy's user_playlist_tracks function to get the songs.
        track_list = json_normalize(sp.user_playlist_tracks(user=df['owner.id'][i], playlist_id=df['id'][i]))
        if track_list['items'][0] == []:
            continue
        else:
            tracks_df = json_normalize(track_list['items'][0])
            tracks_df['owner.id'] = df['owner.id'][i]
            tracks_df_clean = tracks_df[['track.album.href', 'track.album.id', 'track.album.name', 
                                   'track.album.release_date', 'track.album.type', 'track.artists', 'track.href',
                                   'track.id', 'track.is_local', 'track.name', 'track.popularity', 'track.type', 'owner.id']]
            final_df = final_df.append(tracks_df_clean)
    return final_df

### Looping the function over playlists_df to create a new dataframe with all the tracks of every playlist:

In [17]:
# Creating the dataframe:
songs_df = track_extractor(playlists_df)

# Resetting songs_df index:
songs_df.reset_index(drop=True, inplace=True)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

### songs_df information:

In [18]:
songs_df.head()

Unnamed: 0,track.album.href,track.album.id,track.album.name,track.album.release_date,track.album.type,track.artists,track.href,track.id,track.is_local,track.name,track.popularity,track.type,owner.id
0,https://api.spotify.com/v1/albums/6hzdQgjn3wTJ...,6hzdQgjn3wTJhOUI4cVKu2,Amelie from Montmartre,2001-04-23,album,[{'external_urls': {'spotify': 'https://open.s...,https://api.spotify.com/v1/tracks/2AkcjsKlRbIB...,2AkcjsKlRbIBYGAgpQVFii,False,"Comptine d'un autre été, l'après-midi",15.0,track,spotify
1,https://api.spotify.com/v1/albums/5Q23RuwpOqhN...,5Q23RuwpOqhN28KtBOdnGp,Tone of Love,2019-03-23,album,[{'external_urls': {'spotify': 'https://open.s...,https://api.spotify.com/v1/tracks/3Z0n8POAcDQC...,3Z0n8POAcDQCSyxA06cqbW,False,Tone of Love,61.0,track,spotify
2,https://api.spotify.com/v1/albums/2rK7rji9Qxaf...,2rK7rji9Qxafhao5PFXtu5,Longing,2019-05-30,album,[{'external_urls': {'spotify': 'https://open.s...,https://api.spotify.com/v1/tracks/1ONolOLYjM97...,1ONolOLYjM977hgtqKzXmJ,False,Longing,56.0,track,spotify
3,https://api.spotify.com/v1/albums/4zojkjd8Enxh...,4zojkjd8EnxhvBQAx1Nd4W,En t'attendant,2011-05-02,album,[{'external_urls': {'spotify': 'https://open.s...,https://api.spotify.com/v1/tracks/2rK5aTaCfPoS...,2rK5aTaCfPoSX0TqAW5YgV,False,Fin,60.0,track,spotify
4,https://api.spotify.com/v1/albums/6AUPQJfBYnyw...,6AUPQJfBYnyw47dZ9V0w89,Florentin,2019-04-03,album,[{'external_urls': {'spotify': 'https://open.s...,https://api.spotify.com/v1/tracks/6lE5ACHHqeEk...,6lE5ACHHqeEk7nLciGJH3P,False,Florentin,57.0,track,spotify


In [19]:
songs_df.shape

(22537, 13)

In [20]:
songs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22537 entries, 0 to 22536
Data columns (total 13 columns):
track.album.href            22173 non-null object
track.album.id              22173 non-null object
track.album.name            22534 non-null object
track.album.release_date    22173 non-null object
track.album.type            22534 non-null object
track.artists               22534 non-null object
track.href                  22173 non-null object
track.id                    22173 non-null object
track.is_local              22534 non-null object
track.name                  22534 non-null object
track.popularity            22534 non-null float64
track.type                  22534 non-null object
owner.id                    22537 non-null object
dtypes: float64(1), object(12)
memory usage: 2.2+ MB


## 7. Extract artist information:

### Defining a function to extract (by means of flattenning) the artist of the songs_df:

In [21]:
def artist_extractor(item):
    if type(item) == float:
        return []
    else:
        artist_df = json_normalize(item)
        artist_list = [str(artist_df['name'][i]) for i in range(len(artist_df))]
        return artist_list

### Applying the function to the whole 'track.artists' column in order to change the json information for a more readable list of artists:

In [22]:
songs_df['track.artists'] = songs_df['track.artists'].apply(artist_extractor)

## 8. songs_df information, cleaning, and export:

In [23]:
songs_df.head()

Unnamed: 0,track.album.href,track.album.id,track.album.name,track.album.release_date,track.album.type,track.artists,track.href,track.id,track.is_local,track.name,track.popularity,track.type,owner.id
0,https://api.spotify.com/v1/albums/6hzdQgjn3wTJ...,6hzdQgjn3wTJhOUI4cVKu2,Amelie from Montmartre,2001-04-23,album,[Yann Tiersen],https://api.spotify.com/v1/tracks/2AkcjsKlRbIB...,2AkcjsKlRbIBYGAgpQVFii,False,"Comptine d'un autre été, l'après-midi",15.0,track,spotify
1,https://api.spotify.com/v1/albums/5Q23RuwpOqhN...,5Q23RuwpOqhN28KtBOdnGp,Tone of Love,2019-03-23,album,[Charles Bolt],https://api.spotify.com/v1/tracks/3Z0n8POAcDQC...,3Z0n8POAcDQCSyxA06cqbW,False,Tone of Love,61.0,track,spotify
2,https://api.spotify.com/v1/albums/2rK7rji9Qxaf...,2rK7rji9Qxafhao5PFXtu5,Longing,2019-05-30,album,[Joaquin Mans],https://api.spotify.com/v1/tracks/1ONolOLYjM97...,1ONolOLYjM977hgtqKzXmJ,False,Longing,56.0,track,spotify
3,https://api.spotify.com/v1/albums/4zojkjd8Enxh...,4zojkjd8EnxhvBQAx1Nd4W,En t'attendant,2011-05-02,album,[Mélanie Laurent],https://api.spotify.com/v1/tracks/2rK5aTaCfPoS...,2rK5aTaCfPoSX0TqAW5YgV,False,Fin,60.0,track,spotify
4,https://api.spotify.com/v1/albums/6AUPQJfBYnyw...,6AUPQJfBYnyw47dZ9V0w89,Florentin,2019-04-03,album,[Joaquin Alejandro],https://api.spotify.com/v1/tracks/6lE5ACHHqeEk...,6lE5ACHHqeEk7nLciGJH3P,False,Florentin,57.0,track,spotify


In [24]:
songs_df.shape

(22537, 13)

In [25]:
songs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22537 entries, 0 to 22536
Data columns (total 13 columns):
track.album.href            22173 non-null object
track.album.id              22173 non-null object
track.album.name            22534 non-null object
track.album.release_date    22173 non-null object
track.album.type            22534 non-null object
track.artists               22537 non-null object
track.href                  22173 non-null object
track.id                    22173 non-null object
track.is_local              22534 non-null object
track.name                  22534 non-null object
track.popularity            22534 non-null float64
track.type                  22534 non-null object
owner.id                    22537 non-null object
dtypes: float64(1), object(12)
memory usage: 2.2+ MB


### Deleting all rows that are local songs:

In [26]:
# Excluding all rows where .is_local is True
songs_df = songs_df[songs_df['track.is_local'] == False]

### Rearranging columns:

In [27]:
songs_df = songs_df[['track.popularity', 'track.name', 'track.artists', 
             'track.album.name','track.href', 'track.album.href', 
             'track.id', 'track.album.id', 'track.album.release_date',
             'track.type', 'track.album.type', 'track.is_local', 'owner.id']]

### Renaming columns:

In [28]:
# Replaces colon for underscore to adapt column names to snake case:
snake_cols = [i.replace('.','_') for i in songs_df.columns]

# Changes column names
songs_df.columns = snake_cols

In [29]:
songs_df.shape

(22173, 13)

In [30]:
songs_df.head()

Unnamed: 0,track_popularity,track_name,track_artists,track_album_name,track_href,track_album_href,track_id,track_album_id,track_album_release_date,track_type,track_album_type,track_is_local,owner_id
0,15.0,"Comptine d'un autre été, l'après-midi",[Yann Tiersen],Amelie from Montmartre,https://api.spotify.com/v1/tracks/2AkcjsKlRbIB...,https://api.spotify.com/v1/albums/6hzdQgjn3wTJ...,2AkcjsKlRbIBYGAgpQVFii,6hzdQgjn3wTJhOUI4cVKu2,2001-04-23,track,album,False,spotify
1,61.0,Tone of Love,[Charles Bolt],Tone of Love,https://api.spotify.com/v1/tracks/3Z0n8POAcDQC...,https://api.spotify.com/v1/albums/5Q23RuwpOqhN...,3Z0n8POAcDQCSyxA06cqbW,5Q23RuwpOqhN28KtBOdnGp,2019-03-23,track,album,False,spotify
2,56.0,Longing,[Joaquin Mans],Longing,https://api.spotify.com/v1/tracks/1ONolOLYjM97...,https://api.spotify.com/v1/albums/2rK7rji9Qxaf...,1ONolOLYjM977hgtqKzXmJ,2rK7rji9Qxafhao5PFXtu5,2019-05-30,track,album,False,spotify
3,60.0,Fin,[Mélanie Laurent],En t'attendant,https://api.spotify.com/v1/tracks/2rK5aTaCfPoS...,https://api.spotify.com/v1/albums/4zojkjd8Enxh...,2rK5aTaCfPoSX0TqAW5YgV,4zojkjd8EnxhvBQAx1Nd4W,2011-05-02,track,album,False,spotify
4,57.0,Florentin,[Joaquin Alejandro],Florentin,https://api.spotify.com/v1/tracks/6lE5ACHHqeEk...,https://api.spotify.com/v1/albums/6AUPQJfBYnyw...,6lE5ACHHqeEk7nLciGJH3P,6AUPQJfBYnyw47dZ9V0w89,2019-04-03,track,album,False,spotify


### Dropping duplicates:

In [31]:
songs_df = songs_df.drop_duplicates(subset='track_id')

In [32]:
songs_df.shape

(17443, 13)

### Exporting raw dataframe:

In [33]:
#Exporting:
songs_raw_export = songs_df.to_csv('%s/output/spotify_songs_raw.csv'%PATH, index=False)

### Filtering by owner (deleting songs of followed playlists):

In [34]:
# Excludes all rows where owner.id is not in initial list of friends
songs_filtered = songs_df.loc[songs_df['owner_id'].isin(friends)]

In [35]:
songs_filtered.shape

(10659, 13)

###  TOP creators of playlists

In [36]:
songs_filtered['owner_id'].value_counts()

aechos               1767
popersea             1373
elyp                 1344
pau_ik               1072
angelus_              853
brunocossio           643
dannig012             624
chicareversible       529
42416345k             515
alequijada            388
amane88               386
nestorviolin          359
elsapato              313
marc.vallvebernal     281
jluisite              212
Name: owner_id, dtype: int64

### Exporting filtered dataframe:

In [37]:
#Exporting:
songs_filtered_export = songs_filtered.to_csv('%s/output/spotify_songs_filtered.csv'%PATH, index=False)