In [None]:
import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spotipy
from sklearn.feature_extraction.text import CountVectorizer
from spotipy import SpotifyOAuth

import settings


In [4]:
# Spotify API credentials are read from the local `settings` module rather
# than being written into the notebook.
spotify_client_id = settings.SPOTIFY_CLIENT_ID
spotify_client_secret = settings.SPOTIFY_CLIENT_SECRET
# Redirect URI handed to SpotifyOAuth when the auth manager is built.
# NOTE(review): presumably this must match a redirect URI registered for the app -- confirm.
spotify_redirect_uri = "http://localhost:5000/callback"

In [5]:
# OAuth scopes requested from the current user, joined into the single
# space-separated string that SpotifyOAuth receives.
scope = ' '.join([
    'user-library-read',
    'user-top-read',
    'user-read-private',
    'playlist-read-private',
    'playlist-read-collaborative',
    'playlist-modify-public',
    'playlist-modify-private',
])

# The auth manager bundles the app credentials with the requested scopes.
auth_manager = SpotifyOAuth(
    client_id=spotify_client_id,
    client_secret=spotify_client_secret,
    redirect_uri=spotify_redirect_uri,
    scope=scope,
)

# Spotify client used for the API calls in the rest of the notebook.
sp = spotipy.Spotify(auth_manager=auth_manager)

In [6]:
# Page through the current user's saved tracks 50 at a time, collecting the
# items of every page until the response reports no further page.
liked_songs = []
offset = 0

while True:
    users_liked = sp.current_user_saved_tracks(offset=offset, limit=50)
    liked_songs += users_liked["items"]
    if users_liked["next"] is None:
        break
    offset += 50

In [None]:
# Total number of saved-track items collected by the paging loop.
len(liked_songs)

In [8]:
# One row per saved item, built from the nested "track" record of each item.
df = pd.DataFrame([saved_item["track"] for saved_item in liked_songs])

In [9]:
# ID of each track's first-listed artist, de-duplicated in order of first
# appearance.
unique_artists = (
    df['artists']
    .map(lambda track_artists: track_artists[0]['id'])
    .unique()
    .tolist()
)

In [10]:
# sp.artists(unique_artists[:50])

In [None]:
# Number of distinct first-listed artist IDs found across the saved tracks.
len(unique_artists)

In [12]:
# Look up artist records in batches of 50 IDs (Spotify's "Get Several Artists"
# endpoint accepts at most 50 IDs per request).
#
# Only the *full* batches are requested here. The trailing partial batch (the
# final `last` IDs) is requested by the cell that flattens `artists`, so asking
# for it here as well was a wasted call -- and it stored the raw response dict
# instead of its 'artists' list, which leaked the dict's keys into the
# flattened result as strings.
last = len(unique_artists) % 50

artists = [
    sp.artists(unique_artists[start:start + 50])['artists']
    for start in range(0, len(unique_artists) - last, 50)
]



In [None]:
# Number of entries in `artists` (one per `sp.artists` request made by the
# comprehension that built it).
len(artists)

In [14]:
# Flatten the per-request batches into a single list of artist records. Only
# list batches are flattened: a raw response dict would be iterated key by key
# and contribute strings rather than artist records.
collapse_artist = [
    item
    for sublist in artists
    if isinstance(sublist, list)
    for item in sublist
]

# Fetch the trailing partial batch. When the number of IDs is an exact multiple
# of 50, `last` is 0 and `unique_artists[-0:]` would be the *whole* list (more
# than the 50-ID request limit), so there is nothing left to fetch.
if last:
    collapse_artist.extend(sp.artists(unique_artists[-last:])['artists'])

In [15]:
# Blank out every entry that is not a plain dict ...
collapse_artist_notnull = [
    entry if type(entry) is dict else None
    for entry in collapse_artist
]
# ... then keep only the remaining artist records.
artists = [entry for entry in collapse_artist_notnull if entry is not None]

In [16]:
# Tabulate the artist records: one row per dict in `artists`.
artists_df = pd.DataFrame(artists)

In [None]:
# List the columns available on the artist table.
artists_df.columns

In [18]:
# Every distinct genre that appears on at least one artist, in order of first
# appearance. Artists with an empty genre list are excluded before exploding.
unique_genres = (
    artists_df.loc[artists_df['genres'].map(len) > 0, 'genres']
    .explode()
    .drop_duplicates()
    .tolist()
)

In [19]:
# Pull the 'total' value out of each artist's nested 'followers' record into
# its own column.
artists_df['total_followers'] = artists_df['followers'].map(
    lambda follower_info: follower_info['total']
)

In [20]:
# Encode each artist's genre list as a count vector over `unique_genres`.
#
# The previous version stored a separate, *unfitted* CountVectorizer object in
# every row and passed the genre list as `input`, which only accepts
# 'filename', 'file' or 'content'. One vectorizer with a fixed vocabulary is
# enough. The identity analyzer treats each whole genre string as a single
# token, so multi-word genres match their vocabulary entry instead of being
# split into separate words.
genre_vectorizer = CountVectorizer(
    vocabulary=unique_genres,
    analyzer=lambda genres: genres,
)
genre_counts = genre_vectorizer.transform(artists_df['genres']).toarray()

# One 1-D count array per artist, aligned on the artist table's index.
artists_df['cnt_vectorizer'] = pd.Series(list(genre_counts), index=artists_df.index)

In [None]:
# Display the full de-duplicated genre list.
unique_genres

In [None]:
# Preview the genre count vector of the first artist.
#
# The previous version fitted a fresh vectorizer for *every* artist only to
# display the first result, and its default word tokenizer split multi-word
# genres so they never matched their vocabulary entry. Here a single
# fixed-vocabulary vectorizer encodes just the first artist's genre list, with
# each whole genre string treated as one token.
genre_vectorizer = CountVectorizer(
    vocabulary=unique_genres,
    analyzer=lambda genres: genres,
)
genre_vectorizer.transform([artists_df['genres'].iloc[0]])

In [None]:
# Show only the artists that have at least one genre listed.
artists_df.loc[artists_df['genres'].map(len) >= 1]


In [23]:
# Request audio features in consecutive batches of up to 100 track IDs
# (Spotify's audio-features endpoint accepts at most 100 IDs per request).
#
# Plain list slices replace the earlier `np.array_split` call, which always
# produced one batch more than the rounded count, handed numpy string arrays to
# the client, and still issued a request when there were no tracks at all.
track_ids = df['id'].tolist()

audio_features = [
    sp.audio_features(track_ids[start:start + 100])
    for start in range(0, len(track_ids), 100)
]

In [24]:
# Flatten the per-request batches into one list of audio-feature records.
#
# * `None` entries are dropped: the API returns null for a track it has no
#   features for, and a None in the list breaks the later DataFrame build.
# * An element that is not a list (e.g. an already-flattened record when this
#   cell is re-run) is kept as a single item, so re-running is harmless.
audio_features = [
    item
    for row in audio_features
    for item in (row if isinstance(row, list) else [row])
    if item is not None
]

In [25]:
# Tabulate the flattened audio-feature records, one row per record.
features = pd.DataFrame(audio_features)

In [26]:
# Inner-join the track table with its audio features on the shared 'id' column.
combined_df = pd.merge(df, features, how='inner', on='id')

In [None]:
# List the columns of the merged track + audio-feature table.
combined_df.columns

In [28]:
# Number of artists credited on each track.
combined_df['num_artists'] = combined_df['artists'].map(len)

In [53]:
# Popularity divided by 100, stored alongside the raw value.
combined_df['popularity_norm'] = combined_df['popularity'] / 100

In [None]:
# For tracks with non-zero popularity, group by (popularity_norm, num_artists)
# and report the mean popularity_norm and the row count of each group.
(
    combined_df.loc[combined_df['popularity'] > 0]
    .groupby(['popularity_norm', 'num_artists'])
    .agg({'popularity_norm': 'mean', 'num_artists': 'count'})
)

In [None]:
# Today's local date as an ISO-8601 string (YYYY-MM-DD).
# `datetime` is imported as `dt` in the notebook's import cell, and the date is
# taken directly from the standard library instead of round-tripping through
# pandas.
current_date = dt.date.today().isoformat()


In [None]:
## works
# combined_df['album'].apply(lambda x: (pd.to_datetime(current_date)-pd.to_datetime(x['release_date'])).days)

# Show the 'release_date_precision' value recorded on each track's album.
combined_df['album'].map(lambda album: album['release_date_precision'])

In [None]:
# Distinct whitespace-separated words used across all genre names.
{word for genre in unique_genres for word in genre.split()}

In [187]:
# Remove the experimental `artists_id` column if it exists.
#
# The column is only created by the disabled experiment kept below for
# reference, so on a fresh kernel it is absent and an unconditional
# `del artists_df['artists_id']` raised KeyError. Dropping with
# errors='ignore' keeps the cell safe under Restart & Run All and on re-runs.
#
# artists_df['artists_id'] = artists_df['external_urls'].apply(lambda x: x['spotify'][32:])
# (32 == len('https://open.spotify.com/artist/'))
artists_df = artists_df.drop(columns=['artists_id'], errors='ignore')

In [None]:
# Re-check the artist table's columns.
artists_df.columns

In [None]:
# Show the IDs and the names of every artist credited on each track side by
# side. Previously the ID list was computed and then silently discarded,
# because only the last expression of a cell is displayed.
pd.DataFrame({
    'artist_ids': combined_df['artists'].map(
        lambda track_artists: [artist['id'] for artist in track_artists]
    ),
    'artist_names': combined_df['artists'].map(
        lambda track_artists: [artist['name'] for artist in track_artists]
    ),
})