In [1]:
import pandas as pd
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import spotipy.util as util
import sys
import time
import numpy as np
from collections import defaultdict
from credential import secrets
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
#Save credential from .py field
# The two first are essential for extracting general information
SPOTIPY_CLIENT_ID=secrets.get('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET=secrets.get('SPOTIPY_CLIENT_SECRET')
#The indirect URI is used to extarct information from a personal account with more detail (ex: user_top_tracks)
SPOTIPY_REDIRECT_URI =secrets.get('SPOTIPY_REDIRECT_URI')
username =secrets.get('username')

In [3]:
auth_manager = SpotifyClientCredentials(client_id = SPOTIPY_CLIENT_ID, client_secret = SPOTIPY_CLIENT_SECRET)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [4]:
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=SPOTIPY_CLIENT_ID,
                                               client_secret=SPOTIPY_CLIENT_SECRET,
                                               redirect_uri=SPOTIPY_REDIRECT_URI))

In [136]:
# The scope is what determines the request we can execute. In this case the I'm using the "user-top-read" 
# because I want to extract the songs I've listened to the most
scope = 'user-top-read'
#Another peculiarity of this request is that it requires a token, unlike the general requests we can execute
# with basic credentials
token = util.prompt_for_user_token(username,scope,client_id=SPOTIPY_CLIENT_ID
                           ,client_secret=SPOTIPY_CLIENT_SECRET
                           ,redirect_uri=SPOTIPY_REDIRECT_URI)

sp = spotipy.Spotify(auth=token)
results = sp.current_user_top_tracks()
tracks = results['items']

while results['next']:
    results = sp.next(results)
    tracks.extend(results['items'])

my_top_songs = pd.DataFrame()
my_top_songs['track'] = [tracks[item]['name'] for item in range(0, len(tracks))]
my_top_songs['id'] = [tracks[item]['id'] for item in range(0, len(tracks))] 
my_top_songs['ranking'] = list(range(1,len(my_top_songs)+1))

Enter the URL you were redirected to: https://jperezllo.com/callback/?code=AQCHKYCF3AtQkXtckG7n6g0TyMhigCdndU9JitQr8YWaUO8TT5lP9ACcv9ZHEi_wopzBhcfX2iKu6Dy25ony7CuP275l-lbtysKp8xBJZ7MWxgUqE7sg22oXIrPlBe3o7LelC3eeCkjnQPLzfCoWd5_QWrLlVQJFZWyTO255QMzy07s5JVRsCoydv4SX4eZuZ0k


In [138]:
my_top_songs.to_csv("data\\csv\\my_top_songs.csv")

### Downloading Json files

In [9]:
history_0 = pd.read_json('data\\json\\StreamingHistory0.json')

In [10]:
history_1 = pd.read_json('data\\json\\StreamingHistory1.json')

In [11]:
my_streaming_history = pd.concat([history_0, history_1], axis = 0)

In [12]:
my_streaming_history.to_csv('data\\csv\\my_streaming_history.csv')

In [13]:
search_queries = pd.read_json('data\\json\\SearchQueries.json')

In [14]:
data = pd.read_csv('data\\csv\\tracks.csv')

In [216]:
images = pd.read_csv('data\\csv\\track_images.csv')

### Getting all artists id on my JP playlist

https://stackoverflow.com/questions/39086287/spotipy-how-to-read-more-than-100-tracks-from-a-playlist

In [39]:
scope = "playlist-read-private"
token = util.prompt_for_user_token("perezllo",scope,client_id=SPOTIPY_CLIENT_ID
                               ,client_secret=SPOTIPY_CLIENT_SECRET
                               ,redirect_uri=SPOTIPY_REDIRECT_URI)

sp = spotipy.Spotify(auth=token)
results = sp.current_user_playlists(limit=50)
playlists = results['items']
ignore_playlists = ['This Is RIOPY', '2022', 'Sons Of The East Radio',
                'Life Is  Wonderful', 'Acoustic Chill', 'Acoustic Pop Hits','Discover Weekly',
                    'Life Is Wonderful','Dance Music','Indie Folk Chill','Heart Beats','Disney','Te acuerdas?',
                   'con V de','Bad liar']

while results['next']:
    results = sp.next(results)
    playlists.extend(results['items'])
    my_playlists = [playlist for playlist in my_playlists if playlist not in ignore_playlists]

my_playlists_id = [playlists[item]['id'] for item in range(0, len(playlists))]
my_playlists_name =[playlists[item]['name'] for item in range(0, len(playlists))]
my_playlists = pd.DataFrame({"playlist":my_playlists_name, "playlist_id":my_playlists_id})
my_playlists = my_playlists[~my_playlists['playlist'].isin(ignore_playlists)]

In [208]:
my_playlists.to_csv("data\\csv\\my_playlists.csv")

### Extract my "Liked Songs" => could be included in the "House" playlist because of the style

In [85]:
scope = "user-library-read"

token = util.prompt_for_user_token(username,scope,client_id=SPOTIPY_CLIENT_ID
                           ,client_secret=SPOTIPY_CLIENT_SECRET
                           ,redirect_uri=SPOTIPY_REDIRECT_URI)

sp = spotipy.Spotify(auth=token)
results = sp.current_user_saved_tracks()
tracks = results['items']

while results['next']:
    results = sp.next(results)
    tracks.extend(results['items'])

my_liked_songs = pd.DataFrame()
my_liked_songs['track'] = [tracks[item]['track']['name'] for item in range(0, len(tracks))]
my_liked_songs['id'] = [tracks[item]['track']['id'] for item in range(0, len(tracks))] 
my_liked_songs['playlist_id'] = "my liked songs"
liked_tracks_info = {}
for track_id in my_liked_songs['id']:
    audio_features = get_track_features(track_id)
    liked_tracks_info[track_id] = audio_features
my_liked_results = pd.DataFrame.from_dict(liked_tracks_info, orient='index')
my_liked_results = my_liked_results.reset_index().rename(columns={'index':'id'})
my_liked_results = my_liked_songs.merge(my_liked_results, how='left')

Enter the URL you were redirected to: https://jperezllo.com/callback/?code=AQDxDSfKcdvNiAH8phd2a9RtBoCR7rlfqNI-AMcB4_xQnSiT3nHtr13mihqXXNCf9jMnVUzf01xyMD3HF9rJ1bnf7pYnH3ICyXE5IfHAAfU_He6N3D_Iu9FkB8mx3PEGZgba_4mP7xwHHbRgnMKye7nT2FMQ3kAPll9Zh7xdIzuguaxh2keBxmnTtx33xoQrPfkMCkT1


### Functions to extract all information from tracks

In [100]:
#def get_playlists(username):
   # my_playlists = sp.user_playlists('perezllo')['items']
   # playlist_id = []
   # for number in range(0, len(my_playlists)):
        #playlist_id[my_playlists[number]['name']] = my_playlists[number]['id']
        #playlist_id.append(my_playlists[number]['id'])
    #return playlist_id
    
scope = 'playlist-read-private'
# Function to extract all my playlists
def get_playlists(username):
    token = util.prompt_for_user_token(username,scope,client_id=SPOTIPY_CLIENT_ID
                               ,client_secret=SPOTIPY_CLIENT_SECRET
                               ,redirect_uri=SPOTIPY_REDIRECT_URI)

    sp = spotipy.Spotify(auth=token)
    results = sp.current_user_playlists(limit=50)
    playlists = results['items']
    ignore_playlists = ['This Is RIOPY', '2022', 'Sons Of The East Radio',
                'Life Is  Wonderful', 'Acoustic Chill', 'Acoustic Pop Hits','Discover Weekly',
                    'Life Is Wonderful','Dance Music','Indie Folk Chill','Heart Beats','Disney','Te acuerdas?',
                   'con V de','Bad liar']
    
    while results['next']:
        results = sp.next(results)
        playlists.extend(results['items'])
        my_playlists = [playlist for playlist in my_playlists if playlist not in ignore_playlists]
        
    my_playlists_id = [playlists[item]['id'] for item in range(0, len(playlists))]
    my_playlists_name =[playlists[item]['name'] for item in range(0, len(playlists))]
    my_playlists = pd.DataFrame({"playlist":my_playlists_name, "playlist_id":my_playlists_id})
    my_playlists = my_playlists[~my_playlists['playlist'].isin(ignore_playlists)]
    return my_playlists

In [75]:
#Once I have my playlsits and their ID I can extract the tracks id that comprise them
def playlist_tracks(playlist_id):
    #I first need to get the playlsit tracks using sp.playlist_tracks(playlist_id) which will return a dicitionary
    results = sp.playlist_tracks(playlist_id)
    #In the "items" key we have all the tracks stored, and that what needs to be used
    tracks = results['items']
    #I want to create a dicitonary with the name of the track as the key and its ID as the value to then create a DF
    track_info = {}
    #Spotipy has a limit of 20 results per request, but this can be solved by using the "Next" key inside the dicitionary.
    # This will allow us to keep extarcting results until we have all of them
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    for item in range(0, len(tracks)):
        track_info[tracks[item]['track']['name']]={"id":tracks[item]['track']['id']}
    return track_info

In [76]:
def get_track_features(track_id):
    audio_features =  sp.audio_features(track_id)
    audio_features = audio_features[0]
    #Remove unnecessary fields 
    keys_to_remove =["type", "uri", "track_href","analysis_url", "time_signature","id"]
    for key in keys_to_remove:
        try: 
            audio_features.pop(key, None)
        except:
            pass
    #Add track_name as the key and create another dicitonary where to store all the information, startin with the ID
    try:
        #Add audio features to the dicitionary containing the information of each track
        return audio_features
    except:
        pass

In [77]:
#Onec I have my playlsits and their ID I can extract the tracks id that comprise them
def get_playlist_tracks(playlist_id):
    #I first need to get the playlsit tracks using sp.playlist_tracks(playlist_id) which will return a dicitionary
    results = sp.playlist_tracks(playlist_id)
    #In the "items" key we have all the tracks stored, and that what needs to be used
    tracks = results['items']
    #I want to create a dicitonary with the name of the track as the key and its ID as the value to then create a DF
    track_info = {}
    #Spotipy has a limit of 20 results per request, but this can be solved by using the "Next" key inside the dicitionary.
    # This will allow us to keep extarcting results until we have all of them
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
     #I also want to extract the audio characteristics of each song, information I can get using sp.audio_features(track_id)
    
    #I have all the tracks stored in the tracks list and I can iterate through it to get what I need. 
    # I use a range in the for loop beacuse I need to iterate thorugh all items in the dicitionary
    for item in range(0, len(tracks)):
        #I extract the track_id
        track_id = tracks[item]['track']['id']
        audio_features = get_track_features(track_id)
        #Add track_name as the key and create another dicitonary where to store all the information, startin with the ID
        track_info[tracks[item]['track']['name']] = {"id":track_id ,"playlist_id":playlist_id}
        try:
            #Add audio features to the dicitionary containing the information of each track
            track_info[tracks[item]['track']['name']].update(audio_features)
        except:
            pass
    return  track_info


In [36]:
##Onec I have my playlsits and their ID I can extract the tracks id that comprise them
#def get_playlist_tracks(playlist_id):
#    #I first need to get the playlsit tracks using sp.playlist_tracks(playlist_id) which will return a dicitionary
#    results = sp.playlist_tracks(playlist_id)
#    #In the "items" key we have all the tracks stored, and that what needs to be used
#    tracks = results['items']
#    #I want to create a dicitonary with the name of the track as the key and its ID as the value to then create a DF
#    track_info = {}
#    #Spotipy has a limit of 20 results per request, but this can be solved by using the "Next" key inside the dicitionary.
#    # This will allow us to keep extarcting results until we have all of them
#    while results['next']:
#        results = sp.next(results)
#        tracks.extend(results['items'])
#     #I also want to extract the audio characteristics of each song, information I can get using sp.audio_features(track_id)
#    
#    #I have all the tracks stored in the tracks list and I can iterate through it to get what I need. 
#    # I use a range in the for loop beacuse I need to iterate thorugh all items in the dicitionary
#    for item in range(0, len(tracks)):
#        #I extract the track_id
#        track_id = tracks[item]['track']['id']
#        #sp.audio_features will return a dicitonary with some unnecessary information than can be discarded
#        keys_to_remove =["type", "uri", "track_href","analysis_url", "time_signature","id"]
#        #Applying sp.audio_features to each track
#        audio_features =  sp.audio_features(track_id)
#        audio_features = audio_features[0]
#        #Remove unnecessary fields 
#        for key in keys_to_remove:
#            try: 
#                audio_features.pop(key, None)
#            except:
#                pass
#        #Add track_name as the key and create another dicitonary where to store all the information, startin with the ID
#        track_info[tracks[item]['track']['name']] = {"id":track_id ,"playlist_id":playlist_id}
#        try:
#            #Add audio features to the dicitionary containing the information of each track
#            track_info[tracks[item]['track']['name']].update(audio_features)
#        except:
#            pass
#    return  track_info
#

In [101]:
def get_all_track_info(username):
    playlists = get_playlists(username)
    full_tracks = {}
    for playlist_id in playlists['playlist_id']: 
        full_tracks.update(get_playlist_tracks(playlist_id))
    results = pd.DataFrame.from_dict(full_tracks, orient='index')
    pd.concat([results,my_liked_results],axis=1,ignore_index=True)
    return results
        
        

In [102]:
tracks=get_all_track_info('perezllo')

Enter the URL you were redirected to: https://jperezllo.com/callback/?code=AQA-BpbLF7eTJRZfCUsT3XjTJnYTT36AEjogCxe2j34laUa4dxNb5_RwOLp319-d9i_6BbTBPoblLOzIbsvxpJ6r0SRzo7iVw7zIU0iWJIabQQIUj__2dXUccRkwp7kQbq2PXiJ96vBuq9iRiG11mDKCHFn3mbB31ZQi4anL232l03mYrF8pplHvi5-nPss2_GVC_FgNT2VcPg


In [115]:
tracks_combined = tracks.reset_index().rename(columns={'index':'track'})

In [116]:
tracks_combined = pd.concat([tracks_new,my_liked_results])

In [217]:
#tracks = tracks.reset_index()

In [119]:
scaler = MinMaxScaler()
values_to_normalize = tracks_combined['tempo']*-1
values_to_normalize_2d = values_to_normalize.values.reshape(-1,1)
normalized_column = scaler.fit_transform(values_to_normalize_2d)

In [118]:
tracks_combined['tempo_normalized'] = normalized_column

In [120]:
tracks_combined['loudness_normalized'] = normalized_column

In [125]:
tracks_normalized = tracks_combined.drop(columns={'tempo','loudness'})

In [126]:
tracks_normalized.to_csv("data\\csv\\tracks_normalized")

In [127]:
audio_features = ['danceability','energy','key','mode','speechiness','acousticness','instrumentalness',
                 'liveness','valence','tempo_normalized','loudness_normalized']

In [132]:
tracks_normalized_unpivot = pd.melt(tracks_normalized, id_vars=['track','id','playlist_id'
                                         ,'mode','key'],value_vars=audio_features)

In [133]:
tracks_normalized_unpivot.to_csv("data\\csv\\tracks_normalized_unpivot.csv",index=False)

### I want to have a dataframe with only the general information of the tracks without the musical characteristics

In [149]:
tracks_general = tracks_combined[['track','id','playlist_id','duration_ms']]

In [150]:
tracks_general.to_csv("data\\csv\\tracks_general.csv")

### Getting artists information

In [None]:
#The lat piece information I need for my analysis is related to the artists of my tracks
#I'm going to create an empty dictionary using the defaultdict() class from the collections module.
#This will help keeping all results and not overwrite exisiting ones because one track cna have more than one artist I want
#to get all of them
track_genres = defaultdict(list)
#I start by using the 'id' column from my recently created dataframe
for track_id in data['id']:
    #We get the basic information from each artists involved with the track.
    #Important! If a track has more that one artists involved the results will be returned in a list of dicitonaries
    #so I need to use a for loop to iterate through the entire list
    time.sleep(3)
    track_details = sp.track(track_id)
    artists = track_details['artists']
   # try:
   #     image = track_details['album']['images'][0]['url']
   # except:
       # pass
    #Again, I use a range to make sure I iterate through alll items(artists) in the list
    for artist in range(0, len(artists)):
        #Extract the id 
        artist_id = artists[artist]['id']
        artist_image = artists[artist][image][0][url]
        #Extract all information froma artist using sp.artist(artist_id)
        artist_info = sp.artist(artist_id)
        #Fill track_genres dictionary with the artists information inside a dicitonary as the value and the track_id as the key
        track_genres[track_id].append({'artist':artist_info['name'],
                                            'artist_id': artist_id,
                                            'genres':artist_info['genres'],
                                            'track_id': track_id,
                                            'popularity':artist_info['popularity']})

In [47]:
a = artists.head()

### Extract the image url of each song -> this will be used to display the image in the dashboard

In [33]:
track_images = {}
for track_id in tracks['id']:
    try:
        images = sp.track(track_id)['album']['images'][0]['url']
        track_images[track_id] = images
    except:
        pass

In [230]:
tracks['image_url'] = tracks.id.map(track_images)

### Like I did with the tracks, I want to extract the image url of the artists

In [63]:
artists_image = {}
for artist_id in artists['artist_id'].unique():
    try:
        artist_img = sp.artist(artist_id)['images'][0]['url']
    except: 
        pass
    artists_image[artist_id] = artist_img 

In [67]:
artists['image']=artists.artist_id.map(artists_image)

In [69]:
artists.to_csv('data\\csv\\artists.csv')

### Mergeing playlists and imag_url for the Imgur step -> pending revisions

In [134]:
my_streaming_history = pd.read_csv("data\\csv\\my_streaming_history.csv")

In [139]:
my_top_songs = pd.read_csv("data\\csv\\my_top_songs.csv")

In [140]:
my_streaming_history.rename(columns={'trackName':'track'},inplace=True)

In [144]:
my_streaming_history_songs_agg = my_streaming_history.groupby('track').agg({'msPlayed':'sum'})

In [145]:
my_streaming_history_songs_agg = my_streaming_history_songs_agg.reset_index()

In [146]:
my_streaming_history_artists_agg = my_streaming_history.groupby('artistName').agg({'msPlayed':'sum'})

In [147]:
my_streaming_history_artists_agg = my_streaming_history_artists_agg.reset_index()

In [151]:
tracks_complete = my_streaming_history_songs_agg.merge(tracks_general, how='left')

In [152]:
tracks_complete = tracks_complete.merge(my_top_songs, how='left')

In [155]:
my_streaming_history_songs_agg

Unnamed: 0,track,msPlayed
0,#198 - A contracorriente,167530
1,#210 - Buffett y el manual,655680
2,"#338 – Chamath Palihapitiya: Money, Success, S...",9981298
3,#341 – Guido van Rossum: Python and the Future...,1673582
4,"#345 – Coffeezilla: SBF, FTX, Fraud, Scams, Fa...",5743
...,...,...
4134,超速攻,347942
4135,進化,23634
4136,雑食,159026
4137,頂の景色,91973


In [121]:
artists = pd.read_csv("data\\csv\\artists.csv")

In [111]:
artists.drop(columns={"Unnamed: 0.1", "Unnamed: 0"},inplace=True)

In [119]:
artists = artists.rename(columns={"image":"image_url"})

In [120]:
artists.to_csv("data\\csv\\artists.csv")

In [271]:
track_images = pd.DataFrame(list(track_images.items()), columns =['id','image_url'])

In [272]:
track_images.to_csv('data\\track_images.csv')

In [24]:
data = pd.read_csv('data\\tracks_unpivot.csv')

In [51]:
data_t = pd.read_csv('data\\tracks_unpivot.csv')

In [115]:
artists.to_csv('data\\csv\\artists.csv',index=False)

In [236]:
a = tracks_general.merge(my_streaming_history_agg, how='left')

In [240]:
my_streaming_history[my_streaming_history.track=='A Sky Full of Stars - Hardwell Remix']

Unnamed: 0.1,Unnamed: 0,endTime,artistName,track,msPlayed


In [241]:
a[a.msPlayed.isnull()].head(50)

Unnamed: 0,track,id,playlist_id,duration_ms,image_url,artistName,msPlayed
1,The Best Part,04BNQFaBlFF7WDeTb1V0AP,37i9dQZF1DWSRc3WJklgBs,250000.0,https://i.scdn.co/image/ab67616d0000b2731ef333...,,
3,Way Back Home,2ZpL0ykp4gIyMxHam6vi6G,37i9dQZF1DWSRc3WJklgBs,164056.0,https://i.scdn.co/image/ab67616d0000b273d0b0c7...,,
6,State Of My Heart,2wPG888F5AMkxILKc9Pc3Q,37i9dQZF1DWSRc3WJklgBs,176123.0,https://i.scdn.co/image/ab67616d0000b273e88c8d...,,
7,Float Away,03P8OuXYbTy6dCWq007z1q,37i9dQZF1DWSRc3WJklgBs,200894.0,https://i.scdn.co/image/ab67616d0000b27377c9d2...,,
8,Inner Light,1FXlxs2DGHGlDx5ggf00hn,37i9dQZF1DWSRc3WJklgBs,208996.0,https://i.scdn.co/image/ab67616d0000b27347703e...,,
11,Heart,3Ezg9PfTvr7ESXXLtHNuuT,35oLMiAK4VM2T0ek3PTPIc,156779.0,https://i.scdn.co/image/ab67616d0000b273ebfc5e...,,
13,Moving On,7r1tJrG00pgUbVioN1hbzf,35oLMiAK4VM2T0ek3PTPIc,206090.0,https://i.scdn.co/image/ab67616d0000b2738d85c2...,,
15,IT'S YOU - ARMNHMR Remix,6EmRDtmAq556gtaj0rOjsW,37i9dQZF1DWSRc3WJklgBs,176540.0,https://i.scdn.co/image/ab67616d0000b2732b3c93...,,
20,Somebody To Love,2KTO4mRnARHCqXhV8f6MBj,37i9dQZF1DWSRc3WJklgBs,175385.0,https://i.scdn.co/image/ab67616d0000b273a85bbc...,,
22,Wild Feelings,4s4odJtCP8YlXssj1VYrp6,37i9dQZF1DWSRc3WJklgBs,161634.0,https://i.scdn.co/image/ab67616d0000b2735e2ec9...,,


## Obtaining valid URL - Scraping Imgur

In [74]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import regex as re

In [76]:
from credential import imgur_credentials as imgur
imgur_username = 'pello13'
imgur_password = 'laZenia2021!!'

In [39]:
image = pd.read_csv('data\\track_images.csv')

### Aggregate my_streaming_history to obtain that total seconds listened to each song

In [96]:
top_tracks = data_playlists[(~data_playlists.ranking.isnull()) & (data_playlists.playlist !='Gym')]

In [97]:
top_tracks = top_tracks[['track','id','playlist','artistName','msPlayed','image_url','ranking']]

### My gym playlist does not contains many songs that I actually listen to, so I'm going to exclude them

In [117]:
def extract_imgur_url(df):
    #track_image = pd.DataFrame(columns=['track', 'image_url'])
    #track_image['track'] = df.track
    PATH = 'C:\Program Files (x86)\chromedriver.exe'
    target_url = "https://imgur.com"
    driver=webdriver.Chrome(PATH)
    wait = WebDriverWait(driver, 20)
    driver.get(target_url)
    #Find sign in button and click on it
    sign_in = driver.find_element(By.CLASS_NAME, 'Navbar-signin')
    sign_in.click()

    #Send user credentials (username and password) and click on SIGN IN
    driver.find_element(By.XPATH, '/html/body/div[5]/div[3]/form/div[1]/input[1]').send_keys(imgur_username)
    driver.find_element(By.XPATH, '/html/body/div[5]/div[3]/form/div[1]/p/input').send_keys(imgur_password)
    sign_in = driver.find_element(By.XPATH, '/html/body/div[5]/div[3]/form/div[2]/button').click()

    #Open dropdown menu from my profile and enter the Images section
    driver.find_element(By.XPATH, '/html/body/div/div/div[1]/div/div[1]/div[3]/div[5]/div/div[2]/div/div[1]/span[2]').click()
    driver.find_element(By.XPATH, '/html/body/div/div/div[1]/div/div[1]/div[3]/div[5]/div/div[2]/div/div[2]/div/div[2]/a[4]').click()

    #Upload every image_url on my dataset 
    for image_url in df['image_url']:
        time.sleep(12)
        driver.find_element(By.XPATH,'/html/body/div[7]/div[2]/div[1]/div[1]').click()
        driver.find_element(By.XPATH, '/html/body/div[6]/div/span[1]/div/div/div[2]/div[3]/input').send_keys(image_url)
    urls = []

    #Extract new_url from imgur
    for number in reversed(range(1,len(df['image_url'])+1)):
        time.sleep(7)
        urls.append(re.search(r'\/\/i.imgur.com\/\w*.jpg',driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div/div[5]/div[1]/div[1]/div[{number}]'.format(number=number)).get_attribute("style")).group())
    df['imgur_image'] = urls
    return df

In [129]:
single_artists = artists[['artist','image_url','track_id']].drop_duplicates(keep='first')

In [137]:
tracks_unpivot

Unnamed: 0,track,id,playlist_id,playlist,mode,key,variable,value
0,,4X66U6SUjvZqiKYIpCWXnq,37i9dQZF1DWSRc3WJklgBs,,1.0,4.0,danceability,0.363000
1,,04BNQFaBlFF7WDeTb1V0AP,37i9dQZF1DWSRc3WJklgBs,,1.0,4.0,danceability,0.688000
2,,7HGHhLbAklVxGkYdwyeVML,5U2MYFAcQ3yT7XbHYgGPmb,House,0.0,1.0,danceability,0.718000
3,,2ZpL0ykp4gIyMxHam6vi6G,37i9dQZF1DWSRc3WJklgBs,,1.0,9.0,danceability,0.634000
4,,793OuR8rJ0zpVp708ONAxO,37i9dQZF1DWSRc3WJklgBs,,1.0,10.0,danceability,0.651000
...,...,...,...,...,...,...,...,...
28912,,38ec90xq2fKFMRJJahgnYX,5U2MYFAcQ3yT7XbHYgGPmb,House,1.0,5.0,loudness_normalized,0.232753
28913,,1t0Jmqg1pKVBbxjQFZebeR,5U2MYFAcQ3yT7XbHYgGPmb,House,0.0,5.0,loudness_normalized,0.194616
28914,,3By9IFm4E2BvKQygT04O8g,5U2MYFAcQ3yT7XbHYgGPmb,House,1.0,2.0,loudness_normalized,0.145688
28915,,530B0PHyIWouZrZ6dUWlPK,5U2MYFAcQ3yT7XbHYgGPmb,House,0.0,11.0,loudness_normalized,0.129317


In [131]:
single_artists = extract_imgur_url(single_artists)

  driver=webdriver.Chrome(PATH)


KeyboardInterrupt: 

In [103]:
most_listened_url.to_csv('data\\csv\\most_listened_to_url.csv')

## MySQL connection

In [12]:
import pandas as pd
import mysql.connector
import pymysql
from sqlalchemy import create_engine
from mysql.connector import Error

In [13]:
from credential import sql_credentials

In [14]:
host_name = sql_credentials['hostname']
user_name = sql_credentials['username']
password = sql_credentials['password']

In [15]:
connection = mysql.connector.connect(
host=host_name,
user=user_name,
passwd=password)

engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/spotify_project"
                       .format(user="root",
                               pw=password))

In [9]:
data = pd.read_csv('data\\tracks.csv')

In [24]:
artists = pd.read_csv('data\\artists.csv')

In [26]:
value_normalized = pd.read_csv('data\\tracks_unpivot_normalized.csv')

In [27]:
value_normalized.to_sql('audio_features_normalized',con=engine,if_exists='append',index=False)

25929

In [44]:
my_top_songs.to_sql('top_songs',con=engine,if_exists='append',index=False)

60

In [17]:
track_images.to_sql('track_images', con=engine, if_exists='append', index=False)

2880

#### Returning names of related artiststs

In [30]:
data.playlist.unique()

array(['Indie Folk Chill', 'Acoustic Pop Hits', "Chillin'",
       'Chillout 2023', 'Acoustic Chill', 'Sons Of The East Radio',
       'House', 'JP', '2022', 'This Is RIOPY', 'Life Is Wonderful',
       'para cantar', 'Discover Weekly', 'Dance Music', 'Chill-up',
       'Disney', 'OST', 'Gym'], dtype=object)

In [None]:
def get_related_artists(artist_id):
    results = sp.artist_related_artists(artist_id)
    artists = results['artists']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
        artists_id = [item['track']['artists'][0]['id'] for item in tracks]
    return artists_id

In [64]:
related_artists = sp.artist_related_artists('2hazSY4Ef3aB9ATXW7F5w3')

In [206]:
def related_artists_id(artists_id_list):
    # for artist_id in artist_id_list:
    related_artists = []
    related_artists_id = []
    
    for artist_id in artists_id_list: 
        related_artists.append(sp.artist_related_artists(artist_id))
    
    for n_1 in range(0, len(related_artists)):
        for n_2 in range(0, len(related_artists[n_1]['artists'])):
            if related_artists[n_1]['artists'][n_2]['name'] not in related_artists_id: 
                related_artists_id.append(related_artists[n_1]['artists'][n_2]['id'])
            else: 
                pass
            
            
    return related_artists_id
        
  #  return len(related_artists) # related_artists_id
                

In [207]:
related_artists_id = related_artists_id(artist_id)