In [54]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
import pickle
from sklearn.preprocessing import StandardScaler


In [9]:
# 2. Store the chart URL in a variable
url = "https://www.billboard.com/charts/hot-100"
# 3. Download the HTML with a GET request. requests has no default timeout,
#    so set one explicitly to keep the cell from hanging if the server stalls.
response = requests.get(url, timeout=30)
# A 200 status code means OK!
response

<Response [200]>

In [10]:
# Spotify
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Initialize Spotipy with the app credentials.
# SECURITY: the client id/secret used to be hard-coded in this cell. Any value
# that was ever committed or shared must be considered leaked and rotated in
# the Spotify developer dashboard.
# With no arguments, SpotifyClientCredentials reads the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables, so export those before starting
# the kernel.
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

In [11]:
# Parse the downloaded page body with the "html.parser" backend so the chart
# elements can be queried with CSS selectors below.
hot100= BeautifulSoup (response.content,"html.parser")

In [12]:
#Own made functions
def read_text(select, attribute=0):
    """Collect one value per tag from an iterable of tags.

    Parameters
    ----------
    select : iterable
        Tags to read, e.g. the result of ``soup.select(...)``.
    attribute : optional
        With the default ``0`` the visible text of each tag is collected
        (``tag.get_text()``); any other value is used as a key and
        ``tag[attribute]`` is collected instead.

    Returns
    -------
    list
        The collected values, in the order the tags were given.
    """
    if attribute == 0:
        return [tag.get_text() for tag in select]
    return [tag[attribute] for tag in select]

                    


### Project: Scrape https://www.billboard.com/charts/hot-100 and get a dataframe with the name of the songs, the artists (and, possibly, more info).


In [13]:
# Song titles: visible text of every <span> matching the song-title selector.
title=read_text(hot100.select("span.chart-element__information__song.text--truncate.color--primary"))

In [14]:
# Artist credits: visible text of every <span> matching the artist selector.
artists=read_text(hot100.select("span.chart-element__information__artist.text--truncate.color--secondary"))

In [15]:
# Last week's rank, kept as scraped text (the preview below shows "-" for some entries).
last_week=read_text(hot100.select("span.chart-element__meta.text--center.color--secondary.text--last"))

In [16]:
# Peak rank, kept as scraped text; the value still carries its label (e.g. "1 Peak Rank").
peak_rank=read_text(hot100.select("span.chart-element__information__delta__text.text--peak"))

In [17]:
# Weeks on chart, kept as scraped text; the value still carries its label (e.g. "12 Weeks on Chart").
weeks_on=read_text(hot100.select("span.chart-element__information__delta__text.text--week"))

In [18]:
# Assemble the scraped lists into one table: one row per chart entry, one
# column per scraped field (column order follows the list of names below).
billboard_df = pd.DataFrame(
    dict(
        zip(
            ["Song title", "Artists involved", "Rank Last Week", "Peak Rank", "Weeks on"],
            [title, artists, last_week, peak_rank, weeks_on],
        )
    )
)

In [19]:
# Preview the first five chart rows to check that the scraped columns line up.
billboard_df.head(5)


Unnamed: 0,Song title,Artists involved,Rank Last Week,Peak Rank,Weeks on
0,Positions,Ariana Grande,-,1 Peak Rank,1 Weeks on Chart
1,Forever After All,Luke Combs,-,2 Peak Rank,1 Weeks on Chart
2,Mood,24kGoldn Featuring iann dior,1,1 Peak Rank,12 Weeks on Chart
3,Laugh Now Cry Later,Drake Featuring Lil Durk,3,2 Peak Rank,11 Weeks on Chart
4,Blinding Lights,The Weeknd,4,1 Peak Rank,48 Weeks on Chart


In [20]:
# Ask for a song and check whether it is in the given titles. The match is an
# exact title match that ignores letter case and surrounding whitespace; no
# fuzzy matching is done, so a misspelled or partial title counts as not found.
def input_song(df):
    """Prompt the user for a song title and look it up in ``df``.

    Parameters
    ----------
    df : pandas.Series
        Series of song titles (despite the parameter name, a Series is
        required because the ``.str`` accessor is used).

    Returns
    -------
    list
        ``[found, song]`` where ``found`` is True when the typed title equals
        one of the titles case-insensitively, and ``song`` is the typed text
        with leading/trailing whitespace removed.
    """
    # Strip stray spaces so " Mood " still matches "Mood" and so the text is
    # clean for callers that reuse it.
    song = input("Input one of your favourite songs ").strip()
    known_titles = df.str.lower().values
    return [song.lower() in known_titles, song]

In [21]:
def random_song(df):
    """Pick one random entry from the chart table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with "Song title" and "Artists involved" columns.

    Returns
    -------
    tuple
        ``(picked, performers)``: ``picked`` is a one-element Series sampled
        from the "Song title" column, and ``performers`` holds the
        "Artists involved" values of every row whose title equals it.
    """
    picked = df["Song title"].sample()
    same_title = df["Song title"] == picked.iloc[0]
    return picked, df.loc[same_title, "Artists involved"]

In [22]:
def get_playlist_tracks(username, playlist_id):
    """Fetch every item of a playlist, following pagination to the end.

    Uses the module-level ``spotify`` client. The first page comes from
    ``spotify.user_playlist_tracks``; as long as a page has a truthy "next"
    entry, the following page is requested with ``spotify.next`` and its
    items are appended.

    Returns the accumulated list of playlist items (the first page's own
    "items" list, extended in place).
    """
    page = spotify.user_playlist_tracks(username, playlist_id)
    collected = page["items"]
    while True:
        if not page["next"]:
            return collected
        page = spotify.next(page)
        collected += page["items"]

In [23]:
def get_details(result, batch_size=100):
    """Extract names, artists and audio features from playlist items.

    Uses the module-level ``spotify`` client to request audio features in
    batches of ``batch_size`` track URIs.

    Parameters
    ----------
    result : iterable of dict
        Playlist items as returned by ``get_playlist_tracks``; each item needs
        an "is_local" flag and a "track" entry with "name", "uri" and
        "artists" (only the first artist is used).
    batch_size : int, optional
        Number of track URIs sent per ``spotify.audio_features`` call.
        Defaults to 100, the value the original code used.
        NOTE(review): believed to be the Web API maximum per request - confirm.

    Returns
    -------
    tuple
        ``(song_details, artist_uri)`` where ``song_details`` is
        ``[song_names, artist_names, flat_features]`` (three parallel lists)
        and ``artist_uri`` lists the first artist's URI of every kept track.

    Notes
    -----
    Local files are skipped. Items whose "track" is None are skipped as well,
    instead of raising TypeError.
    """
    song_names = []
    artist_names = []
    artist_uri = []
    flat_features = []
    pending_uris = []  # URIs collected since the last audio_features call

    for item in result:
        track = item["track"]
        if item["is_local"] or track is None:
            continue

        first_artist = track["artists"][0]
        song_names.append(track["name"])
        artist_names.append(first_artist["name"])
        artist_uri.append(first_artist["uri"])
        pending_uris.append(track["uri"])

        if len(pending_uris) == batch_size:
            flat_features.extend(spotify.audio_features(pending_uris))
            pending_uris = []

    # Only flush a non-empty remainder: the old unconditional call sent an
    # empty id list when the track count was 0 or an exact multiple of 100.
    if pending_uris:
        flat_features.extend(spotify.audio_features(pending_uris))

    return [song_names, artist_names, flat_features], artist_uri


In [24]:
def clean_features(featureslist):
    """Turn a list of per-track feature dicts into a dict of columns.

    Parameters
    ----------
    featureslist : iterable
        One audio-feature dict per track. An entry may be None
        (NOTE(review): Spotify is believed to return null for tracks without
        audio analysis - confirm against the API reference).

    Returns
    -------
    dict
        Maps each kept feature name to a list with one value per input entry,
        in input order. Only the ten features listed below are kept; other
        keys of the input dicts are ignored. A None entry contributes None to
        every column, so the rows stay aligned with the song titles instead
        of raising TypeError.

    Raises
    ------
    KeyError
        If a non-None entry lacks one of the kept features.
    """
    feature_names = (
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "duration_ms",
    )
    all_features = {name: [] for name in feature_names}

    for feature in featureslist:
        for name in feature_names:
            all_features[name].append(None if feature is None else feature[name])

    return all_features
    

In [25]:
def extra_songs_from_artists(artists_uri):
    """Return whatever ``spotify.artist_top_tracks`` yields for ``artists_uri``.

    Thin wrapper around the module-level ``spotify`` client; the argument is
    passed through unchanged and the response is returned as-is.
    """
    return spotify.artist_top_tracks(artists_uri)
    
    

In [61]:
def _ask_yes_no(prompt):
    """Prompt until the user answers 'y' or 'n' (any case); return True for 'y'."""
    while True:
        answer = input(prompt).strip().lower()
        if answer == "y":
            return True
        if answer == "n":
            return False
        print("\nThat's neither 'y' or 'n', try again: do you want another recommendation? y/n : ")


def run(original_df):
    """Interactive recommender entry point.

    Asks for a song via ``input_song``. If the song is in the chart table,
    random other chart songs are recommended (via ``random_song``) for as long
    as the user answers 'y'. If it is not, the user is asked for the artist,
    the song is searched on Spotify, and its audio features are scaled and
    passed to the pre-trained ``kmeans`` model.

    Relies on the module-level ``spotify``, ``scaler`` and ``kmeans`` objects.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Chart table with "Song title" and "Artists involved" columns. It is
        never modified.

    Returns
    -------
    None
        All results are printed.
    """
    chosen_song = input_song(original_df["Song title"])

    if chosen_song[0]:
        # Candidate pool: every chart row except the song the user typed, so
        # the typed song can never come back as a recommendation.
        typed_title = chosen_song[1].lower()
        pool = original_df[original_df["Song title"].str.lower() != typed_title]

        while True:
            # Stop cleanly when nothing is left; sampling an empty table raises.
            if pool.empty:
                print("\nThere are no more songs left to recommend.")
                break

            new_song = random_song(pool)
            recommended_title = list(new_song[0])[0]
            print("Here's our recommendation based in your previous choice :\nSong title: "+
                  recommended_title+"\nInterpreted by : "+list(new_song[1])[0])

            # The old loop compared the bound method `ask_another.lower` with a
            # string, which is always unequal; the answer is now compared.
            if not _ask_yes_no("Do you want another recommendation based on your choice? y/n : "):
                print("\nThanks for using our services!")
                break

            # Remove the current recommendation so it is not repeated.
            pool = pool[pool["Song title"] != recommended_title]
        return

    print("Song not hot")
    artist=(input("Write down the name of the artist interpeting your previous song choice : "))
    results = spotify.search(q="track:"+chosen_song[1]+" artist:"+artist,limit=10)
    items = results["tracks"]["items"]
    if not items:
        # Previously an IndexError on items[0].
        print("\nNo matching song was found on Spotify.")
        return

    best_match = items[0]
    print(best_match["name"])
    print(best_match["artists"][0]["name"])

    features = spotify.audio_features(best_match["uri"])
    if not features or features[0] is None:
        print("\nSpotify has no audio features for this song.")
        return

    audio_features_df=pd.DataFrame(features)
    audio_features=audio_features_df[["danceability","energy","loudness","speechiness","acousticness","instrumentalness","liveness","valence","tempo"]]
    chosen_song_cluster=kmeans.predict(scaler.transform(audio_features))
    # The prediction used to be computed and dropped; show it, as the run
    # output recorded in this notebook does.
    print(chosen_song_cluster)

In [136]:
# Download every item of the playlist (all pages) and show the first raw item
# to inspect the structure of the response.
tracks=get_playlist_tracks("spotify","spotify:playlist:1G8IpkZKobrIlXcVPoSIuf")
tracks[0]

{'added_at': '2018-07-15T21:44:28Z',
 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/acclaimedmusic'},
  'href': 'https://api.spotify.com/v1/users/acclaimedmusic',
  'id': 'acclaimedmusic',
  'type': 'user',
  'uri': 'spotify:user:acclaimedmusic'},
 'is_local': False,
 'primary_color': None,
 'track': {'album': {'album_type': 'album',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/74ASZWbe4lXaubB36ztrGX'},
     'href': 'https://api.spotify.com/v1/artists/74ASZWbe4lXaubB36ztrGX',
     'id': '74ASZWbe4lXaubB36ztrGX',
     'name': 'Bob Dylan',
     'type': 'artist',
     'uri': 'spotify:artist:74ASZWbe4lXaubB36ztrGX'}],
   'available_markets': ['AD',
    'AE',
    'AL',
    'AR',
    'AT',
    'AU',
    'BA',
    'BE',
    'BG',
    'BH',
    'BO',
    'BR',
    'BY',
    'CA',
    'CH',
    'CL',
    'CO',
    'CR',
    'CY',
    'CZ',
    'DE',
    'DK',
    'DO',
    'DZ',
    'EC',
    'EE',
    'EG',
    'ES',
    'FI',
    'FR

In [137]:
# get_details returns (song_details, artist_uri); song_details is
# [song_names, artist_names, flat_features].
list_of_details=get_details(tracks)

In [138]:
# [0][2] is the flat list of per-track audio features; turn it into a dict of columns.
feats_dict=clean_features(list_of_details[0][2])

In [139]:
# Titles and artists come from get_details' first return value
# ([0][0] = song names, [0][1] = artist names). The cell used to read them
# from `test_list`, a name that is not defined anywhere in this notebook and
# only worked through leftover kernel state.
songs_df=pd.DataFrame({"Title":list_of_details[0][0],
                      "Artists":list_of_details[0][1]})
features_df=pd.DataFrame(feats_dict)

In [140]:
# Attach the audio-feature columns to the title/artist rows (aligned on the
# row index). Feature columns already present are dropped first so the cell
# can be re-run: a plain songs_df.join(features_df) raises on the second run
# because the column names overlap.
songs_df = songs_df.drop(columns=features_df.columns, errors="ignore").join(features_df)

In [142]:
# Check row count, dtypes and missing values of the combined table.
songs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9936 entries, 0 to 9935
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Title             9936 non-null   object 
 1   Artists           9936 non-null   object 
 2   danceability      9936 non-null   float64
 3   energy            9936 non-null   float64
 4   loudness          9936 non-null   float64
 5   speechiness       9936 non-null   float64
 6   acousticness      9936 non-null   float64
 7   instrumentalness  9936 non-null   float64
 8   liveness          9936 non-null   float64
 9   valence           9936 non-null   float64
 10  tempo             9936 non-null   float64
 11  duration_ms       9936 non-null   int64  
dtypes: float64(9), int64(1), object(2)
memory usage: 931.6+ KB


In [143]:
# Save the table to disk. With pandas' default index=True the row index is
# written as an extra, unnamed first column.
songs_df.to_csv("songs_sample.csv")

In [34]:
# Load the pre-trained clustering model. The `with` block closes the file
# handle, which the bare open(...) call never did.
# NOTE(review): pickle.load can execute arbitrary code from the file - only
# load a "kmeans" file that was produced by this project.
with open("kmeans", "rb") as model_file:
    kmeans = pickle.load(model_file)

In [55]:
# Load the fitted scaler used to transform audio features before prediction.
# The `with` block closes the file handle, which the bare open(...) call never did.
# NOTE(review): pickle.load can execute arbitrary code from the file - only
# load a "scaler" file that was produced by this project.
with open("scaler", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

In [37]:
# Smoke test: the loaded model accepts one row of nine values (run() also feeds
# it nine audio features) and returns a label. The values are arbitrary and
# are not passed through the scaler.
kmeans.predict([[1,2,3,4,5,3,1,2,3]])

array([4])

In [60]:
# Start the interactive recommender on the scraped chart table (prompts for input).
run(billboard_df)

Input one of your favourite songs quimica
Song not hot
Write down the name of the artist interpeting your previous song choice : juancho
Química
Juancho Marqués
[3]


In [21]:
#TODO: download all the songs of an album

#TODO: apply the album-download function to every artist

In [None]:
#If the song is not hot: we need its audio features from the Spotify API, so we send the input song, ask for its audio features, and then assign the returned features to a cluster.
#Get a lot of songs from Spotify: at least the URI of each song and its audio features, plus the song name, artists, album (?) and popularity.