In [5]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
pd.set_option('display.max_rows', None)
import collections
import difflib
import random

In [6]:
# Probe the first chart page once to confirm the site answers before scraping.
url = "https://playback.fm/charts/rock/2010"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
print(response.status_code) # 200 status code means OK!

# One chart URL per year, 2010 through 2021 inclusive.
url_list = [f"https://playback.fm/charts/rock/{year}" for year in range(2010, 2022)]

200


In [7]:
# Scrape every yearly chart page and collect song titles and artist names
# into two parallel lists.
song_list = []
artist_list = []

for url in url_list:
    print(f"Working on {url}...")
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    # Run each CSS query once per page and walk its result directly;
    # calling select() again for every index re-scanned the whole document
    # once per row.
    song_list.extend(tag.get_text() for tag in soup.select("a span.song"))
    artist_list.extend(tag.get_text().strip() for tag in soup.select("tr td a.artist"))

# The two lists are paired by position, so they must have the same length.
df_rock_top_100s = pd.DataFrame(data={
    "song": song_list,
    "artist": artist_list
})

Working on https://playback.fm/charts/rock/2010...
Working on https://playback.fm/charts/rock/2011...
Working on https://playback.fm/charts/rock/2012...
Working on https://playback.fm/charts/rock/2013...
Working on https://playback.fm/charts/rock/2014...
Working on https://playback.fm/charts/rock/2015...
Working on https://playback.fm/charts/rock/2016...
Working on https://playback.fm/charts/rock/2017...
Working on https://playback.fm/charts/rock/2018...
Working on https://playback.fm/charts/rock/2019...
Working on https://playback.fm/charts/rock/2020...
Working on https://playback.fm/charts/rock/2021...


In [8]:
def load_clean_edit_df(df):
    """
    Normalise a song/artist dataframe and add simplified song-name columns.

    The input frame is left untouched; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide "song" and "artist" columns. Column names are
        lower-cased first, so e.g. "Song" is accepted as well.

    Returns
    -------
    pandas.DataFrame
        Columns, in this order: "song", "song_lower",
        "song_lower_shorthand", "artist".
        "song_lower" is the lower-cased title.
        "song_lower_shorthand" is the text before the first "(",
        lower-cased, with apostrophes, periods and commas removed and
        surrounding whitespace stripped.
    """
    # rename() returns a new frame, so the caller's column labels are kept.
    cleaned = df.rename(columns=str.lower)

    titles = cleaned["song"]
    shorthand = titles.str.split("(").str[0].str.lower()
    for char in ("'", ".", ","):
        # regex=False: "." has to be removed literally, not act as a wildcard.
        shorthand = shorthand.str.replace(char, "", regex=False)
    shorthand = shorthand.str.strip()

    cleaned = cleaned.assign(
        song_lower=titles.str.lower(),
        song_lower_shorthand=shorthand,
    )

    return cleaned[["song", "song_lower", "song_lower_shorthand", "artist"]]

# Build the cleaned frame (lower-cased column names plus simplified
# song-name columns) that the functions below read as the global `df`.
df = load_clean_edit_df(df_rock_top_100s)

In [13]:
def get_songs(df):
    """
    Extract song-title lists from the cleaned dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the "song" and "song_lower" columns.

    Returns
    -------
    tuple of (list, list, list)
        song_list: values of the "song" column, in row order.
        song_list_lower: values of the "song_lower" column, in row order.
        duplicate_songs: each lower-cased title that occurs more than once,
        listed once, in order of first appearance.
    """
    song_list = list(df["song"])
    song_list_lower = list(df["song_lower"])

    title_counts = collections.Counter(song_list_lower)
    duplicate_songs = [title for title, count in title_counts.items() if count > 1]

    return song_list, song_list_lower, duplicate_songs

# Materialise the title lists from the cleaned frame. Note: this rebinds
# `song_list`, which was first assigned in the scraping cell.
song_list, song_list_lower, duplicate_songs = get_songs(df)

def get_and_check_user_input():
    """
    Get user's input and check whether the song is in Top 100.
    Ask for input until valid top 100 song is given.

    The typed text is simplified the same way as the "song_lower_shorthand"
    column (text before the first "(", lower-cased, apostrophes, periods and
    commas removed, whitespace stripped) and compared against that column of
    the module-level `df`.

    Returns
    -------
    str
        The "song" value of the first row whose shorthand equals the
        simplified input.
    """
    # Build the lookup list once; it does not change between attempts.
    shorthand_titles = list(df["song_lower_shorthand"])

    # Loop instead of recursing so repeated misses cannot exhaust the call stack.
    while True:
        user_input = input("Pick a hot song: ")
        ui_easified = (
            user_input.split("(")[0]
            .lower()
            .replace("'", "")
            .replace(".", "")
            .replace(",", "")
            .strip()
        )

        if ui_easified in shorthand_titles:
            ui_idx = shorthand_titles.index(ui_easified)

            print('You chose:')
            # ui_idx is a position in the list, so look it up positionally;
            # label-based indexing only works while df has a default index.
            return df["song"].iloc[ui_idx]

        print("------------")
        print("Your chosen song is NOT in the Top 100!")
        print("Choose another song: ")

#def get_and_check_user_input():
#    
    #user_input = input("Pick a hot song: ")
#    
    #if "'" in user_input:
        #if user_input.lower() in song_list_lower:

            #return user_input

        #else:
            #print("------------")
            #print("Your chosen song is NOT in the Top 100!")
            #print("Choose another song: ")

            #user_input = get_and_check_user_input()
            ##get_and_check_user_input()

            #return user_input
#        
    #else:
        #if user_input.lower() in song_list_lower_shorthand:

            #return user_input

        #else:
            #print("------------")
            #print("Your chosen song is NOT in the Top 100!")
            #print("Choose another song: ")

            #user_input = get_and_check_user_input()

            #return user_input
    
#def get_and_check_user_input():
#    
    #user_input = input("Pick a hot song: ")
#    
    #if user_input.lower() in song_list_lower:
        #return user_input
#    
    #else:
        #user_input = user_input.lower().replace(" ", "")

        #for idx, song in enumerate(song_list_lower):

            #song = song.lower().replace(" ", "")
            #matcher = difflib.SequenceMatcher(None, user_input, song)

            #match = max(size for _,_,size in matcher.get_matching_blocks()) >= \
                    #int(np.floor(0.7*len(song)))

            #if match:
                #congruent_song = song_list[idx]
                #print(f"I assume you mean {congruent_song}!?")
                #return congruent_song
#            
            ##else:
                ##print("Your chosen song is NOT in the Top 100!")
                ##print("Choose another song: ")
##                
                ##get_and_check_user_input()
    
def check_if_song_duplicate(user_input):
    """
    Check if chosen song exists more than once.
    Prompt user to choose if song is duplicate.

    Titles are compared as whole strings, case-insensitively, against the
    "song" column of the module-level `df`.

    Parameters
    ----------
    user_input : str
        Song title to look up.

    Returns
    -------
    tuple
        (True, artist) when several rows share the title; `artist` is the
        name the user selected.
        (False, artists) otherwise; `artists` is the list of matching artist
        names (zero or one entry).
    """
    # Compare complete titles literally: str.contains() would treat the title
    # as a regular expression (breaking on "(" or "?") and would also match
    # longer titles that merely contain it.
    same_title = df["song"].str.lower() == user_input.lower()
    artists = df.loc[same_title, "artist"].tolist()

    if len(artists) <= 1:
        return False, artists

    print(f"There is more than one song named '{user_input}'")

    print("------------")

    print(f"Which of the {len(artists)} artists did you mean?")

    # Offer every matching artist, numbered from 1, not just the first two.
    valid_choices = [str(number) for number in range(1, len(artists) + 1)]
    options = " or \n ".join(
        f"{number} for {artist}" for number, artist in zip(valid_choices, artists)
    )
    prompt = f"Select {options}: "

    input_duplicate = input(prompt)
    # Keep asking until the answer is one of the offered numbers, so the
    # int() conversion and list lookup below cannot fail.
    while input_duplicate not in valid_choices:
        print("------------")
        print("Invalid input - please choose again!")
        input_duplicate = input(prompt)

    chosen_duplicate_artist = artists[int(input_duplicate) - 1]

    print("------------")

    print(f"You chose '{user_input}' by {chosen_duplicate_artist}")
    return True, chosen_duplicate_artist

In [12]:
def result():
    """
    Run the interactive flow: ask for a song, resolve its artist, and print
    a random recommendation from the module-level `df`.

    Prints to stdout and returns None.
    """
    user_input = get_and_check_user_input()

    # check_if_song_duplicate returns a (is_duplicate, artist) pair; it has to
    # be unpacked, otherwise len() measures the pair itself (always 2).
    is_duplicate, artist = check_if_song_duplicate(user_input)

    # For duplicates the chosen artist was already announced by
    # check_if_song_duplicate; here `artist` is the list of matching artists.
    if not is_duplicate and len(artist) == 1:
        print(f"You chose '{user_input}' by {artist[0]}")

    # Recommend a different title when one exists, and take song and artist
    # from the same row so no title-based (regex/substring) lookup is needed.
    other_rows = df[df["song"].str.lower() != user_input.lower()]
    candidates = other_rows if len(other_rows) > 0 else df
    pick = candidates.iloc[random.randrange(len(candidates))]
    recom_new_song = pick["song"]
    recom_new_song_artist = pick["artist"]

    print("------------")
    print(f"You might also like '{recom_new_song}' by {recom_new_song_artist}.")

# Run the interactive flow once; this blocks until a song is typed in.
result()

Pick a hot song: rad habit
I assume you meant [] [Y] / [N]y


TypeError: unhashable type: 'list'

In [14]:
# Manual check of the song prompt on its own, outside of result().
get_and_check_user_input()

Pick a hot song: as it war
I assume you meant [] [Y] / [N]n
Pick a hot song: bad habit
I assume you meant [] [Y] / [N]y


In [14]:
# Display the cleaned frame. Note: display.max_rows is set to None in the
# imports cell, so every row is rendered.
df

Unnamed: 0,song,song_lower,song_lower_shorthand,artist
0,The Sex Is Good,the sex is good,the sex is good,Saving Abel
1,Another Way to Die,another way to die,another way to die,Disturbed
2,Resistance,resistance,resistance,Muse
3,Waiting for the End,waiting for the end,waiting for the end,Linkin Park
4,Lay Me Down,lay me down,lay me down,The Dirty Heads
5,Kings And Queens,kings and queens,kings and queens,Thirty Seconds To Mars
6,Tighten Up,tighten up,tighten up,The Black Keys
7,Porn Star Dancing,porn star dancing,porn star dancing,My Darkest Days
8,1901,1901,1901,Phoenix
9,The Catalyst,the catalyst,the catalyst,Linkin Park
