##Setup

In [1]:
# Cell: 1
# The call in this cell installs the anvil-uplink package on the current Google Compute Engine instance.
# This must be done every time the Colab notebook is started from a cold start or a new kernel is initialized.
!pip install anvil-uplink

Collecting argparse
  Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


In [2]:
# Cell: 2
# DO NOT REMOVE ANY PACKAGES. DOING SO WILL BREAK THE FUNCTIONS BELOW.
# YOU MAY ADD ADDITIONAL PACKAGES FOR FURTHER IMPLEMENTATIONS
import os
import pickle
import re
import pandas as pd
import anvil.server
import itertools
from collections import Counter
from google.colab import drive #Allows mounting of Google Drive

In [3]:
# Cell: 3
# Mount Google Drive
# The following code mounts Google Drive so the shared '6242 Final Project' folder can be read.
# For this to work you must first add a shortcut to the shared 6242 Final Project folder in your own Google Drive.
# To do this follow this link: https://gsuitetips.com/tips/drive/how-to-add-shortcuts-in-google-drive/
# Of course this can all be avoided if the data sets are stored in your notebook's working directory
drive.mount('/content/drive')
# If the drive is mounted correctly, you should see the list of files within the shared project drive.
# NOTE: the path below is relative, so it only lines up with the '/content/drive' mount point
# when the current working directory is /content.
os.listdir('drive/MyDrive/6242 Final Project')

Mounted at /content/drive


['nb2-part0--pairwise.html',
 'Gantt Chart.pdf',
 'Proposal.gdoc',
 'team003slides.gslides',
 'problem22--sample-solutions.html',
 'problem24-sample-solutions.html',
 'problem24-sample-solutions.gdoc',
 'nb2-part0--fa18.html',
 'Pseudo Algorithm.pdf',
 'Tasks.gdoc',
 'playlist_list.pkl',
 'team003progress.gdoc',
 'small_playlist_list1.pkl',
 'small_playlist_list2.pkl',
 'small_playlist_list3.pkl',
 'small_playlist_list4.pkl',
 'Playlist Generator',
 'Playlist_From_Song_Adjacency',
 'Playlist Generator v2.ipynb',
 'Playlist Generator v3.ipynb',
 'Final Project Survey.gform',
 'string_map_artists.pkl',
 'Anvil App Release Notes.gdoc',
 'Literature Review Docs.gsheet',
 'Playlist Generator v3-1.ipynb',
 'string_map_tracks.pkl',
 'poster.gdoc',
 '6242.pdf',
 'Read Me.gdoc',
 'Responses from Survey.gsheet',
 'team003report.gdoc',
 'team003final.zip',
 'String_mapping_code.ipynb',
 'Copy of Playlist Generator v3-2.ipynb',
 'Playlist Generator v3-2.ipynb']

In [4]:
# Cell: 4
# EDIT AS REQUIRED
# The following line allows our callable functions to connect to a specific anvil.works server. The token is user dependent and is pulled from the app's Uplink settings.
# If connecting to a new Anvil account, update the string inside the function below with own token.
# Reference this guide for more details: https://anvil.works/learn/tutorials/google-colab-to-web-app
#old token (for Nico) -----> anvil.server.connect("K347MKQKAJNTF56WEM2Y5NY2-JHAF7OFYVD276UVD")
#hayden token below
# Prefer an uplink key supplied through the ANVIL_UPLINK_KEY environment variable so the
# credential does not have to live in the notebook; fall back to the project key otherwise.
# NOTE(review): the fallback key is stored here in plain text -- rotate it and drop the
# fallback once every collaborator sets ANVIL_UPLINK_KEY.
anvil.server.connect(os.environ.get("ANVIL_UPLINK_KEY") or "server_NTRX6MJKLHLW4X3T4ZOWW7RN-K7SUPT52UEAUWEXQ")

Connecting to wss://anvil.works/uplink
Anvil websocket open
Connected to "Development" as SERVER


##Task 0: Load Data

In [5]:
# Cell 5
# This function loads a playlist_list pickle file and outputs a list of playlist dictionary:
# Example: [{'playlist_name': 'Throwbacks',
#   'songs': [('Missy Elliott', 'Lose Control (feat. Ciara & Fat Man Scoop)'),
#             ('Britney Spears', 'Toxic'),
#             ('Beyoncé', 'Crazy In Love'),...]}]

def load_data(filename, data_dir='drive/MyDrive/6242 Final Project'):
  """
      Load one pickled object from the project data folder.

      :param filename: string, name of the pickle file inside data_dir
      :param data_dir: string, folder that holds the pickle files; the default is the shared
                       project folder, resolved relative to the current working directory
      :return: the deserialized object (a list of playlists for the playlist_list files)
  """
  # NOTE: pickle.load can execute arbitrary code while deserializing; only load files
  # that were produced by the project itself.
  with open(os.path.join(data_dir, filename), 'rb') as file:
      return pickle.load(file)


### Important: Edit the cell below to debug code on small data or full data. Choosing to load the full data may cause your Colab compute instance to crash if using on free tier due to not enough RAM

* Due to the limitations of Colab Free Tier account, it is highly recommended to work with small data when testing or debugging new feature.
* Change the variable assignment below to either True or False.
* If True, small data (250,000 playlists) is loaded.
* If False, full data (1,000,000 playlists) is loaded.

### In addition to the playlist_list.pkl file(s), 2 other pickle files are loaded as lookup tables of artists and songs.


In [6]:
# Cell 6
small_data_flag = False # Edit as required
# The shared project drive holds 4 small playlist pickle files
# (small_playlist_list1.pkl ... small_playlist_list4.pkl); when working with small data,
# edit the small file name below as required.
playlist_list = load_data(
    'small_playlist_list1.pkl' if small_data_flag == True else 'playlist_list.pkl'
)

# Lookup tables used by fix_input_name_artist / fix_input_name_track.
string_map_artists = load_data('string_map_artists.pkl')
string_map_tracks = load_data('string_map_tracks.pkl')

In [7]:
# Cell 7
#STRING CONVERSION. This converts an artist or track name to a form that is robust to user input.
#anything after a '(' or '-' is stripped, as well as all punctuation and whitespace, and uppercase chars are converted to lowercase
#Examples: "Lose Control (feat. Ciara & Fat Man Scoop)" ----> "losecontrol"
#          "U.N. Owen Was Her? (Remix)" ----> "unowenwasher"
def convert_string(input):
    """
    Normalize an artist or track name into a key that is robust to user input.

    Everything from the first '(' onward is dropped, then everything from the first '-'
    onward; the punctuation characters listed below and spaces are removed, and the
    result is lower-cased.

    :param input: string, raw artist or track name
    :return: string, normalized key, e.g. "U.N. Owen Was Her? (Remix)" -> "unowenwasher"
    """
    # Keep only the text before the first '(' and, of that, the text before the first '-'.
    base_name = input.partition('(')[0].partition('-')[0]
    # Raw string: the previous literal relied on '\,' being an unrecognized escape that
    # Python keeps verbatim (while warning about it). r'' yields the same characters,
    # backslash included, without the warning.
    stripped_chars = r'''!()-[]{};:'"\,<>./?@# $%^&*_~'''
    return base_name.translate(str.maketrans('', '', stripped_chars)).lower()

# retrieve a fixed string based on an input string. converts using convert_string and looks up in map
def fix_input_name_artist(input):
    """
    Resolve a user-typed artist name through the string_map_artists lookup table.

    :param input: string, artist name as typed
    :return: the mapped name when the converted key is present in string_map_artists,
             otherwise the input unchanged
    """
    lookup_key = convert_string(input)
    return string_map_artists[lookup_key] if lookup_key in string_map_artists else input

def fix_input_name_track(input):
    """
    Resolve a user-typed track name through the string_map_tracks lookup table.

    :param input: string, track name as typed
    :return: the mapped name when the converted key is present in string_map_tracks,
             otherwise the input unchanged
    """
    lookup_key = convert_string(input)
    return string_map_tracks[lookup_key] if lookup_key in string_map_tracks else input

In [8]:
# Cell 7.2
#Sample usage
# Each lookup result is printed on its own line: tracks first, then artists.
print(*(fix_input_name_track(name) for name in ("lose control", "unowenwasher")), sep="\n")
print(*(fix_input_name_artist(name) for name in ("eD ShEERaN", "destinys child?")), sep="\n")

Lose Control (feat. Ciara & Fat Man Scoop)
U.N. Owen Was Her? (Remix)
Ed Sheeran
Destiny's Child


##Task 1: Find top song(s) based on keyword

In [9]:
# Cell 8
# This function takes a list of playlists and a keyword as input and returns a list of playlists 
#   containing the keyword in the playlist_name
def keyword_search(playlist_list, keyword):
    """
        Select the playlists whose name contains the keyword (case-insensitive substring match).

        :param list of playlists: python list of dicts with a 'playlist_name' key
        :param keyword: string, matched literally (regex metacharacters have no special meaning)
        :return: list of playlists containing keyword, or a message string when nothing matches
    """
    # The previous version also compiled an unused regex from the raw keyword, which raised
    # re.error for keywords such as "rock (live" before any matching took place.
    needle = keyword.lower()
    result = [playlist for playlist in playlist_list if needle in playlist['playlist_name'].lower()]
    if len(result) == 0:
        # The non-list return is how top_songs detects "no match" (it returns None for it).
        result = "Your keyword did not yield any results. Try a different keyword."
    return result

# This function takes a list of playlists and 'n' number of songs and outputs a list of tuples of the top 'n' songs
def top_songs(playlist_list, n = 1):
    """
        Rank every (artist, song) pair by how many times it appears across the playlists.

        :param list of playlists: python list of dicts with a 'songs' list of (artist, song) pairs
        :param top 'n' songs: integer (anything int() accepts); negative values yield no songs
        :return: list of at most n (artist, song, count) records, most frequent first;
                 None when playlist_list is not a list (e.g. the keyword_search message string)
    """
    if type(playlist_list) != list:
        return None

    # A negative n would otherwise slice from the end of the ranking.
    n = max(int(n), 0)

    all_songs = []
    for playlist in playlist_list:
        all_songs += playlist["songs"]

    song_counts = (
        pd.DataFrame(all_songs, columns=['artist', 'song'])
        .groupby(['artist', 'song'])
        .size()
        .to_frame("count")
        .reset_index()
        .sort_values('count', ascending=False)
    )
    ranked = list(song_counts[['artist', 'song', 'count']].to_records(index=False))

    # Slicing already caps at the number of distinct songs. The old clamp to len - 1 dropped
    # the last song and returned nothing at all when only one distinct song existed.
    return ranked[:n]

##Task 2: Using a given song, build a playlist

In [10]:
# Cell 9
#if input is just a song
#based off of 2nd playlist_list generated (containing playlist name) 
#NOTE: see_playlist() in Cell 10 still calls this function, so it cannot be removed until that caller is changed.
def playlist_using_song(song_name, n):
  """
      Suggest songs that share playlists with a given song.

      Reads the module-level playlist_list. A playlist matches when song_name is a substring
      of any of its song titles; every distinct (artist, song) in a matching playlist gets
      one vote per playlist.

      :param song_names: string 
      :param 'n' number of songs: integer
      :return: list of up to n (artist, song, count) tuples, most frequent first, excluding
               songs whose title equals song_name
  """
  # any() stops at the first hit, so a playlist is counted once even when several of its
  # titles contain song_name (previously it was added once per matching title, which
  # multiplied the votes of every song in that playlist).
  matching_playlists = (
      set(playlist['songs'])
      for playlist in playlist_list
      if any(song_name in song[1] for song in playlist['songs'])
  )
  votes = Counter(itertools.chain.from_iterable(matching_playlists))
  return [(*key, val) for key, val in votes.most_common() if key[1] != song_name][:n]

## Below is our pseudo main() function which compiles all the functions above to generate a playlist. The output is automatically forwarded to the Anvil server if connected.

---



In [None]:
# Cell 10
# This is the callable function which outputs the results to the Anvil server
@anvil.server.callable
def see_playlist(keyword, n):
  """
      Build a playlist around the most common song of the playlists matching a keyword.

      :param keyword: string 
      :param 'n' number of songs: integer; None falls back to 10
      :return: string containing 'song' by 'artist' format, result in raw format for plotting;
               when the keyword matches nothing, the "no results" message and an empty list
  """

  if n is None:
    n = 10

  keyword_playlists = keyword_search(playlist_list, keyword)
  song = top_songs(keyword_playlists, 1)
  if not song:
    # keyword_search hands back a message string instead of a list when nothing matches, and
    # top_songs turns that into None; report the message rather than failing on song[0].
    if isinstance(keyword_playlists, str):
      return keyword_playlists, []
    return "Your keyword did not yield any results. Try a different keyword.", []

  # Each entry is (position, (artist, song, count)).
  playlist_result = list(enumerate(playlist_using_song(song[0][1], n)))
  output = '\n'.join(map(lambda x: str(x[0]) + ': ' + str(x[1][1]) + ' by ' + str(x[1][0]), playlist_result))
  return output, playlist_result

#hayden using this function below for testing, please dont remove
@anvil.server.callable
def playlist_containing_song_and_artist(song, artist, n=None):
  """
      Suggest songs that share playlists with one exact (artist, song) pair.

      Both names are first passed through the fix_input_name_* helpers. Reads the
      module-level playlist_list.

      :param song string
      :param artist string 
      :param 'n' number of songs integer; None limits the result to 10 songs
      :return: string containing 'song' by 'artist' format, result in raw format for plotting  
  """
  song = fix_input_name_track(song)
  artist = fix_input_name_artist(artist)
  limit = 10 if n is None else n
  target = (artist, song)

  # One vote per distinct song for every playlist that contains the target pair.
  votes = Counter(itertools.chain.from_iterable(
      set(playlist['songs']) for playlist in playlist_list if target in playlist['songs']
  ))

  ranked = [(track, count) for track, count in votes.most_common() if track != target][:limit]
  numbered = list(enumerate(ranked))
  # Each numbered entry is (position, ((artist, song), count)).
  text = '\n'.join(
      str(position) + ': ' + str(entry[0][1]) + ' by ' + str(entry[0][0])
      for position, entry in numbered
  )

  return text, numbered
#hayden also using this function
@anvil.server.callable
def artistlist(artist1, artnum=None):
  """
      List the artists that most often share a playlist with the given artist.

      The name is first passed through fix_input_name_artist. Reads the module-level
      playlist_list. A playlist matches when the name is a case-insensitive substring of
      any of its artist names; each distinct artist in a matching playlist gets one vote.

      :param artist string 
      :param 'artnum' number of artists integer; None limits the result to 10
      :return: string containing artists format, result in raw format for plotting  
  """
  artist1 = fix_input_name_artist(artist1)

  if artnum is None:
    artnum = 10

  wanted = artist1.lower()
  votes = Counter()
  for playlist in playlist_list:
      playlist_artists = set(song[0] for song in playlist['songs'])
      # Count the playlist once. Previously its artist set was added once per song by the
      # matching artist, so playlists with many such songs inflated every co-artist's count.
      if any(wanted in name.lower() for name in playlist_artists):
          votes.update(playlist_artists)

  # Entries are (artist, count); the queried artist itself is left out.
  output = [val for val in votes.most_common() if val[0].lower() != wanted][:artnum]
  output2 = list(enumerate(output))
  finoutput = '\n'.join(map(lambda x: str(x[0]) + ': ' + str(x[1][0]), output2))
  return finoutput, output2
# This call keeps the cell running and pinging the server for input, so it must stay last in the cell.
# You'll have to stop this cell's process if you want to run another cell.
anvil.server.wait_forever() 

#The cell  below is a copy of the code from the front-end server for reference. Do not run the code below. It wasn't designed to run in this environment. A link to clone this app is included in the ReadMe file.