In [1]:
#Dependencies and Pre-processing
from rake_nltk import Rake
import pandas as pd
import requests
# from IPython.display import Image, display
from IPython.core.display import display, HTML
from timeit import default_timer as timer
import pprint
import sys
from timeit import default_timer as timer
import gensim
import string
from gensim.models import word2vec
import numpy as np



# Build Word2Vec Model and Song DataFrame

In [2]:
Songs = pd.read_csv("songdata.csv")
Songs.set_index("song",inplace=True)
Songs.drop(["artist","link"],inplace=True,axis=1)
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', limit=1000000, binary=True)

In [3]:
#Functions
def pixabay_get(keywords,N):
    r = requests.get('https://pixabay.com/api/?key=6713313-c85a6d8e3f6fe1a13b85f7d78&q='+"+".join(keywords)+'&image_type=photo&page=1&per_page='+str(N))
    result=[]
    try:
        for img in r.json()['hits']:
            result.append({'url':img['webformatURL'], 'tags':img["tags"].split(", ")})
    except:
        pass
    return result

def fragment(lyrics):
    paragraphs = []
    paragraphs = lyrics.split('\n  \n')
    if (len(paragraphs)==1):
        paragraphs = lyrics.split('  \r\n  ')
    return [{'text':x} for x in paragraphs]

def extract_kw(text):
    rake = Rake()
    rake.extract_keywords_from_text(text)
    A = rake.get_ranked_phrases_with_scores()
    if not A:
        return []
    return [x[1] for x in A]

def paragraph_to_keywords(paragraphs):
    for p in paragraphs:
        listofkeywords = extract_kw(p['text'])
        if (listofkeywords == []):
            paragraphs.remove(p)
            continue
        p['keywords'] = listofkeywords

def get_images_and_tags(paragraphs):
    for p in paragraphs:
        p['imgs']= []
        for kw in p['keywords']:
            for img in pixabay_get([kw],50):
                if (img): p['imgs'].append(img)
            
def SongVisualize(index):
    #Choose a song
    lyrics = Songs.iloc[index]["text"]

    ################################################################

    #Fragment into paragraphs
    paragraphs = fragment(lyrics)

    ################################################################

    paragraph_to_keywords(paragraphs)

    ################################################################

    get_images_and_tags(paragraphs)

    ################################################################

    #Compare vectors and Pick the most relevent image for each paragraph

    # Calculate kw_avg vector for each paragraph 
    for p in paragraphs:
        kw_avg=np.zeros(300)
        kw_count=0
        for kw in p['keywords']:
            for kw_nospace in kw.split(' '):
                try: 
                    kw_avg += model.wv[kw_nospace]
                    kw_count += 1
                except: # ignore if keyword is not found by the model
                    pass
        if (kw_count > 0):
            kw_avg /= kw_count
            p['kw_avg']=kw_avg

    # Calculate vector for each image
    for p in paragraphs:
        for img in p['imgs']:
            img_tag_avg = np.zeros(300)
            img_tag_count = 0
            for tag in img['tags']:
                img_tag_count += 1
            try:
                    img_tag_avg += model.wv[tag]
            except:
                    pass
            if (img_tag_count>0):
                img['vector'] = img_tag_avg / img_tag_count

    #Pick the most relevent image
    filtered = []
    for p in paragraphs:
        difference = np.inf
        picked_url = ''
        for img in p['imgs']:
            if (img['url'] in filtered):
                continue
            if (difference > np.sum(np.abs(p['kw_avg'] - img['vector']))):
                difference = np.sum(np.abs(p['kw_avg'] - img['vector']))
                picked_url = img['url']
        p['algo1_pickedImg_url'] = picked_url 
        p['algo1_pickedImg_tags'] = img['tags']
        p['algo1_pickedImg_vector'] = img['vector']
        filtered.append(picked_url)
        
    #################
    # Algorithm 2
    #################
    filtered = []
    for p in paragraphs:
        similarity = -1
        similarity_list = []
        p['algo2_pickedImg_url'] = ''
        for img in p['imgs']:
            table = []
            if (img['url'] in filtered):
                continue
            for kw in p['keywords']:
                if (kw.find(' ')!=-1):
                    continue
                table_row = []
                for tag in img['tags']:
                    if (tag.find(' ')!=-1):
                        continue
                    try:
                        similarity_list.append(model.wv.similarity(kw,tag))
                        table_row.append(model.wv.similarity(kw,tag))
                    except:
                        table_row.append('/')
                        continue
                table.append(table_row)
            if (np.average(similarity_list) > similarity):
                similarity = np.average(similarity_list)
                p['algo2_pickedImg_url'] = img['url']
                p['algo2_pickedImg_tags'] = img['tags']
                p['algo2_pickedImg_table'] = list(table)
        filtered.append(p['algo2_pickedImg_url'])
    
    return paragraphs

def custom_display(result_para):
    html = ''
    for i,p in enumerate(result_para):
        # para h2
        html += '<h2 style="margin-bottom:1rem;">Paragraph '+str(i+1)+'</h2>'
        
        # lyrics
        lyrics = p['text']
        lyrics_html = '<div style="font-size:20px;line-height:1.5;">'
        for line in lyrics.split('\n'):
            for kw in p['keywords']:
                line = line.replace(kw, '<span style="color:#42A5F5;">'+kw+'</span>')
            if (line.strip()): lyrics_html += line + '<br>'
        lyrics_html += '</div>'
        html += lyrics_html
        
        # algo1
        algo1_html = '<h3 style="margin-bottom:1rem;">Algorithm 1</h3>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Keywords: ' + ', '.join([ '<span style="color:#42A5F5;">'+kw+'</span>' for kw in p['keywords'] if kw.find(' ')==-1]) + '</div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Vector Avg: <span style="color:#BA2121;">' + str(np.average(p['kw_avg'])) + '</span></div>'
        algo1_html += '<div style="text-align:center;"><i style="border: solid grey; border-width: 0 3px 3px 0; display: inline-block; padding: 8px;transform: rotate(45deg); -webkit-transform: rotate(45deg);"></i></div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Best Match Image<img style="width:100%;margin-top:2rem;margin-bottom:2rem;" src="' + p['algo1_pickedImg_url'] + '"></div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Image tags: ' + ', '.join([ '<span style="color:#42A5F5;">'+kw+'</span>' for kw in p['algo1_pickedImg_tags'] if kw.find(' ')==-1]) + '</div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Vector Avg: <span style="color:#BA2121;">' + str(np.average(p['algo1_pickedImg_vector'])) + '</span></div>'
        
        # algo2
        algo2_html = '<h3 style="margin-bottom:1rem;">Algorithm 2</h3>'
        algo2_html += '<div style="font-size:17px;line-height:1.5;">Best Match Image</div>'
        algo2_html += '<table style="font-size:13px;"> <tr>'+ ''.join(['<th> </th>']+['<th>Tag: '+tag+'</th>' for tag in p['algo2_pickedImg_tags'] if tag.find(' ')==-1]) + '</tr>'
        i = -1
        for row in p['algo2_pickedImg_table']:
            i += 1
            while (p['keywords'][i].find(' ')!=-1):
                i += 1
            algo2_html += '<tr><th style="text-align:left">Keywords: '+p['keywords'][i]+'</th>'
            for s in row:
                if (not type(s) is str):
                    s = str(round(s,2))
                algo2_html += '<td>' + s + '</td>'
            algo2_html += '</tr>'
        algo2_html += '</table>'
        
        algo2_html += '<img style="margin-top:1rem;margin-bottom:1rem;" src="' + p['algo2_pickedImg_url'] + '">'
        
        #algo Flexbox
        html += '<div style="display:flex;margin-top: 10px; border: solid grey; border-width: 1px; padding: 1rem;"><div style="width:50%;padding-right:1rem;border-right: solid grey; border-width: 1px;">'+algo1_html+'</div><div style="padding-left:2rem; width:50%;">'+algo2_html+'</div></div>'
        html += '<hr>'
    display(HTML(html))


# Testing Stage

In [5]:
result = SongVisualize(101)
custom_display(result)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,Tag: snow,Tag: winter
Keywords: come,0.09,0.15
Keywords: sunshine,0.38,0.35
Keywords: sand,0.35,0.2
Keywords: loveland,/,/
Keywords: land,0.11,0.08
Keywords: flowers,0.2,0.2
Keywords: beaches,0.22,0.24

Unnamed: 0,Tag: love,Tag: waiting
Keywords: waiting,0.16,1.0
Keywords: sky,0.19,0.04
Keywords: paradise,0.31,0.09
Keywords: mellow,0.27,0.02
Keywords: grass,0.14,0.07
Keywords: blue,0.15,0.15

Unnamed: 0,Tag: children,Tag: laugh
Keywords: oh,0.07,0.32
Keywords: want,0.11,0.27
Keywords: share,0.13,0.13
Keywords: life,0.27,0.2

Unnamed: 0,Tag: wanderer,Tag: wandersmann,Tag: walk
Keywords: wander,0.4,/,0.56
Keywords: tree,0.14,/,0.11
Keywords: soon,0.13,/,0.12
Keywords: shade,0.13,/,0.12
Keywords: rainbow,0.16,/,0.12
Keywords: moon,0.17,/,0.13
Keywords: loveland,/,/,/
Keywords: lie,0.04,/,0.26
Keywords: land,0.16,/,0.07
Keywords: darling,0.18,/,-0.04

Unnamed: 0,Tag: dog,Tag: malinois
Keywords: take,0.1,/
Keywords: show,0.09,/
Keywords: secrets,0.12,/
Keywords: paradise,0.07,/
Keywords: hand,0.12,/
Keywords: everything,0.12,/
Keywords: bring,0.07,/

Unnamed: 0,Tag: children,Tag: laugh
Keywords: oh,0.07,0.32
Keywords: want,0.11,0.27
Keywords: share,0.13,0.13
Keywords: life,0.27,0.2

Unnamed: 0,Tag: cleveland,Tag: ohio,Tag: oh
Keywords: oh,0.36,0.34,1.0
Keywords: want,0.13,0.16,0.32
Keywords: share,0.01,0.01,0.04
