In [1]:
#Dependencies and Pre-processing
from rake_nltk import Rake
import pandas as pd
import requests
# from IPython.display import Image, display
from IPython.core.display import display, HTML
from timeit import default_timer as timer
import pprint
import sys
from timeit import default_timer as timer
import gensim
import string
from gensim.models import word2vec
import numpy as np



# Build Word2Vec Model and Song DataFrame

In [2]:
Songs = pd.read_csv("songdata.csv")
Songs.set_index("song",inplace=True)
Songs.drop(["artist","link"],inplace=True,axis=1)
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', limit=1000000, binary=True)

In [3]:
#Functions
def pixabay_get(keywords,N):
    r = requests.get('https://pixabay.com/api/?key=6713313-c85a6d8e3f6fe1a13b85f7d78&q='+"+".join(keywords)+'&image_type=photo&page=1&per_page='+str(N))
    result=[]
    try:
        for img in r.json()['hits']:
            result.append({'url':img['webformatURL'], 'tags':img["tags"].split(", ")})
    except:
        pass
    return result

def fragment(lyrics):
    paragraphs = []
    paragraphs = lyrics.split('\n  \n')
    if (len(paragraphs)==1):
        paragraphs = lyrics.split('  \r\n  ')
    return [{'text':x} for x in paragraphs]

def extract_kw(text):
    rake = Rake()
    rake.extract_keywords_from_text(text)
    A = rake.get_ranked_phrases_with_scores()
    if not A:
        return []
    return [x[1] for x in A]

def paragraph_to_keywords(paragraphs):
    for p in paragraphs:
        listofkeywords = extract_kw(p['text'])
        if (listofkeywords == []):
            paragraphs.remove(p)
            continue
        p['keywords'] = listofkeywords

def get_images_and_tags(paragraphs):
    for p in paragraphs:
        p['imgs']= []
        for kw in p['keywords']:
            for img in pixabay_get([kw],50):
                if (img): p['imgs'].append(img)
            
def SongVisualize(index):
    #Choose a song
    lyrics = Songs.iloc[index]["text"]

    ################################################################

    #Fragment into paragraphs
    paragraphs = fragment(lyrics)

    ################################################################

    paragraph_to_keywords(paragraphs)

    ################################################################

    get_images_and_tags(paragraphs)

    ################################################################

    #Compare vectors and Pick the most relevent image for each paragraph

    # Calculate kw_avg vector for each paragraph 
    for p in paragraphs:
        kw_avg=np.zeros(300)
        kw_count=0
        for kw in p['keywords']:
            for kw_nospace in kw.split(' '):
                try: 
                    kw_avg += model.wv[kw_nospace]
                    kw_count += 1
                except: # ignore if keyword is not found by the model
                    pass
        if (kw_count > 0):
            kw_avg /= kw_count
            p['kw_avg']=kw_avg

    # Calculate vector for each image
    for p in paragraphs:
        for img in p['imgs']:
            img_tag_avg = np.zeros(300)
            img_tag_count = 0
            for tag in img['tags']:
                img_tag_count += 1
            try:
                    img_tag_avg += model.wv[tag]
            except:
                    pass
            if (img_tag_count>0):
                img['vector'] = img_tag_avg / img_tag_count

    #Pick the most relevent image
    filtered = []
    for p in paragraphs:
        difference = np.inf
        picked_url = ''
        for img in p['imgs']:
            if (img['url'] in filtered):
                continue
            if (difference > np.sum(np.abs(p['kw_avg'] - img['vector']))):
                difference = np.sum(np.abs(p['kw_avg'] - img['vector']))
                picked_url = img['url']
        p['algo1_pickedImg_url'] = picked_url 
        p['algo1_pickedImg_tags'] = img['tags']
        p['algo1_pickedImg_vector'] = img['vector']
        filtered.append(picked_url)
        
    #################
    # Algorithm 2
    #################
    filtered = []
    for p in paragraphs:
        similarity = -1
        similarity_list = []
        p['algo2_pickedImg_url'] = ''
        for img in p['imgs']:
            table = []
            if (img['url'] in filtered):
                continue
            for kw in p['keywords']:
                if (kw.find(' ')!=-1):
                    continue
                table_row = []
                for tag in img['tags']:
                    if (tag.find(' ')!=-1):
                        continue
                    try:
                        similarity_list.append(model.wv.similarity(kw,tag))
                        table_row.append(model.wv.similarity(kw,tag))
                    except:
                        table_row.append('/')
                        continue
                table.append(table_row)
            if (np.average(similarity_list) > similarity):
                similarity = np.average(similarity_list)
                p['algo2_pickedImg_url'] = img['url']
                p['algo2_pickedImg_tags'] = img['tags']
                p['algo2_pickedImg_table'] = list(table)
        filtered.append(p['algo2_pickedImg_url'])
    
    return paragraphs

def custom_display(result_para):
    html = ''
    for i,p in enumerate(result_para):
        # para h2
        html += '<h2 style="margin-bottom:1rem;">Paragraph '+str(i+1)+'</h2>'
        
        # lyrics
        lyrics = p['text']
        lyrics_html = '<div style="font-size:20px;line-height:1.5;">'
        for line in lyrics.split('\n'):
            for kw in p['keywords']:
                line = line.replace(kw, '<span style="color:#42A5F5;">'+kw+'</span>')
            if (line.strip()): lyrics_html += line + '<br>'
        lyrics_html += '</div>'
        html += lyrics_html
        
        # algo1
        algo1_html = '<h3 style="margin-bottom:1rem;">Algorithm 1</h3>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Keywords: ' + ', '.join([ '<span style="color:#42A5F5;">'+kw+'</span>' for kw in p['keywords'] if kw.find(' ')==-1]) + '</div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Vector Avg: <span style="color:#BA2121;">' + str(np.average(p['kw_avg'])) + '</span></div>'
        algo1_html += '<div style="text-align:center;"><i style="border: solid grey; border-width: 0 3px 3px 0; display: inline-block; padding: 8px;transform: rotate(45deg); -webkit-transform: rotate(45deg);"></i></div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Best Match Image<img style="width:100%;margin-top:2rem;margin-bottom:2rem;" src="' + p['algo1_pickedImg_url'] + '"></div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Image tags: ' + ', '.join([ '<span style="color:#42A5F5;">'+kw+'</span>' for kw in p['algo1_pickedImg_tags'] if kw.find(' ')==-1]) + '</div>'
        algo1_html += '<div style="font-size:17px;line-height:1.5;">Vector Avg: <span style="color:#BA2121;">' + str(np.average(p['algo1_pickedImg_vector'])) + '</span></div>'
        
        # algo2
        algo2_html = '<h3 style="margin-bottom:1rem;">Algorithm 2</h3>'
        algo2_html += '<div style="font-size:17px;line-height:1.5;">Best Match Image</div>'
        algo2_html += '<table style="font-size:13px;"> <tr>'+ ''.join(['<th> </th>']+['<th>Tag: '+tag+'</th>' for tag in p['algo2_pickedImg_tags'] if tag.find(' ')==-1]) + '</tr>'
        i = -1
        for row in p['algo2_pickedImg_table']:
            i += 1
            while (p['keywords'][i].find(' ')!=-1):
                i += 1
            algo2_html += '<tr><th style="text-align:left">Keywords: '+p['keywords'][i]+'</th>'
            for s in row:
                if (not type(s) is str):
                    s = str(round(s,2))
                algo2_html += '<td>' + s + '</td>'
            algo2_html += '</tr>'
        algo2_html += '</table>'
        
        algo2_html += '<img style="margin-top:1rem;margin-bottom:1rem;" src="' + p['algo2_pickedImg_url'] + '">'
        
        #algo Flexbox
        html += '<div style="display:flex;margin-top: 10px; border: solid grey; border-width: 1px; padding: 1rem;"><div style="width:50%;padding-right:1rem;border-right: solid grey; border-width: 1px;">'+algo1_html+'</div><div style="padding-left:2rem; width:50%;">'+algo2_html+'</div></div>'
        html += '<hr>'
    display(HTML(html))


# Testing Stage

In [4]:
result = SongVisualize(101)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


In [5]:
custom_display(result)

Unnamed: 0,Tag: sea,Tag: bay,Tag: waterfront
Keywords: come,0.07,0.06,0.1
Keywords: sunshine,0.24,0.21,0.2
Keywords: sand,0.35,0.24,0.23
Keywords: loveland,/,/,/
Keywords: land,0.29,0.16,0.41
Keywords: flowers,0.17,0.09,0.08
Keywords: beaches,0.37,0.3,0.45

Unnamed: 0,Tag: ice,Tag: winter,Tag: cold
Keywords: waiting,0.09,0.15,0.21
Keywords: sky,0.17,0.17,0.16
Keywords: paradise,0.03,0.16,0.11
Keywords: mellow,0.11,0.14,0.21
Keywords: grass,0.25,0.24,0.13
Keywords: blue,0.25,0.13,0.17

Unnamed: 0,Tag: smiley,Tag: laugh,Tag: funny
Keywords: oh,0.31,0.32,0.4
Keywords: want,0.08,0.27,0.2
Keywords: share,0.04,0.13,0.01
Keywords: life,0.13,0.2,0.18

Unnamed: 0,Tag: wanderer,Tag: wandersmann,Tag: walk
Keywords: wander,0.4,/,0.56
Keywords: tree,0.14,/,0.11
Keywords: soon,0.13,/,0.12
Keywords: shade,0.13,/,0.12
Keywords: rainbow,0.16,/,0.12
Keywords: moon,0.17,/,0.13
Keywords: loveland,/,/,/
Keywords: lie,0.04,/,0.26
Keywords: land,0.16,/,0.07
Keywords: darling,0.18,/,-0.04

Unnamed: 0,Tag: security,Tag: hand,Tag: protected
Keywords: take,0.11,0.22,0.09
Keywords: show,-0.0,0.06,0.01
Keywords: secrets,0.13,0.04,0.18
Keywords: paradise,0.08,0.0,0.16
Keywords: hand,0.07,1.0,0.02
Keywords: everything,0.11,0.25,0.13
Keywords: bring,0.1,0.12,0.02

Unnamed: 0,Tag: smiley,Tag: laugh,Tag: funny
Keywords: oh,0.31,0.32,0.4
Keywords: want,0.08,0.27,0.2
Keywords: share,0.04,0.13,0.01
Keywords: life,0.13,0.2,0.18

Unnamed: 0,Tag: cleveland,Tag: ohio,Tag: oh
Keywords: oh,0.36,0.34,1.0
Keywords: want,0.13,0.16,0.32
Keywords: share,0.01,0.01,0.04


In [8]:
result = SongVisualize(105)
custom_display(result)

Unnamed: 0,Tag: sweden,Tag: fire,Tag: flames
Keywords: watch,0.08,0.1,0.1
Keywords: yes,0.23,0.07,0.07
Keywords: waiting,-0.0,0.1,0.07
Keywords: tired,0.1,0.09,0.18
Keywords: said,-0.08,0.12,0.1
Keywords: patience,0.0,0.01,0.02
Keywords: oh,0.37,0.05,0.07
Keywords: lightning,-0.03,0.42,0.34
Keywords: la,0.26,-0.07,0.03
Keywords: go,0.12,0.07,0.09

Unnamed: 0,Tag: passenger,Tag: stranger,Tag: nature
Keywords: think,0.03,0.13,0.16
Keywords: stranger,0.14,1.0,0.21
Keywords: speak,0.02,0.15,0.1
Keywords: hate,0.06,0.11,0.18
Keywords: girl,0.22,0.41,0.1
Keywords: danger,0.11,0.22,0.15
Keywords: beginning,0.0,-0.01,0.12

Unnamed: 0,Tag: watch,Tag: timepiece,Tag: time
Keywords: watch,1.0,0.25,0.18
Keywords: yes,0.16,0.05,0.18
Keywords: waiting,0.35,0.02,0.25
Keywords: tired,0.14,-0.04,0.27
Keywords: said,0.09,0.01,0.13
Keywords: patience,0.13,0.11,0.27
Keywords: ooh,0.27,0.11,0.12
Keywords: oh,0.21,0.1,0.13
Keywords: la,0.06,0.03,-0.08
Keywords: go,0.32,-0.03,0.21

Unnamed: 0,Tag: watch,Tag: timepiece,Tag: time
Keywords: oh,0.21,0.1,0.13
Keywords: yes,0.16,0.05,0.18
Keywords: waiting,0.35,0.02,0.25
Keywords: tired,0.14,-0.04,0.27
Keywords: said,0.09,0.01,0.13
Keywords: patience,0.13,0.11,0.27
Keywords: la,0.06,0.03,-0.08
Keywords: fading,0.04,0.09,0.12
Keywords: driving,0.14,0.08,0.06
Keywords: crazy,0.25,0.02,0.18


In [10]:
result = SongVisualize(107)
custom_display(result)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,Tag: santa
Keywords: christmas,0.58
Keywords: wish,0.23
Keywords: bring,0.04

Unnamed: 0,Tag: cookies,Tag: cookie,Tag: pastries
Keywords: cookies,1.0,0.75,0.56
Keywords: bring,0.06,0.01,0.07
Keywords: want,0.11,0.15,0.1
Keywords: right,0.12,0.14,0.03
Keywords: milk,0.31,0.27,0.31
Keywords: christmas,0.26,0.24,0.18

Unnamed: 0,Tag: santa
Keywords: go,0.12
Keywords: get,0.11
Keywords: christmas,0.58
Keywords: bring,0.04

Unnamed: 0,Tag: santa
Keywords: christmas,0.58
Keywords: wish,0.23
Keywords: bring,0.04


In [11]:
result = SongVisualize(108)
custom_display(result)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,Tag: bird,Tag: looking
Keywords: went,0.06,0.17
Keywords: thing,0.12,0.17
Keywords: stand,0.03,0.15
Keywords: paper,0.11,0.03
Keywords: moon,0.23,0.08
Keywords: may,0.07,0.28
Keywords: like,0.12,0.32
Keywords: laugh,0.08,0.15
Keywords: hey,0.1,0.18
Keywords: heard,0.14,0.13

Unnamed: 0,Tag: light,Tag: road
Keywords: worth,0.07,0.07
Keywords: traveling,0.11,0.37
Keywords: test,0.09,0.1
Keywords: sacrificed,-0.01,0.08
Keywords: rest,0.14,0.22
Keywords: men,0.05,0.1
Keywords: livingstone,/,/
Keywords: lives,0.04,0.16
Keywords: lead,0.05,0.15
Keywords: help,0.09,0.12

Unnamed: 0,Tag: laughing
Keywords: way,0.18
Keywords: told,0.25
Keywords: thing,0.32
Keywords: stand,0.19
Keywords: spacemen,0.07
Keywords: say,0.21
Keywords: laugh,0.74
Keywords: know,0.34
Keywords: fellas,0.35
Keywords: answer,0.17

Unnamed: 0,Tag: light,Tag: road
Keywords: worth,0.07,0.07
Keywords: traveling,0.11,0.37
Keywords: test,0.09,0.1
Keywords: sacrificed,-0.01,0.08
Keywords: rest,0.14,0.22
Keywords: men,0.05,0.1
Keywords: livingstone,/,/
Keywords: lives,0.04,0.16
Keywords: lead,0.05,0.15
Keywords: help,0.09,0.12

Unnamed: 0,Tag: cloud,Tag: help,Tag: support
Keywords: worth,0.0,0.1,0.02
Keywords: well,0.03,0.16,0.14
Keywords: traveling,0.04,0.07,0.03
Keywords: test,0.11,0.11,0.06
Keywords: sacrificed,-0.02,0.15,0.1
Keywords: rest,0.1,0.19,0.09
Keywords: putting,0.03,0.17,0.04
Keywords: nile,/,/,/
Keywords: men,0.02,0.01,-0.01
Keywords: livingstone,/,/,/


In [13]:
result = SongVisualize(110)
custom_display(result)

Unnamed: 0,Tag: luck,Tag: heart
Keywords: teacher,0.05,0.08
Keywords: thought,0.22,0.17
Keywords: taken,0.06,0.07
Keywords: surprise,0.22,0.08
Keywords: smiled,0.24,0.07
Keywords: school,0.05,0.07
Keywords: must,0.01,0.08
Keywords: kissed,0.19,0.04
Keywords: held,0.03,0.02
Keywords: friends,0.24,0.14

Unnamed: 0,Tag: cup,Tag: top,Tag: view
Keywords: one,0.13,0.39,0.17
Keywords: dream,0.14,0.17,0.2
Keywords: care,0.09,0.04,0.17

Unnamed: 0,Tag: teacher,Tag: geometry,Tag: mathematics
Keywords: teacher,1.0,0.26,0.5
Keywords: trying,0.06,0.08,0.05
Keywords: trance,0.07,0.13,0.11
Keywords: took,0.05,0.0,0.02
Keywords: laws,0.08,0.12,0.17
Keywords: kissed,0.17,0.08,0.1
Keywords: kiss,0.14,0.05,0.06
Keywords: help,0.05,0.02,0.06
Keywords: geometry,0.26,1.0,0.43
Keywords: explain,0.1,0.09,0.15

Unnamed: 0,Tag: cup,Tag: top,Tag: view
Keywords: one,0.13,0.39,0.17
Keywords: dream,0.14,0.17,0.2
Keywords: care,0.09,0.04,0.17

Unnamed: 0,Tag: bouquet,Tag: cloves,Tag: roses
Keywords: want,0.04,0.09,0.14
Keywords: teacher,0.1,0.04,0.06
Keywords: smiled,0.27,0.09,0.21
Keywords: sense,0.1,0.05,0.12
Keywords: kissed,0.28,0.17,0.25
Keywords: hug,0.4,0.13,0.29
Keywords: held,0.03,-0.01,0.04
Keywords: breath,0.21,0.11,0.16


In [14]:
result = SongVisualize(112)
custom_display(result)

Unnamed: 0,Tag: puzzle
Keywords: traces,0.09
Keywords: start,0.05
Keywords: pick,0.13
Keywords: hearts,0.11
Keywords: frida,/
Keywords: find,0.25

Unnamed: 0,Tag: travelcard,Tag: ticket,Tag: london
Keywords: ticket,0.4,1.0,0.07
Keywords: rest,0.09,0.09,0.02
Keywords: really,0.04,0.05,0.09
Keywords: need,-0.0,0.08,0.06
Keywords: mm,0.17,-0.0,0.11
Keywords: go,0.16,0.21,0.14
Keywords: buy,0.19,0.1,0.15
Keywords: bahamas,0.13,-0.01,0.42
Keywords: agnetha,/,/,/

Unnamed: 0,Tag: kids,Tag: portraits,Tag: cute
Keywords: owe,0.1,0.13,0.04
Keywords: mm,0.02,0.05,0.05
Keywords: missing,0.04,0.06,0.13
Keywords: look,0.15,0.13,0.24
Keywords: done,0.2,0.04,0.09

Unnamed: 0,Tag: small,Tag: girl,Tag: little
Keywords: turn,0.09,0.06,0.23
Keywords: run,0.09,0.03,0.12
Keywords: ready,0.05,0.01,0.2
Keywords: owe,0.1,-0.04,0.18
Keywords: kind,0.21,0.15,0.52
Keywords: intrusion,0.1,0.06,0.13
Keywords: feel,0.16,0.14,0.39
Keywords: chance,0.09,0.11,0.26

Unnamed: 0,Tag: ruble,Tag: accounting,Tag: surprise
Keywords: us,0.1,0.05,0.07
Keywords: taken,0.03,0.01,0.05
Keywords: surprise,0.0,0.11,1.0
Keywords: state,0.1,0.08,0.06
Keywords: existence,0.08,0.07,-0.01
Keywords: entered,-0.03,0.03,-0.0

Unnamed: 0,Tag: snake,Tag: really,Tag: creep
Keywords: view,0.02,0.17,0.12
Keywords: think,0.12,0.71,0.22
Keywords: really,0.1,1.0,0.18
Keywords: mm,0.09,0.01,0.01
Keywords: look,0.08,0.38,0.26
Keywords: done,0.07,0.41,0.01
Keywords: better,0.08,0.39,0.11
Keywords: agnetha,/,/,/

Unnamed: 0,Tag: danbo,Tag: figures,Tag: love
Keywords: turn,/,0.1,0.09
Keywords: run,/,0.1,0.08
Keywords: owe,/,0.1,0.28
Keywords: mm,/,-0.02,0.0
Keywords: missing,/,0.08,0.09

Unnamed: 0,Tag: small,Tag: girl,Tag: little
Keywords: owe,0.1,-0.04,0.18
Keywords: mm,0.22,0.12,0.05
Keywords: look,0.15,0.08,0.29
Keywords: feel,0.16,0.14,0.39
Keywords: done,0.07,0.07,0.22
Keywords: chance,0.09,0.11,0.26

Unnamed: 0,Tag: danbo,Tag: figures,Tag: love
Keywords: turn,/,0.1,0.09
Keywords: run,/,0.1,0.08
Keywords: owe,/,0.1,0.28
Keywords: mm,/,-0.02,0.0
Keywords: missing,/,0.08,0.09

Unnamed: 0,Tag: small,Tag: girl,Tag: little
Keywords: owe,0.1,-0.04,0.18
Keywords: mm,0.22,0.12,0.05
Keywords: look,0.15,0.08,0.29
Keywords: feel,0.16,0.14,0.39
Keywords: done,0.07,0.07,0.22
Keywords: chance,0.09,0.11,0.26

Unnamed: 0,Tag: danbo,Tag: figures,Tag: love
Keywords: turn,/,0.1,0.09
Keywords: run,/,0.1,0.08
Keywords: owe,/,0.1,0.28
Keywords: mm,/,-0.02,0.0
Keywords: missing,/,0.08,0.09

Unnamed: 0,Tag: small,Tag: girl,Tag: little
Keywords: owe,0.1,-0.04,0.18
Keywords: mm,0.22,0.12,0.05
Keywords: look,0.15,0.08,0.29
Keywords: feel,0.16,0.14,0.39
Keywords: done,0.07,0.07,0.22
Keywords: chance,0.09,0.11,0.26

Unnamed: 0,Tag: danbo,Tag: figures,Tag: love
Keywords: turn,/,0.1,0.09
Keywords: run,/,0.1,0.08
Keywords: owe,/,0.1,0.28
Keywords: mm,/,-0.02,0.0
Keywords: missing,/,0.08,0.09

Unnamed: 0,Tag: small,Tag: girl,Tag: little
Keywords: owe,0.1,-0.04,0.18
Keywords: mm,0.22,0.12,0.05
Keywords: look,0.15,0.08,0.29
Keywords: feel,0.16,0.14,0.39
Keywords: done,0.07,0.07,0.22
Keywords: chance,0.09,0.11,0.26

Unnamed: 0,Tag: golf,Tag: court
Keywords: owe,0.07,0.1
Keywords: missing,0.1,0.1
