# Libs

In [1]:
import html
import os

import faiss
import hnswlib
import pandas as pd
import spotipy
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from spotipy.oauth2 import SpotifyClientCredentials

# Data

In [2]:
# Read the semicolon-separated track catalogue and display it.
tracks_csv_path = '../data/tracks.csv'
df = pd.read_csv(tracks_csv_path, sep=';')
df

Unnamed: 0,track_id,artists,track_name
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost - Acoustic
2,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Can't Help Falling In Love
3,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On
4,01MVOl9KtVTNfFiBU9I7dc,Tyrone Wells,Days I Will Remember
...,...,...,...
81339,42bdU7oDyRvyRXaKbUrtfu,Bethel Music,Victory Is Yours - Live (feat. Bethany Wohrle)
81340,1eZYPovTvmxk3QlVD2VpCX,Bethel Music,We Will Not Be Shaken - Live (feat. Brian John...
81341,7mD7yAQm4GcifSnWqJdZHi,Bethel Music,Ain’t No Grave - Live (feat. Molly Skaggs)
81342,2yAo8cJDVoMjBMS5MgrO8P,Hillsong Worship,This Is Living (feat. Hillsong Young & Free)


# Embeddings

In [3]:
# Text embedded for each track: "<track name> - <artists>", lower-cased.
# Missing names/artists are replaced by '' first: otherwise NaN propagates through
# the string concatenation and a float NaN (not a string) ends up in the array that
# is handed to the encoder.
tracks = (
    df['track_name'].fillna('').str.lower()
    + ' - '
    + df['artists'].fillna('').str.lower()
).values
# Alternative that embeds the track name only:
# tracks = df['track_name'].str.lower().values

### Pre-Trained Models

In [4]:
# Candidate pre-trained encoders, compared in the next cell.
models = [
    'sentence-transformers/all-mpnet-base-v2',
    'sentence-transformers/all-MiniLM-L6-v2',
    'sentence-transformers/all-MiniLM-L12-v2',
    'sentence-transformers/multi-qa-MiniLM-L6-cos-v1',
    'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
    'sentence-transformers/paraphrase-MiniLM-L6-v2',
    'msmarco-MiniLM-L-12-v3',
    'bert-base-uncased',
]

In [5]:
# Compare the candidate encoders on one fixed query: for each model, embed all
# tracks, build a cosine HNSW index over them and print the 5 nearest tracks.
search_string = 'Drake'  # same query for every model, so it is set once outside the loop
for model_name in models:
    # The loop variable is deliberately not called `model`: that name is bound to the
    # selected SentenceTransformer later in the notebook, and re-running this cell
    # would otherwise replace it with a plain string.
    st = SentenceTransformer(model_name)
    embeddings = st.encode(tracks)
    index = hnswlib.Index(space='cosine', dim=st.get_sentence_embedding_dimension())
    index.init_index(max_elements=tracks.shape[0], ef_construction=200, M=16)
    index.add_items(embeddings)

    query = st.encode([search_string])
    # Distances are bound to a real name; assigning to `_` would shadow IPython's
    # last-output variable.
    neighbor_ids, neighbor_distances = index.knn_query(query, k=5)
    print(model_name)
    print(df.iloc[neighbor_ids[0]])
    print('-'*30)

sentence-transformers/all-mpnet-base-v2
                     track_id       artists                 track_name
8326   4Cg0paoTgxlo2LC85HKaab    Nick Drake                 Which Will
7985   0hNVjU6JKydHts0SAjHCno    Nick Drake  One Of These Things First
13765  2cEmWBBlCfR9wWrY9JmTyt          ZAYN                     Let Me
23076  1parCywpMDF9i6sO13kyvc         K.I.Z              Rap über Hass
50934  0L1qOe6ZGo8NZeU3yLclZP  Zach Diamond                       Wavy
------------------------------
sentence-transformers/all-MiniLM-L6-v2
                     track_id     artists                 track_name
8326   4Cg0paoTgxlo2LC85HKaab  Nick Drake                 Which Will
30832  6DCZcSspjsKoFjzjrWoCdn       Drake                 God's Plan
7985   0hNVjU6JKydHts0SAjHCno  Nick Drake  One Of These Things First
8208   5ir0VEsMI7cLhN6SEiaKol  Nick Drake                       Road
8325   1drLQNS9D5z9lBP4DbtO2R  Nick Drake               Saturday Sun
------------------------------
sentence-transforme

No sentence-transformers model found with name /Users/thiagoosorio/.cache/torch/sentence_transformers/bert-base-uncased. Creating a new one with MEAN pooling.


bert-base-uncased
                     track_id  artists track_name
37895  1kR4gIb7nGxHPI3D2ifs59      NaN        NaN
26631  2gpkmR9oX3Jk6rDI6KUwHj    Glare      Blank
11518  4q0CSHFWnVajd3sMEwyH4f   SIERRA   Unbroken
17757  5ABDsQEkVZ1ECGPGQl6Yfr     NERO  Innocence
28572  2dEgwtRjYl6TJkIwjRwn6z  Warrant     Heaven
------------------------------


### Best Pre-Trained Model

In [6]:
# Load the encoder used for the rest of the notebook (this section's heading marks
# it as the best of the pre-trained models).
model = SentenceTransformer('msmarco-MiniLM-L-12-v3')

In [7]:
# Encode every track string with the selected model; `embeddings` is what both the
# HNSWLib and the FAISS index below are filled with.
embeddings = model.encode(tracks)

# Semantic Search

### HNSWLib

In [8]:
# Create an empty cosine-space HNSW index sized for the whole track list.
embedding_dim = model.get_sentence_embedding_dimension()
n_tracks = tracks.shape[0]
index = hnswlib.Index(space='cosine', dim=embedding_dim)
index.init_index(max_elements=n_tracks, ef_construction=200, M=16)

In [9]:
# Insert all track embeddings into the HNSW index.
index.add_items(embeddings)

In [10]:
# index.save_index('../data/hnswlib_index.bin')

In [11]:
# Free-text query used to try out the HNSW index.
search_string = 'Drake'

In [12]:
# Embed the query with the same model as the tracks; it is passed as a one-element list.
query = model.encode([search_string])

In [13]:
# Ask for the 5 nearest neighbours. The two returned arrays are unpacked as `i`
# (used below as row positions into `df`) and `d` (displayed in the next cell).
i, d = index.knn_query(query, k=5)
i

array([[30832,  8326,  8208,  8325,  7916]], dtype=uint64)

In [14]:
# Display the second array returned by knn_query (one value per neighbour).
d

array([[0.33998847, 0.42415565, 0.4311428 , 0.46342802, 0.48567998]],
      dtype=float32)

In [15]:
# Look up the matched tracks by position in the DataFrame.
df.iloc[i[0]]

Unnamed: 0,track_id,artists,track_name
30832,6DCZcSspjsKoFjzjrWoCdn,Drake,God's Plan
8326,4Cg0paoTgxlo2LC85HKaab,Nick Drake,Which Will
8208,5ir0VEsMI7cLhN6SEiaKol,Nick Drake,Road
8325,1drLQNS9D5z9lBP4DbtO2R,Nick Drake,Saturday Sun
7916,3EtIraJEHVSbBvLw5msioH,Nick Drake,Northern Sky


### FAISS

In [16]:
# Flat FAISS index sized to the embedding dimensionality. This rebinds `index`,
# replacing the HNSW index built above.
# NOTE(review): IndexFlatL2 uses L2 distance, while the HNSW index was created with
# space='cosine' — confirm that the difference in metric is intended.
index = faiss.IndexFlatL2(embeddings.shape[1])

In [17]:
# Add all track embeddings to the FAISS index.
index.add(embeddings)

In [18]:
# Persist the FAISS index to disk; the "Final Model" section reads this file back.
faiss.write_index(index, '../data/faiss_index.bin')

In [20]:
# Same query string as used in the HNSWLib section.
search_string = 'Drake'

In [21]:
# Embed the query (passed as a one-element list) for the FAISS search.
query = model.encode([search_string])

In [22]:
# Number of neighbours to retrieve.
k = 5
# `top_k` keeps the whole result of the search; its second element (top_k[1]) is
# used below as row positions into `df`.
top_k = index.search(query, k)

In [23]:
# Show the FAISS hits for the single query, with the columns in display order.
neighbor_positions = top_k[1].tolist()[0]
display_columns = ['artists', 'track_name', 'track_id']
df.iloc[neighbor_positions][display_columns]

Unnamed: 0,artists,track_name,track_id
30832,Drake,God's Plan,6DCZcSspjsKoFjzjrWoCdn
8326,Nick Drake,Which Will,4Cg0paoTgxlo2LC85HKaab
8208,Nick Drake,Road,5ir0VEsMI7cLhN6SEiaKol
8325,Nick Drake,Saturday Sun,1drLQNS9D5z9lBP4DbtO2R
72128,Drake,One Dance (feat. Wizkid / Kyla),5ZKG94fnjiuMH5yrC5S9lS


# Final Model

In [37]:
# Reload the FAISS index from the file written earlier in this notebook.
music_index = faiss.read_index('../data/faiss_index.bin')

In [38]:
# Query for the end-to-end example in this section.
search_string = 'beatles'

In [39]:
# Embed the query and search the index that was reloaded from disk in this section.
# The search goes through `music_index` rather than the leftover in-memory `index`,
# so the persisted file is what is actually exercised here.
query = model.encode([search_string])
k = 5  # number of neighbours to retrieve
top_k = music_index.search(query, k)

In [40]:
def get_album_cover(
    spotify_session,
    track_id,
    fallback_url='https://www.lifewire.com/thmb/5Y8ggTdQiyLdq9us-IMpsACJP-s=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/alert-icon-5807a14f5f9b5805c2aa679c.PNG',
):
    """Return the URL of the first album image for a track, or a placeholder.

    Args:
        spotify_session: Object exposing ``track(track_id)``; its result is indexed
            as ``['album']['images'][0]['url']``.
        track_id: Identifier passed through to ``spotify_session.track``.
        fallback_url: URL returned when the lookup fails for any reason (request
            error, missing keys, empty image list). Defaults to the alert icon
            this notebook has always used.

    Returns:
        The cover URL, or ``fallback_url`` if it could not be obtained.
    """
    try:
        return spotify_session.track(track_id)['album']['images'][0]['url']
    except Exception:
        # Best-effort lookup: any ordinary failure yields the placeholder, but
        # KeyboardInterrupt/SystemExit are no longer swallowed (a bare `except:`
        # made a long-running cell impossible to interrupt).
        return fallback_url

In [41]:
# Load the Spotify API credentials from the project's env file and build a client
# authenticated with the client-credentials flow.
load_dotenv('../src/creds.env')
client_id, client_secret = os.getenv('client_id'), os.getenv('client_secret')

auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [42]:
# Columns shown for each hit, in display order.
result_columns = ['artists', 'track_name', 'track_id']

# Hnswlib variant (uses the `i` array from knn_query):
# found_tracks = df.iloc[i[0]][['artists', 'track_name', 'track_id']]

# FAISS variant: row positions come from the second element of the search result.
neighbor_positions = top_k[1].tolist()[0]
found_tracks = df.iloc[neighbor_positions][result_columns]

In [43]:
# Replace the original row labels with a fresh 0-based index. The result is
# rebound to the name instead of using inplace=True, so this cell does not mutate
# the frame produced by the previous cell and can be re-run safely.
found_tracks = found_tracks.reset_index(drop=True)
found_tracks

Unnamed: 0,artists,track_name,track_id
0,The Beatles,Help!,6EdSN1iGtLPhcz43QDRkdK
1,The Beatles,Yellow Submarine,1tdltVUBkiBCW1C3yB4zyD
2,The Beatles,Hey Jude,1eT2CjXwFXNx6oY5ydvzKU
3,The Beatles,A Day In The Life,4XiDAxr6alWzxm24i2Rt4K
4,The Beatles,All You Need Is Love,5zqJlEJcn0EfnvAScH8swK


In [44]:
def _cover_for(track_id):
    """Fetch the album cover URL for one track id via the shared Spotify client."""
    return get_album_cover(sp, track_id)


# One lookup per hit; the resulting URLs become the `album_cover` column.
found_tracks['album_cover'] = found_tracks['track_id'].map(_cover_for)
found_tracks

Unnamed: 0,artists,track_name,track_id,album_cover
0,The Beatles,Help!,6EdSN1iGtLPhcz43QDRkdK,https://i.scdn.co/image/ab67616d0000b27330503d...
1,The Beatles,Yellow Submarine,1tdltVUBkiBCW1C3yB4zyD,https://i.scdn.co/image/ab67616d0000b273d807dd...
2,The Beatles,Hey Jude,1eT2CjXwFXNx6oY5ydvzKU,https://i.scdn.co/image/ab67616d0000b27330503d...
3,The Beatles,A Day In The Life,4XiDAxr6alWzxm24i2Rt4K,https://i.scdn.co/image/ab67616d0000b27330503d...
4,The Beatles,All You Need Is Love,5zqJlEJcn0EfnvAScH8swK,https://i.scdn.co/image/ab67616d0000b273d807dd...


In [45]:
# Static head of the results page: document header, inline CSS and the opening of
# the table (header row plus the opening <tbody>). The result rows and the closing
# tags are appended to this string by the following cells.
html_table = '''
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>Sua Página</title>
    <link href="https://fonts.googleapis.com/css?family=Open+Sans:400,600" rel="stylesheet">

    <style>
    *, *:before, *:after {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }

    body {
    background: #ffffff;
    font-family: 'Open Sans', sans-serif;
    }

    table {
    background: #ffffff;
    border-radius: 0.25em;
    border-collapse: collapse;
    margin: 1em;
    }

    th {
    border-bottom: 1px solid #000000;
    color: #000000;
    font-size: 0.85em;
    font-weight: 600;
    padding: 0.5em 1em;
    text-align: center;
    vertical-align: middle;
    }

    td {
    color: #000000;
    font-weight: 400;
    padding: 0.65em 1em;
    text-align: center;
    vertical-align: middle;
    }

    .disabled td {
    color: #959595;
    }

    tbody tr {
    transition: background 0.25s ease;
    }

    tbody tr:hover {
    background: #959595;
    }
    </style>
    </head>
    <body>
    <table align="center">
    <thead>
    <tr>
    <th>Name</th>
    <th>Artists</th>
    <th>Album Cover</th>
    </tr>
    </thead>
    <tbody>
'''

In [46]:
# Append one <tr> per recommended track to the HTML document being built.
# NOTE: this cell appends to `html_table`; re-run the template cell first if this
# cell is executed again, otherwise the rows are added a second time.
for row in found_tracks.itertuples(index=False):
    # Columns are read by name rather than by position, and missing values become
    # empty cells instead of crashing on `.replace` (NaN is a float).
    track_name = '' if pd.isna(row.track_name) else str(row.track_name)
    artists = '' if pd.isna(row.artists) else str(row.artists).replace(';', ' | ')
    cover_url = '' if pd.isna(row.album_cover) else str(row.album_cover)
    # Everything interpolated into markup is escaped so that characters such as
    # `<`, `&` or quotes in a title cannot break (or inject into) the page.
    html_table += f'''
    <tr>
    <td>{html.escape(track_name)}</td>
    <td>{html.escape(artists)}</td>
    <td><img src="{html.escape(cover_url, quote=True)}" width="200" height="150"></td>
    </tr>
    '''

In [47]:
# Close the table body, the table and the document.
html_table += '''
    </tbody>
    </table>
    </body>
    </html>
'''

In [48]:
# Print the assembled HTML source as plain text.
print(html_table)


    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>Sua Página</title>
    <link href="https://fonts.googleapis.com/css?family=Open+Sans:400,600" rel="stylesheet">

    <style>
    *, *:before, *:after {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }

    body {
    background: #ffffff;
    font-family: 'Open Sans', sans-serif;
    }

    table {
    background: #ffffff;
    border-radius: 0.25em;
    border-collapse: collapse;
    margin: 1em;
    }

    th {
    border-bottom: 1px solid #000000;
    color: #000000;
    font-size: 0.85em;
    font-weight: 600;
    padding: 0.5em 1em;
    text-align: center;
    vertical-align: middle;
    }

    td {
    color: #000000;
    font-weight: 400;
    padding: 0.65em 1em;
    text-align: center;
    vertical-align: middle;
    }

    .disabled td {
    color: #959595;
    }

    tbody tr {
    transition: background 0.25s ease;
    }

    tbody tr:hover {
    background: #959595;
    }
    </st