In [44]:
from langchain_community.document_loaders import TextLoader #this will convert our descriptions into something langchain can understand
from langchain_text_splitters import CharacterTextSplitter # will basically divide the document created into individual descriptions
from langchain_huggingface import HuggingFaceEmbeddings # an open source model we will use to convert text into numerical vectors
from langchain_chroma import Chroma # a vector database to store our vectors

In [45]:
import pandas as pd

In [46]:
# Since textloader doesn't read pandas' dataframes, so we convert the column we are going to use to make the vector database into a text file
songs = pd.read_csv("dataset/final_track_list.csv")
songs["tagged_description"].to_csv("dataset/tagged_description.txt", index=False, header=False)

In [47]:
raw_doc = TextLoader("dataset/tagged_description.txt").load()
# initialize splitter
splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
# chunk overlap and separator and their values are self-explanatory, for chunk size we set it to zero because we want to prioritize splitting on \n, if we set it to some non-zero size, maybe that size comprised of multiple lines, so splitter wouldn't care for \n.

#apply splitter on the document
doc = splitter.split_documents(raw_doc)

Created a chunk of size 308, which is longer than the specified 0
Created a chunk of size 298, which is longer than the specified 0
Created a chunk of size 337, which is longer than the specified 0
Created a chunk of size 316, which is longer than the specified 0
Created a chunk of size 337, which is longer than the specified 0
Created a chunk of size 314, which is longer than the specified 0
Created a chunk of size 301, which is longer than the specified 0
Created a chunk of size 263, which is longer than the specified 0
Created a chunk of size 289, which is longer than the specified 0
Created a chunk of size 289, which is longer than the specified 0
Created a chunk of size 258, which is longer than the specified 0
Created a chunk of size 281, which is longer than the specified 0
Created a chunk of size 261, which is longer than the specified 0
Created a chunk of size 278, which is longer than the specified 0
Created a chunk of size 272, which is longer than the specified 0
Created a 

In [48]:
# checking if splitting was done correctly
doc[0]

Document(metadata={'source': 'dataset/tagged_description.txt'}, page_content='"2ZzZ2qwZBWlDKs420hhloc A laid-back desi-pop groove with mellow rap verses and smooth vocals. Warm guitar licks and a head-nodding beat lend moderate danceability, while lyrics about cyclical love tug at the heart. Feels reflective and subtly upliftingâ€”perfect for late-night drives or chilled gatherings."')

In [49]:
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [50]:
# Making the vector database
db_songs = Chroma.from_documents(doc,embedding=embedder)

This is, believe it or not pretty much it for the basic analyser

In [51]:
# Test
query = "An indie song about heartbreak"
response = db_songs.similarity_search(query, k = 5)

In [52]:
response

[Document(id='8c7c44b2-634a-443a-bd58-ea973efe7e5b', metadata={'source': 'dataset/tagged_description.txt'}, page_content='"1kxeWHF9PrCVZHvVskv8lg An experimental track blending electronic beats with themes of heartbreak. Its haunting vocals create a sense of isolation. [Popularity: Kinda popular, but not exactly a hit.] [Genres: rap] A nostalgic song from the 2000s."'),
 Document(id='a823f35c-32ce-45bf-b745-fa08e68494e2', metadata={'source': 'dataset/tagged_description.txt'}, page_content='1kxeWHF9PrCVZHvVskv8lg An experimental track blending electronic beats with themes of heartbreak. Its haunting vocals create a sense of isolation. [Popularity: Decently popular.] [Genres: rap] An old song from my childhood'),
 Document(id='0b3f2e80-2ad7-4204-af5e-90f678ebe059', metadata={'source': 'dataset/tagged_description.txt'}, page_content='"2LCvypYL9a21Hln4A4EdWU Reflective, indie pop. Gentle, melancholic, and hopeful, itâ€™s about loss and acceptance. Best for introspective walks or healing af

Alright now that this is working fine, we need to use the id we previously attached to descriptions to fetch the actual songs and return the songs instead.

In [53]:
songs[songs["id"] == response[0].page_content.split()[0].strip('"')]

Unnamed: 0,id,name,artist,album,release_date,duration_ms,genres,popularity,explicit,description,Unnamed: 10,tagged_description,popularity_description,release_year,release_age,recency_description
860,1kxeWHF9PrCVZHvVskv8lg,Love Lockdown,Kanye West,808s & Heartbreak,2008-11-24,270306,rap,66,False,An experimental track blending electronic beat...,,1kxeWHF9PrCVZHvVskv8lg An experimental track b...,Decently popular.,2008,17,An old song from my childhood


Now that we have configured fetching a song, we can make a function to fetch however many songs we need to fetch in a bundle.

In [54]:
def fetch_songs(query: str, size: int=10) -> pd.DataFrame:
    response = db_songs.similarity_search(query, k=size)
    result = []
    for i in range(0,len(response)):
        result+= [response[i].page_content.split()[0].strip('"')]
    return songs[songs["id"].isin(result)]

In [73]:
fetch_songs("classic funny rap songs", 10)

Unnamed: 0,id,name,artist,album,release_date,duration_ms,genres,popularity,explicit,description,Unnamed: 10,tagged_description,popularity_description,release_year,release_age,recency_description
452,7lQ8MOhq6IN2w8EYcFNSUk,Without Me,Eminem,The Eminem Show,2002-05-26,290320,"hip hop, rap",89,True,"Playful, brash rap. Satirical and confident, w...",,"7lQ8MOhq6IN2w8EYcFNSUk Playful, brash rap. Sat...","Massively popular, a huge hit.",2002,23,A very old classical song from the 90s.
607,75IN3CtuZwTHTnZvYM4qnJ,My Name Is,Eminem,The Slim Shady LP,1999-02-23,268400,"hip hop, rap",76,True,"Playful, irreverent rap. Satirical lyrics and ...",,"75IN3CtuZwTHTnZvYM4qnJ Playful, irreverent rap...","Massively popular, a huge hit.",1999,26,A very old classical song from the 90s.
631,2YzmfPLqUx5CJOaw5ThsBV,Plain Jane REMIX (feat. Nicki Minaj),"A$AP Ferg, Nicki Minaj",Plain Jane REMIX (feat. Nicki Minaj),2017-12-15,202636,rap,71,True,"Hard-hitting, braggadocious hip hop. Bold, che...",,"2YzmfPLqUx5CJOaw5ThsBV Hard-hitting, braggadoc...","Massively popular, a huge hit.",2017,8,An old song from my childhood
780,7yNK27ZTpHew0c55VvIJgm,Dark Fantasy,Kanye West,My Beautiful Dark Twisted Fantasy,2010-11-22,280786,rap,72,True,An ambitious hip-hop track with cinematic prod...,,7yNK27ZTpHew0c55VvIJgm An ambitious hip-hop tr...,"Massively popular, a huge hit.",2010,15,An old song from my childhood
803,5KUNwkaNf8l5A9sXZhiCgI,Rapp Snitch Knishes,"MF DOOM, Mr. Fantastik",MM..FOOD,2004-11-16,172893,"alternative hip hop, east coast hip hop, exper...",77,True,A satirical hip-hop track critiquing self-incr...,,5KUNwkaNf8l5A9sXZhiCgI A satirical hip-hop tra...,"Massively popular, a huge hit.",2004,21,A very old classical song from the 90s.
854,1PS1QMdUqOal0ai3Gt7sDQ,Gold Digger,"Kanye West, Jamie Foxx",Late Registration,2005-08-30,207626,rap,82,True,A satirical take on materialistic relationship...,,1PS1QMdUqOal0ai3Gt7sDQ A satirical take on mat...,"Massively popular, a huge hit.",2005,20,An old song from my childhood


Damn okay these responses are way better than what i expected. Spotify can learn a thing or two from me

I don't know why but I am getting duplicate IDs after messing with some stuff and I can't fix it, so here are 6 responses on 10 requests. (it works fine on gradio though since I didn't mess with that so phew)