## Song Info Retrieval
#### Author: Kenneth Leung
- Using LLM agents to retrieve song information

In [1]:
import csv
import os

import box
import pandas as pd
import yaml
from dotenv import load_dotenv
from langchain.agents import (AgentExecutor, AgentType, OpenAIFunctionsAgent,
                              Tool, initialize_agent)
from langchain.agents.format_scratchpad import \
    format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import (ChatPromptTemplate, MessagesPlaceholder,
                               PromptTemplate)
from langchain.tools.render import format_tool_to_openai_function
from langchain_community.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper

In [2]:
# Move to the parent directory so the relative 'config/' and 'data/' paths used
# in this notebook resolve. The guard keeps the cell idempotent: once the config
# file is reachable from the current directory, re-running the cell no longer
# climbs one more level each time.
if not os.path.exists('config/config.yaml'):
    os.chdir('..')

# Parse the YAML config into a Box for attribute-style access (e.g. cfg.ENVDIR).
with open('config/config.yaml', 'r', encoding='utf8') as ymlfile:
    cfg = box.Box(yaml.safe_load(ymlfile))

# Load environment variables from the dotenv path named in the config. As the
# last expression of the cell, the call's return value is displayed.
load_dotenv(dotenv_path=cfg.ENVDIR, verbose=True)

True

In [3]:
# Read the 2010s chart CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer='data/input/chart2000-song-2010-decade-0-3-0070.csv',
)
df.head(n=5)

Unnamed: 0,decade,position,artist,song,indicativerevenue,us,uk,de,fr,ca,au
0,2010s,1,Ed Sheeran,Shape Of You,33410.177486,1,1,1,1,1,1
1,2010s,2,Luis Fonsi & Daddy Yankee,Despacito,27489.887061,1,1,1,1,1,1
2,2010s,3,Ed Sheeran,Perfect,25964.69124,1,1,1,1,1,1
3,2010s,4,Mark Ronson & Bruno Mars,Uptown Funk,24203.410576,1,1,3,1,1,1
4,2010s,5,Pharrell Williams,Happy,23873.104765,1,1,1,1,1,1


In [4]:
# Number of rows in the loaded chart DataFrame.
df.shape[0]

200

___

In [5]:
# Chat model used by the agent: temperature 0 plus a fixed seed passed through
# model_kwargs.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
    model_kwargs=dict(seed=0),
)

In [7]:
# functions = [
#     {
#     "name": "get_song_info",
#     "description": "Retrieves a set of details about a song by a music artiste",
#     "parameters": {
#         "type": "object",
#         "properties": {
#             "lyricist": {
#                 "type": "string",
#                 "description": "Name of the person (or persons) who wrote the lyrics of the song",
#             },
#             "songwriter": {
#                 "type": "string",
#                 "description": "Name of the person (or persons) who composed the song (aka songwriter)",
#             },
#             "producer": {
#                 "type": "string",
#                 "description": "Name of the person (or persons) who produced the song (aka producer)",
#             },
#             "production_company": {
#                 "type": "string",
#                 "description": "Name of the company which produced the song",
#             },
#             "genre": {
#                 "type": "string",
#                 "description": "Genre of the song",
#             },
#             "language": {
#                 "type": "string",
#                 "description": "Main language of the song",
#             },
#         },
#         "required": ["lyricist", "songwriter", "producer", "production_company", "genre", "language"],
#         },
#     },     
# ]

In [6]:
# Wrap the Wikipedia API helper as a Tool; the description is the text the
# agent sees when deciding whether to call it.
wikipedia = WikipediaAPIWrapper()
wikipedia_tool = Tool(
    func=wikipedia.run,
    name='wikipedia',
    description="Useful for when you need to look up the songwriters, genre, and producers for a song on wikipedia",
)

# search = DuckDuckGoSearchRun()
# duckduckgo_tool = Tool(
#     name='DuckDuckGo_Search',
#     func= search.run,
#     description="Useful for when you need to do a search on the internet to find information that the other tools can't find."
# )

In [7]:
# Tools available to the agent (the DuckDuckGo tool is currently disabled).
tools = [
    wikipedia_tool,
    # duckduckgo_tool
]

# Bind each tool's OpenAI function spec to the chat model.
llm_with_tools = llm.bind(
    functions=list(map(format_tool_to_openai_function, tools)),
)

In [8]:
# System message for the agent: sets the assistant's role and asks for an
# answer-only reply in JSON, followed by an example of the expected fields.
# NOTE(review): the example below is not strict JSON (single quotes, no
# enclosing braces) and uses the singular key 'Songwriter' -- confirm this is
# intentional. This string is passed to ChatPromptTemplate.from_messages, so
# literal braces would presumably need escaping as {{ }} if they are added.
system_prompt = """
You are a helpful assistant and an expert in all things related to music and songs. 
Your output should only be the answer without additional explanation and in a JSON format 
Example:
'Songwriter': 'John Johnson, Adam Smith', 
'Producers': 'Alex Boh, Betty, Germaine',
'Label': 'Sony Music',
'Genre': 'Disco, Pop',
'Main Language': 'Korean'
"""

In [9]:
# Chat prompt layout: system instructions, the user's question, then a
# placeholder that receives the agent's scratchpad messages.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

In [10]:
# Agent pipeline: map the executor's inputs onto the prompt variables, render
# the prompt, call the tool-enabled chat model, then parse its reply.
agent = (
    {
        "input": lambda inputs: inputs["input"],
        "agent_scratchpad": lambda inputs: format_to_openai_function_messages(
            inputs["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

# Executor that runs the agent with the configured tools (non-verbose).
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=False,
)

In [11]:
def generate_input_prompt(song_title, performer):
    """Build the user prompt asking for metadata about a single song.

    Args:
        song_title: Title of the song, interpolated into the prompt.
        performer: Performing artist, interpolated into the prompt.

    Returns:
        The multi-line prompt string listing the song and the five questions
        (songwriters, producers, label, genre, main language).
    """
    return f"""
    You are a helpful assistant and an expert in all things related to music and songs
    
    This is the song to review: 
    - Song title: {song_title}
    - Performing Artiste: {performer}
    
    Based on the above song, answer the following questions: 
    - Who are the songwriters of the song?
    - Who are the producers of the song?
    - What is the name of the record company (aka label)?
    - What is the genre of the song?         
    - What is the main language of the song?
    """

In [13]:
output_file = 'data/output/songs_metadata.csv'
fieldnames = ['song', 'artist', 'output', 'cost']

# Make sure the output folder exists so the first run does not fail on write.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# On the first run, create the results file containing only the header row.
if not os.path.exists(output_file):
    pd.DataFrame(columns=fieldnames).to_csv(output_file, index=False)

# Load earlier results once (instead of re-reading the file on every iteration)
# so an interrupted run can resume. Values are read as plain strings so that
# numeric-looking titles still compare equal to the input titles.
df_song_info = pd.read_csv(output_file, encoding='utf-8',
                           dtype=str, keep_default_na=False)

# Track (song, artist) pairs rather than the title alone, so two different
# artists' songs that share a title are both processed.
processed = set(zip(df_song_info['song'], df_song_info['artist']))

for _, row in df.iterrows():
    song = row['song']
    artist = row['artist']
    key = (str(song), str(artist))
    if key in processed:
        continue

    print(f'***** Processing {song} by {artist} *****')
    input_prompt = generate_input_prompt(song, artist)

    # The callback accumulates OpenAI usage for the calls made inside the block.
    with get_openai_callback() as cb:
        response = agent_executor.invoke({"input": input_prompt})
        output = response['output']
        cost = cb.total_cost
    print(output)

    new_row_data = {
        'song': song,
        'artist': artist,
        'output': output,
        'cost': cost,
    }

    # Append immediately so completed rows survive an interruption; DictWriter
    # ties each value to its column name instead of relying on dict order.
    with open(output_file, 'a', newline='', encoding='utf-8') as file:
        csv.DictWriter(file, fieldnames=fieldnames).writerow(new_row_data)
    processed.add(key)

***** Processing Perfect by Ed Sheeran *****
{
  "Songwriters": "Ed Sheeran",
  "Producers": "Ed Sheeran, Will Hicks",
  "Label": "Asylum, Atlantic Records",
  "Genre": "Pop, Ballad",
  "Main Language": "English"
}
***** Processing Uptown Funk by Mark Ronson & Bruno Mars *****

{
  "Songwriters": "Mark Ronson, Bruno Mars, Jeff Bhasker, Philip Lawrence",
  "Producers": "Mark Ronson, Bruno Mars, Jeff Bhasker",
  "Label": "RCA Records",
  "Genre": "Funk-Pop, Soul, Boogie, Disco-Pop, Minneapolis Sound",
  "Main Language": "English"
}
***** Processing Happy by Pharrell Williams *****



KeyboardInterrupt

