In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import tool
from langchain.schema import HumanMessage
from langchain.tools import Tool

import requests
from bs4 import BeautifulSoup
import datetime
import json
import os
import dateparser
import urllib.parse
import tiktoken
from langchain.utilities import GoogleSearchAPIWrapper
with open("./keys.json", 'r') as j:
    keys = json.loads(j.read())
    os.environ["OPENAI_API_KEY"] = keys["OPENAI_API_KEY"]
    os.environ["GOOGLE_API_KEY"] = keys["GOOGLE_API_KEY"]


In [2]:
LLM_FUNCTION_CALLING = ChatOpenAI(model='gpt-3.5-turbo-0613', temperature=0)
LLM = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
ENCODING = tiktoken.encoding_for_model("gpt-3.5-turbo")


def google_search_playlists(query):
    os.environ["GOOGLE_CSE_ID"] = keys["GOOGLE_CSE_ID_SPOTIFY"]
    gs = GoogleSearchAPIWrapper()
    search = Tool(
        name="Google Search",
        description="Search Google for spotify playlists",
        func= lambda query: gs.results(query, 3),
    )
    return [s["link"] for s in search.run(query)]

def google_search_image(query):
    query = urllib.parse.quote(query)
    url = f"https://customsearch.googleapis.com/customsearch/v1?cx={keys['GOOGLE_CSE_ID_IMAGE']}&imgSize=LARGE&imgType=photo&q={query}&searchType=image&key={keys['GOOGLE_API_KEY']}"
    resp = requests.get(url).json()
    return resp["items"][0]["link"]
    


def num_tokens_from_string(string: str) -> int:
    """Returns the number of tokens in a text string."""
    num_tokens = len(ENCODING.encode(string))
    return num_tokens

In [4]:
google_search_image("freeze corleone l'attaque des clone")

'https://i1.sndcdn.com/artworks-pIXpVdIKMRx0cicf-4PzCpA-t500x500.jpg'

In [3]:
def get_text_from_url(url: str) -> str:
    """
    get all the text of a webpage from the url
    """
    headers = { 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0' }
    html = requests.get(url, headers=headers).text
    soup = BeautifulSoup(html, features="html.parser")
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    return '\n'.join(line for line in lines if line)

function_descriptions = [
            {
                "name": "get_album_list_from_text",
                "description": "Get a list of album release from a text coming from a webpage",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "albums": {
                            "type": "array",
                            "description": "the name of the released album",
                            "items":{
                                "type":"object",
                                "properties":{
                                    "title" : {
                                        "type": "string",
                                        "description" :"the title of the album",
                                    },
                                    "artist" : {
                                        "type": "string",
                                        "description" :"the artist of the album",
                                    },
                                    "date" : {
                                        "type": "string",
                                        "description" :f"the release date of the album. The date format should be day month year without coma for example 21 May 2023. Add the year {datetime.datetime.now().year} if no year is supplied",
                                    },

                                }    
                            }
                        }
                    },
                    "required": ["albums"],
                },
            },
        ] 


def get_album_list_from_url(url):
    """
    The current function makes two query : one to get the response, one to parse it with function calling
    Theoretically everything could be performed in one direct query with function calling but it is not working well at the moment.
    My theory is that the function calling structure take parts of the context lenght so that the prompt is truncated and not every albums are returned
    I would expect that using gpt4 with function calling would make in work in a single query
    """
    text = get_text_from_url(url)
    user_request = f"""
    Can you get a list of all the album releases from this text, for each album give the the title, the artist and the relase date, sort album by descending date. The date format should be day month year without coma for example 21 May 2023: {text}. Add the year {datetime.datetime.now().year} if no year is supplied
    """
    print(num_tokens_from_string(user_request))
    non_structured_album_list = LLM.predict_messages([HumanMessage(content=user_request)]).content
    #print(non_structured_album_list)
    user_request = f"""
    Can you get a list of all the album releases from this text {non_structured_album_list}. The date format should be day month year without coma for example 21 May 2023: {text}. Add the year {datetime.datetime.now().year} if no year is supplied
    """
    print(num_tokens_from_string(user_request))
    structured_album_list = LLM_FUNCTION_CALLING.predict_messages([HumanMessage(content=user_request)], functions=function_descriptions)
    #print(structured_album_list)
    return json.loads(structured_album_list.additional_kwargs["function_call"]["arguments"])["albums"]

def string_to_timestamp(string, format):
    return datetime.datetime.strptime(string, format)

def create_playlist_title_from_album(album):
    user_request = f"""creé un titre court pour une playlist spotify qui porte sur l'album : {album["title"]}, et l'artiste {album["artist"]}"""
    playlist_title = LLM.predict_messages([HumanMessage(content=user_request)]).content
    return playlist_title

def create_playlist_description_from_album(album):
    user_request = f"""Crée un description courte d'une phrase maximum pour un playlist spotify qui porte sur l'album : {album["title"]}, et l'artiste {album["artist"]}"""
    playlist_title = LLM.predict_messages([HumanMessage(content=user_request)]).content
    return playlist_title

def create_upcoming_release_playlist_candidates_from_url(url):
    playlist_candidates= []
    album_list = get_album_list_from_url(url)
    unreleased_album_list = parse_album_list(album_list)
    for a in unreleased_album_list:
        title = create_playlist_title_from_album(a)
        description= create_playlist_description_from_album(a)
        influences = google_search_playlists(f"{a['artist']}, {a['title']} playlist")
        image = google_search_image(f"{a['artist']}, {a['title']}")
        playlist_candidates.append({"title": title, "description": description, "influences":",".join(influences), "additional infos":f"Playlist created for release {a['title']} by {a['artist']}", "image": image})
    return playlist_candidates
    
    
def parse_album_list(album_list):
    album_list_with_parsed_date = []
    for a in album_list:
        date = dateparser.parse(a["date"])
        a["date"] = date
        # keep only parsable date
        if date:
            album_list_with_parsed_date.append(a)
    unreleased_album_list = [a for a in album_list_with_parsed_date if a["date"]>datetime.datetime.today()]
    print(unreleased_album_list)
    return unreleased_album_list



In [4]:
create_upcoming_release_playlist_candidates_from_url("https://www.trackmusik.fr/media/albums-rap-francais")

1314
2146
[{'title': "L'Attaque Des Clones", 'artist': 'Freeze Corleone', 'date': datetime.datetime(2023, 9, 11, 0, 0)}, {'title': 'ADIEU', 'artist': 'KIK', 'date': datetime.datetime(2023, 9, 8, 0, 0)}]


[{'title': '"Clone Attack: Freeze Corleone"',
  'description': '"Plongez dans l\'univers sombre et captivant de l\'album \'L\'Attaque Des Clones\' de l\'artiste Freeze Corleone avec cette playlist intense et percutante."',
  'influences': 'https://open.spotify.com/playlist/2aFedFncWFocj7fKNeIsBW,https://open.spotify.com/playlist/2Wle1vgoUPgJNv2JAwpGXY,https://open.spotify.com/artist/76Pl0epAMXVXJspaSuz8im',
  'additional infos': "Playlist created for release L'Attaque Des Clones by Freeze Corleone",
  'image': 'https://i1.sndcdn.com/artworks-pIXpVdIKMRx0cicf-4PzCpA-t500x500.jpg'},
 {'title': '"ADIEU: KIK\'s Melodic Farewell"',
  'description': '"Plongez dans l\'univers mélancolique et captivant de l\'artiste KIK avec sa playlist ADIEU, où chaque chanson vous transporte dans un voyage émotionnel unique."',
  'influences': 'https://open.spotify.com/playlist/37i9dQZF1DZ06evO2h94tp,https://open.spotify.com/playlist/13EmQEpspghofPpx9zT6Ig,https://open.spotify.com/artist/3U0pUvFUMv6gJiTFnXiu

In [7]:
create_upcoming_release_playlist_candidates_from_url("https://www.billboard.com/lists/new-albums-2023-calendar-new-music-releases-this-year/june-3/")

1978
2299
[{'title': 'Various artists - Yellowjackets season two soundtrack', 'artist': 'Various artists', 'date': datetime.datetime(2023, 9, 1, 0, 0)}, {'title': 'Olivia Rodrigo - GUTS', 'artist': 'Olivia Rodrigo', 'date': datetime.datetime(2023, 9, 8, 0, 0)}, {'title': 'V - Layover', 'artist': 'V', 'date': datetime.datetime(2023, 9, 8, 0, 0)}, {'title': 'Demi Lovato - Revamped', 'artist': 'Demi Lovato', 'date': datetime.datetime(2023, 9, 15, 0, 0)}, {'title': 'Diddy - The Love Album: Off the Grid', 'artist': 'Diddy', 'date': datetime.datetime(2023, 9, 15, 0, 0)}, {'title': 'Kelly Clarkson - Chemistry (Deluxe)', 'artist': 'Kelly Clarkson', 'date': datetime.datetime(2023, 9, 22, 0, 0)}, {'title': 'Kylie Minogue - Tension', 'artist': 'Kylie Minogue', 'date': datetime.datetime(2023, 9, 22, 0, 0)}, {'title': "Taylor Swift - 1989 (Taylor's Version)", 'artist': 'Taylor Swift', 'date': datetime.datetime(2023, 10, 27, 0, 0)}, {'title': 'Various artists - A Tribute to The Judds', 'artist': 'Va

[{'title': '"Yellowjackets S2 OST: Eclectic Vibes"',
  'description': 'La playlist ultime pour les fans de la série Yellowjackets saison deux, avec la bande originale captivante interprétée par divers artistes.',
  'influences': 'https://open.spotify.com/track/3L2LHQIzwLshvWKz07NZv5,https://open.spotify.com/album/1khiHkdRH0RX3dOf2ZTKXY,https://open.spotify.com/album/44MoiIXWGc5WhwB7AJwnCB',
  'additional infos': 'Playlist created for release Various artists - Yellowjackets season two soundtrack by Various artists',
  'image': 'https://img.broadtime.com/418467253006:500.webp'},
 {'title': '"GUTS: Olivia Rodrigo\'s Musical Journey"',
  'description': '"Plongez dans l\'univers émotionnellement puissant d\'Olivia Rodrigo avec son album GUTS, où sa voix captivante et ses paroles sincères vous transportent dans un voyage introspectif."',
  'influences': 'https://open.spotify.com/playlist/4aurFBWbfxIOae2vkkdAP9,https://open.spotify.com/artist/1McMsnEElThX1knmY4oliG,https://open.spotify.com/pl