In [201]:
# INITS + IMPORTS
import dataclasses
import os
import re
from typing import List, Union

import pandas as pd
import spotipy
from datasets import load_dataset
from dotenv import load_dotenv
from openai import OpenAI
from spotipy.oauth2 import SpotifyOAuth


# Load variables from a local .env file first, so that every os.getenv()
# call below can see them.
load_dotenv()

# Endpoint and credentials of the OpenAI-compatible LLM server.
# SECURITY: the API key used to be hard-coded on this line. It is now read
# from the environment (LLM_API_KEY, e.g. via .env). The previously committed
# key must be considered leaked and should be revoked.
BASE_URL = os.getenv("LLM_BASE_URL", "http://199.94.61.113:8000/v1/")
API_KEY = api_key = os.getenv("LLM_API_KEY")  # `api_key` alias kept for existing cells
if not API_KEY:
    raise RuntimeError(
        "LLM_API_KEY is not set; add it to your environment or .env file."
    )

client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# Set up the client id and client secret (you can find the client secret
# on the Spotify developer page)
CLIENT_ID = os.getenv("SPOTIFY_CLIENT_ID")
CLIENT_SECRET = os.getenv("SPOTIFY_CLIENT_SECRET")
REDIRECT_URI = 'http://localhost:8888/callback'
SCOPE = 'playlist-modify-public user-modify-playback-state user-top-read'

# Create a spotify object passing the client id, client secret,
# redirect url (which doesn't matter, just set it as your local host
# as shown below), and scope
spotify = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=CLIENT_ID,
                                               client_secret=CLIENT_SECRET,
                                               redirect_uri=REDIRECT_URI,
                                               scope=SCOPE,
                                               cache_path=".cache-<your-username>"))


# TODO define the data classes

# Static list of genre names that SpotifyAgent accepts as seed genres.
# SpotifyAgent.__init__ assigns it to `possible_genres` instead of calling
# spotify.recommendation_genre_seeds(), which was found to be too slow.
GENRES = ['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient', 'anime', 'black-metal', 'bluegrass', 'blues', 'bossanova', 'brazil', 'breakbeat', 'british', 'cantopop', 'chicago-house', 'children', 'chill', 'classical', 'club', 'comedy', 'country', 'dance', 'dancehall', 'death-metal', 'deep-house', 'detroit-techno', 'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep', 'edm', 'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk', 'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove', 'grunge', 'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle', 'heavy-metal', 'hip-hop', 'holidays', 'honky-tonk', 'house', 'idm', 'indian', 'indie', 'indie-pop', 'industrial', 'iranian', 'j-dance', 'j-idol', 'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino', 'malay', 'mandopop', 'metal', 'metal-misc', 'metalcore', 'minimal-techno', 'movies', 'mpb', 'new-age', 'new-release', 'opera', 'pagode', 'party', 'philippines-opm', 'piano', 'pop', 'pop-film', 'post-dubstep', 'power-pop', 'progressive-house', 'psych-rock', 'punk', 'punk-rock', 'r-n-b', 'rainy-day', 'reggae', 'reggaeton', 'road-trip', 'rock', 'rock-n-roll', 'rockabilly', 'romance', 'sad', 'salsa', 'samba', 'sertanejo', 'show-tunes', 'singer-songwriter', 'ska', 'sleep', 'songwriter', 'soul', 'soundtracks', 'spanish', 'study', 'summer', 'swedish', 'synth-pop', 'tango', 'techno', 'trance', 'trip-hop', 'turkish', 'work-out', 'world-music']

In [500]:
@dataclasses.dataclass
class AgentResponse:
    """
    The superclass for all agent responses.
    """
    # Human-readable message describing what the agent did.
    text: str


@dataclasses.dataclass
class GetRecommendationsResponse(AgentResponse):
    """
    The agent used the `get_recommendations` tool and found the following recommendations.
    """
    # One dict per recommended track with the keys 'name', 'artists' and 'uri'.
    recommendations: List[dict]


@dataclasses.dataclass
class AddArtistResponse(AgentResponse):
    """
    The agent added the following artist to the seed artists.
    """
    # The complete list of seed artists after the update.
    artist_seeds: List[str]


@dataclasses.dataclass
class AddTracksResponse(AgentResponse):
    """
    The agent added the following tracks to the seed tracks.
    """
    # The complete list of seed tracks after the update.
    track_seeds: List[str]


@dataclasses.dataclass
class AddGenresResponse(AgentResponse):
    """
    The agent added the following genres to the seed genres.
    """
    # The complete list of seed genres after the update.
    genre_seeds: List[str]


@dataclasses.dataclass
class ModifyMoodResponse(AgentResponse):
    """
    The agent modified the mood to the following values.
    """
    valence: float
    energy: float
    danceability: float


@dataclasses.dataclass
class AddToPlaylistResponse(AgentResponse):
    """
    The agent added the following tracks to the playlist.
    """
    tracks: List[str]


@dataclasses.dataclass
class TextResponse(AgentResponse):
    """
    The agent answered with plain text and did not use any tool.
    """
    pass

class SpotifyAgent:
    """
    Chat agent that turns user messages into Spotify actions.

    Each user message is sent to an LLM, which is prompted to answer with a
    single Python code block calling the tool functions listed in
    `system_prompt`. The block is executed with the agent's bound methods as
    its namespace, and the outcome is reported as `AgentResponse` objects.
    """
    conversation: List[dict]
    client: "OpenAI"
    spotify: "spotipy.Spotify"
    playlist_id: str
    seed_artists: List[str]
    seed_tracks: List[str]
    seed_genres: List[str]
    possible_genres: List[str]
    # MOOD DETERMINATORS
    target_valence: float # 0.0 to 1.0 = musical positiveness conveyed by a track. high valence = positive
    target_energy: float # 0.0 to 1.0 = perceptual measure of intensity and activity. high energy = fast, loud, noisy
    target_danceability: float # 0.0 to 1.0 = how suitable a track is for dancing. high danceability = easy to dance to
    spotify_tracks_df: pd.DataFrame

    # Upper bound on the number of messages kept in `conversation`.
    MAX_CONVERSATION_MESSAGES = 20
    # Matches one fenced code block; the language tag ("python", "py", ...) is optional.
    _CODE_BLOCK_RE = re.compile(r"```(?:[A-Za-z0-9_+-]*[ \t]*\r?\n)?(.*?)```", re.DOTALL)

    # NOTE(review): the prompt lists no `get_recommendations` stub although the
    # few-shot examples use it -- confirm whether that is intentional.
    system_prompt = """
You are a helpful and friendly Spotify chatbot. Respond to queries with a single Python block of code that uses the following functions:
def add_to_seed_artists(artist_name):
    ...
def add_to_seed_tracks(track_name):
    ...
def add_to_seed_genres(genre_name):
    ...
def add_tracks_to_playlist(track_titles_or_uris):
    ...
# valence, energy, danceability are floats between 0 and 1
def modify_mood(valence: float, energy: float, danceability: float):
    ...
Return the result in a variable called result.
Users might thank you or chat with topics unrelated to the functions.
You might need to use multiple function stubs. For example if the user mentions a track and by an artist. 
Do not redefine the function stubs just use this existing method.
Don't correct their questions.
"""
    few_shot_prompt = "I like the artist bobbin."
    few_shot_response = """
```python
result = add_to_seed_artists("bobbin")
print(result)
```
"""
    few_shot_prompt2 = "Get me reccomendations."
    few_shot_response2 = """
```python
result = get_recommendations()
print(result)
```
"""
    few_shot_response_another = """
Here are some recommendations for you:
Love Story uri: spotify:track:1CkvWZme3pRgbzaxZnTl5X
Blank Space uri: spotify:track:1p80LdxRV74UKvL8gnD7ky
"""
    few_shot_prompt3 = "Add the second song to the playlist."
    few_shot_response3 = """
```python
result = add_tracks_to_playlist("spotify:track:1p80LdxRV74UKvL8gnD7ky")
print(result)
```
"""
    few_shot_prompt4 = "Add the song True Blue to the playlist."
    few_shot_response4 = """
```python
result = add_tracks_to_playlist("True Blue")
print(result)
```
"""

    def __init__(self, client: "OpenAI", spotify: "spotipy.Spotify"):
        """
        Create the agent, its session playlist and its initial conversation.

        `client` is the chat-completions client used by `say`; `spotify` is
        the authenticated spotipy client used for every Spotify call.
        """
        self.client = client
        self.spotify = spotify
        self.playlist_id = self.create_playlist() # Create a playlist for the session
        # self.possible_genres = spotify.recommendation_genre_seeds()['genres'] request too slow
        self.possible_genres = GENRES
        # NOTE(review): model turns are stored with role "system" rather than
        # "assistant"; kept as-is because the prompts were tuned this way.
        self.conversation = [{ "role": "system", "content": self.system_prompt },
                             { "role": "user", "content": self.few_shot_prompt },
                             { "role": "system", "content": self.few_shot_response},
                             { "role": "user", "content": self.few_shot_prompt2 },
                             { "role": "system", "content": self.few_shot_response2 },
                             { "role": "system", "content": self.few_shot_response_another},
                             { "role": "user", "content": self.few_shot_prompt3 },
                             { "role": "system", "content": self.few_shot_response3}]
        # TODO? add back fewshot response 4?
        # The system prompt and few-shot examples are never trimmed away.
        self._pinned_messages = len(self.conversation)
        self.seed_artists = []
        self.seed_tracks = []
        self.seed_genres = []
        self.target_valence = None
        self.target_energy = None
        self.target_danceability = None

        # Load the dataset
        # https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset/viewer/default/train?q=4JyT0CxxEic1JhENHbXfR1
        self.spotify_tracks_df = load_dataset("maharshipandya/spotify-tracks-dataset", split="train").to_pandas()

    def _iter_pages(self, page):
        """Yield every item of a paged Spotify result, following its `next` links."""
        while page:
            yield from page['items']
            page = self.spotify.next(page)

    # clear playlist if exists, else create a new one
    def create_playlist(self):
        """
        Return the id of the 'Spotify Chatbot Session' playlist.

        If the current user already has a playlist with that name, all of its
        tracks are removed and its id is returned; otherwise a new public
        playlist is created.
        """
        playlist_name = 'Spotify Chatbot Session'
        playlist_description = 'A playlist created by the Spotify Chatbot.'

        # Look for the playlist by name across all pages of the user's playlists
        existing_playlist = next(
            (playlist
             for playlist in self._iter_pages(self.spotify.current_user_playlists())
             if playlist and playlist['name'] == playlist_name),
            None)

        if existing_playlist is None:
            print(f"Playlist '{playlist_name}' does not exist. Creating a new one...")
            user_id = self.spotify.current_user()['id']
            res = self.spotify.user_playlist_create(user=user_id,
                                    name=playlist_name,
                                    public=True,
                                    description=playlist_description)
            print(f"Playlist '{playlist_name}' created successfully.")
            return res['id']

        playlist_id = existing_playlist['id']
        print(f"Found existing playlist '{playlist_name}', clearing tracks...")

        # Collect every distinct track URI (all pages), keeping first-seen order
        track_uris = list(dict.fromkeys(
            item['track']['uri']
            for item in self._iter_pages(self.spotify.playlist_items(playlist_id))
            if item and item['track']))
        # The Spotify API accepts at most 100 items per removal request
        for start in range(0, len(track_uris), 100):
            self.spotify.playlist_remove_all_occurrences_of_items(
                playlist_id, track_uris[start:start + 100])
        if track_uris:
            print(f"All tracks removed from playlist '{playlist_name}'.")
        return playlist_id

    def get_track_uri(self, track_name):
        """Return the URI of the first search hit for `track_name`, or None if there is none."""
        results = self.spotify.search(q='track:' + track_name, type='track')
        items = results['tracks']['items']
        if items:
            return items[0]['uri']
        return None

    # Add one song or multiple songs to current playlist
    def add_tracks_to_playlist(self, track_titles_or_uris):
        """
        Add tracks to the session playlist.

        Accepts a single title/URI or a list of them. Entries starting with
        'spotify:track:' are used as-is; anything else is resolved through a
        search, and titles without a hit are skipped. Returns the input as a
        list (the titles/URIs that were requested, not the resolved URIs).
        """
        if isinstance(track_titles_or_uris, tuple):
            track_titles_or_uris = list(track_titles_or_uris)
        elif not isinstance(track_titles_or_uris, list):
            track_titles_or_uris = [track_titles_or_uris]

        track_uris = []
        for track in track_titles_or_uris:
            if track.startswith('spotify:track:'):
                track_uris.append(track)
            else:
                uri = self.get_track_uri(track)
                if uri:
                    track_uris.append(uri)
        # Skip the request entirely when nothing could be resolved
        if track_uris:
            self.spotify.playlist_add_items(self.playlist_id, track_uris)
        return track_titles_or_uris

    # SUBSTITUTE RECOMMENDATIONS ALGORITHM FOR SPOTIFY API... WE COULD IMPROVE THIS LATER
    def recommend_songs(self, seed_artists=None, seed_tracks=None, seed_genres=None, valence=None, energy=None, danceability=None, limit=5):
        """
        Pick up to `limit` tracks from the local tracks dataset.

        The first seed kind that matches any row wins, in the order artists,
        tracks, genres; with no match the whole dataset is used. Track seeds
        are matched against the track name (what `add_to_seed_tracks` stores)
        and also against the track id. Mood values act as lower bounds on the
        corresponding columns. If the filters leave nothing, the whole
        dataset is used again.

        Returns a list of dicts with the keys 'name', 'artists' and 'uri'.
        """
        catalog = self.spotify_tracks_df

        seed_masks = []
        if seed_artists:
            seed_masks.append(catalog['artists'].isin(seed_artists))
        if seed_tracks:
            seed_masks.append(catalog['track_name'].isin(seed_tracks)
                              | catalog['track_id'].isin(seed_tracks))
        if seed_genres:
            seed_masks.append(catalog['track_genre'].isin(seed_genres))

        # Use the first seed filter that matches anything, else the full dataset
        recommendations = catalog
        for mask in seed_masks:
            if mask.any():
                recommendations = catalog[mask]
                break

        for column, minimum in (('valence', valence),
                                ('energy', energy),
                                ('danceability', danceability)):
            if minimum is not None:
                recommendations = recommendations[recommendations[column] >= minimum]

        if recommendations.empty:
            recommendations = catalog

        result = []
        for track in recommendations.head(limit).to_dict('records'):
            artists = track['artists']
            result.append({
                'name': track['track_name'],
                # the artists column is a ';'-separated string; guard against missing values
                'artists': artists.split(';') if isinstance(artists, str) else [],
                'uri': "spotify:track:" + track['track_id'],
            })
        return result

    # Get recommendations based on seeds
    def get_recommendations(self):
        """
        Return up to 10 recommended tracks as dicts with 'name', 'artists', 'uri'.

        With neither seeds nor mood values set, the user's short-term top
        tracks are returned; otherwise `recommend_songs` is used.
        """
        limit = 10
        has_seeds = bool(self.seed_artists or self.seed_tracks or self.seed_genres)
        has_mood = any(value is not None for value in (
            self.target_valence, self.target_energy, self.target_danceability))

        if not has_seeds and not has_mood:
            print("No seeds or Mood provided. Returning top tracks.")
            top_tracks = self.spotify.current_user_top_tracks(
                limit=limit, offset=0, time_range='short_term')['items']
            return [{'name': track['name'],
                     'artists': [artist['name'] for artist in track['artists']],
                     'uri': track['uri']}
                    for track in top_tracks]

        return self.recommend_songs(
            seed_artists=self.seed_artists,
            seed_tracks=self.seed_tracks,
            seed_genres=self.seed_genres,
            valence=self.target_valence,
            energy=self.target_energy,
            danceability=self.target_danceability,
            limit=limit)

    def add_to_seed_artists(self, artist_name):
        """Search for `artist_name` and append the first hit's name to the seed artists."""
        results = self.spotify.search(q='artist:' + artist_name, type='artist')
        items = results['artists']['items']
        if items:
            # recommendations can only match on the artist name for now
            self.seed_artists.append(items[0]['name'])

    def add_to_seed_tracks(self, track_name):
        """Search for `track_name` and append the first hit's name to the seed tracks."""
        results = self.spotify.search(q='track:' + track_name, type='track')
        items = results['tracks']['items']
        if items:
            self.seed_tracks.append(items[0]['name'])

    def add_to_seed_genres(self, genre_name):
        """Append the lower-cased genre to the seed genres if it is a known genre."""
        genre = genre_name.lower()
        if genre in self.possible_genres:
            self.seed_genres.append(genre)

    def modify_mood(self, valence: float, energy: float, danceability: float):
        """Set the target mood values used by `get_recommendations`."""
        self.target_valence = valence
        self.target_energy = energy
        self.target_danceability = danceability

    def get_artist(self, artist_uri):
        """Fetch the Spotify artist object for `artist_uri`."""
        return self.spotify.artist(artist_uri)

    def get_track(self, track_uri):
        """Fetch the Spotify track object for `track_uri`."""
        return self.spotify.track(track_uri)

    def _find_code_block(self, resp_text):
        """Return the body of the first fenced code block in `resp_text`, or None."""
        match = self._CODE_BLOCK_RE.search(resp_text or "")
        return match.group(1) if match else None

    def extract_code(self, resp_text):
        """
        Return the code inside the first fenced block of `resp_text`.

        Works for fences tagged ```python, ```py or untagged. Returns the
        no-op statement "pass" when there is no complete fenced block.
        """
        code = self._find_code_block(resp_text)
        return "pass" if code is None else code

    def run_code(self, code_text):
        """
        Execute `code_text` with the tool functions in scope and return `result`.

        Raises KeyError if the code never assigns `result`; any exception
        raised by the code itself propagates to the caller.
        """
        namespace = {
            "add_tracks_to_playlist": self.add_tracks_to_playlist,
            "get_recommendations": self.get_recommendations,
            "add_to_seed_artists": self.add_to_seed_artists,
            "add_to_seed_tracks": self.add_to_seed_tracks,
            "add_to_seed_genres": self.add_to_seed_genres,
            "modify_mood": self.modify_mood,
        }
        # SECURITY: this executes model-generated code without a sandbox.
        # Only run it against a trusted model and trusted user input.
        exec(code_text, namespace)
        return namespace["result"]

    def _trim_conversation(self):
        """Drop the oldest non-pinned messages so the history stays bounded."""
        pinned = getattr(self, "_pinned_messages", 1)
        overflow = len(self.conversation) - self.MAX_CONVERSATION_MESSAGES
        if overflow > 0:
            del self.conversation[pinned:pinned + overflow]

    @staticmethod
    def _seed_message(added, kind):
        """Describe which seeds of `kind` ('artist', 'track', 'genre') were added."""
        if added:
            return f"Added {', '.join(added)} to seed {kind}s."
        return f"Could not find a matching {kind}; seed {kind}s are unchanged."

    def say(self, user_message: str) -> Union["AgentResponse", List["AgentResponse"]]:
        """
        Send `user_message` to the model, run the code it returns and report.

        Returns a list with one response per tool that appears in the
        generated code (or one `TextResponse` when no tool is used or the
        model answers without a code block). If running the code raises, a
        single `TextResponse` describing the error is returned instead of a
        list.
        """
        self._trim_conversation()
        # Add the user message to the conversation.
        self.conversation.append({"role": "user", "content": user_message})

        # Get the response from the model.
        resp = self.client.chat.completions.create(
            messages=self.conversation,
            model="meta-llama/Meta-Llama-3.1-8B-Instruct",
            temperature=0)
        resp_text = resp.choices[0].message.content or ""

        print('RESP TEXT', resp_text)
        self.conversation.append({"role": "system", "content": resp_text})

        code_text = self._find_code_block(resp_text)
        if code_text is None:
            # The model answered in plain text; pass it on rather than failing.
            return [TextResponse(text=resp_text)]

        # Remember the seed counts so we can tell what this turn really added.
        n_artists = len(self.seed_artists)
        n_tracks = len(self.seed_tracks)
        n_genres = len(self.seed_genres)
        # TODO if prompt was relating to add seed artists, delete from conversation
        # store conversation to display
        try:
            res = self.run_code(code_text)
        except Exception as err:
            print('ERROR', err)
            return TextResponse(text='An Error occured:' + resp_text)

        responses = []
        if "get_recommendations" in code_text:
            # `result` may come from another call when several tools were used
            recs = [t for t in res if isinstance(t, dict)] if isinstance(res, list) else []
            system_recc_resp = "Here are some recommendations for you:"
            user_recc_resp = "Here are some recommendations for you:"
            for track in recs:
                system_recc_resp += '\n' + track['name'] + ' uri: ' + track['uri']
                user_recc_resp += '\n' + track['name'] + ' by ' + ", ".join(track['artists'])
            # Keep the URIs in the history so follow-ups like "add the second song" work
            self.conversation.append({"role": "system", "content": system_recc_resp})
            responses.append(GetRecommendationsResponse(text=user_recc_resp, recommendations=recs))
        if "add_to_seed_artists" in code_text:
            message = self._seed_message(self.seed_artists[n_artists:], "artist")
            responses.append(AddArtistResponse(text=message, artist_seeds=self.seed_artists))
        if "add_to_seed_tracks" in code_text:
            message = self._seed_message(self.seed_tracks[n_tracks:], "track")
            responses.append(AddTracksResponse(text=message, track_seeds=self.seed_tracks))
        if "add_to_seed_genres" in code_text:
            message = self._seed_message(self.seed_genres[n_genres:], "genre")
            responses.append(AddGenresResponse(text=message, genre_seeds=self.seed_genres))
        if "modify_mood" in code_text:
            message = f"Modified mood to valence: {self.target_valence}, energy: {self.target_energy}, danceability: {self.target_danceability}."
            responses.append(ModifyMoodResponse(text=message, valence=self.target_valence, energy=self.target_energy, danceability=self.target_danceability))
        if "add_tracks_to_playlist" in code_text:
            message = "Added tracks to playlist.\n" # some how also add a spotify iframe...
            responses.append(AddToPlaylistResponse(text=message, tracks=res))
        if not responses:
            # No tool was used: `result` holds the model's conversational reply
            responses.append(TextResponse(text=res))
        return responses
        # self.current_mood = 0


In [495]:
# Manual smoke test: build an agent and send it two messages in a row;
# `res` ends up holding the reply to the last one.
agent = SpotifyAgent(client, spotify)
# agent.add_to_seed_artists("Taylor Swift")
# agent.add_to_seed_artists("Big Red Machine")

# res = agent.say('I love the song don\'t stop me now!')

for prompt in ("get me some recommendations", "Thank you"):
    res = agent.say(prompt)
print('HELLOS', res)
# res = agent.say("i like pop music")
# res = agent.say("get me some recommendations")
# res = agent.say("I like the artist taylor swif.")
# # res = agent.say("I want to listen to punk music")
# res = agent.say("get me some recomendations")
# res = agent.say("i like the song kyoto by the artist phoebe bridgers")

# res = agent.say("add all the first two songs to the playlist")
# res = agent.say('add the song Crazy Train to the playlist')
# res = agent.say('im feeling really jittery and i want to calm down')
# res = agent.say("get me some recomendations")
# res = agent.say("add all songs to playlist")

# agent.add_tracks_to_playlist(["spotify:track:6vJaXvEXh6HOeiEZLFDhrp", "spotify:track:6X7pjnNm5dHSUot6hp3KFU"])


Found existing playlist 'Spotify Chatbot Session', clearing tracks...
RESP TEXT ```python
result = get_recommendations()
print(result)
```
No seeds or Mood provided. Returning top tracks.
[{'name': 'Two Step Plan', 'artists': ['Hollie Col'], 'uri': 'spotify:track:2gy4PyehpOiEPb0WzXRKYH'}, {'name': 'teenage dirtbag', 'artists': ['Cat Burns'], 'uri': 'spotify:track:7DMJ3MN5qwncdPUuZAtpRg'}, {'name': 'She Knows', 'artists': ['Hollie Col'], 'uri': 'spotify:track:1YmZPhHGz1N64jVsgzOYtO'}, {'name': 'Unholy', 'artists': ['Hollie Col'], 'uri': 'spotify:track:4c4jW8pyMAgTvJqxMBxJvL'}, {'name': 'odoriko - Cover', 'artists': ['Kita Nanachi'], 'uri': 'spotify:track:5KzRmsiul5ti9MN3MXz1il'}, {'name': "California Dreamin'", 'artists': ['The Mamas & The Papas'], 'uri': 'spotify:track:1ZEOIhSn6BKErV59bIgn76'}, {'name': "The Ballad of the Witches' Road (Agatha Through Time Version)", 'artists': ['Kathryn Hahn', 'Sasheer Zamata', 'Ali Ahn', 'Patti LuPone', 'Agatha All Along - Cast'], 'uri': 'spotify:tra

In [503]:
import yaml
import dataclasses
from pathlib import Path

@dataclasses.dataclass
class EvaluationResult:
    """
    Outcome of running one benchmark through `eval_agent`.
    """
    # Score of the run: 1.0 when every step passed, otherwise the index of
    # the first failing step divided by the number of steps.
    num: float
    # The agent's conversation at the moment the evaluation stopped.
    conversation: List[dict]

def eval_agent(client: "OpenAI", steps: List[dict], spotify: "spotipy.Spotify") -> "EvaluationResult":
    """
    Evaluate a fresh agent on the given list of benchmark steps.

    Each step has a "prompt" and either its own "expected_type" (plus
    optional "expected_result" / "expected_size") or an "expected_responses"
    list of such specs, one per response the agent should produce.

    Returns an EvaluationResult scoring 1.0 when every step passes, otherwise
    (index of the first failing step) / (number of steps), together with the
    agent's conversation.
    """
    # expected_type -> (required response class, seed attribute compared
    # against "expected_result", or None when only the type is checked)
    checks = {
        "text": (TextResponse, None),
        "get-recommendations": (GetRecommendationsResponse, None),
        "add-artist": (AddArtistResponse, "artist_seeds"),
        "add-tracks": (AddTracksResponse, "track_seeds"),
        "add-genres": (AddGenresResponse, "genre_seeds"),
        "modify-mood": (ModifyMoodResponse, None),
        "add-to-playlist": (AddToPlaylistResponse, None),
    }
    agent = SpotifyAgent(client, spotify)

    def failed_at(step_index):
        return EvaluationResult(step_index / len(steps), agent.conversation)

    for n, step in enumerate(steps):
        responses = agent.say(step["prompt"])
        # `say` returns a single response (not a list) when running the code failed
        if not isinstance(responses, list):
            responses = [responses]
        # Ensure that the right number of function stubs were called in a multistep conversation
        expected_responses = step.get("expected_responses", [step])
        if len(responses) != len(expected_responses):
            return failed_at(n)

        for response, expected_response in zip(responses, expected_responses):
            expected_type = expected_response["expected_type"]
            if expected_type not in checks:
                # Unknown types are not checked (same as before)
                continue
            response_class, seeds_attr = checks[expected_type]
            if not isinstance(response, response_class):
                return failed_at(n)
            if seeds_attr is not None:
                if getattr(response, seeds_attr) != expected_response["expected_result"]:
                    return failed_at(n)
            if expected_type == "add-to-playlist":
                # Prefer the per-response spec; fall back to step-level keys
                spec = {**step, **expected_response}
                if "expected_result" in spec and response.tracks != spec["expected_result"]:
                    return failed_at(n)
                if "expected_size" in spec and len(response.tracks) != spec["expected_size"]:
                    return failed_at(n)
    return EvaluationResult(1.0, agent.conversation)



def load_yaml(p):
    """Parse the YAML file at path `p` and return the resulting Python object."""
    # Read as UTF-8 explicitly instead of relying on the platform default encoding
    with open(p, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

# Run every benchmark file in ./benchmarks (sorted by path) and print its
# 1-based position together with the evaluation result.
client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
files = sorted(Path("./benchmarks").glob("*.yaml"))
# All files are parsed up front, before the first benchmark is evaluated.
all_benchmarks = list(map(load_yaml, files))
for index, benchmark in enumerate(all_benchmarks, start=1):
    print(index, eval_agent(client, benchmark, spotify))


Found existing playlist 'Spotify Chatbot Session', clearing tracks...
RESP TEXT ```python
result = "Nice to meet you too!"
print(result)
```
Nice to meet you too!
RESP TEXT ```python
result = add_to_seed_artists("Backstreet Boys")
print(result)
```
None
RESP TEXT ```python
result = "You're welcome! I'm here to help."
print(result)
```
You're welcome! I'm here to help.
RESP TEXT ```python
result = add_to_seed_artists("*NSYNC")
print(result)
```
None
RESP TEXT ```python
result = add_to_seed_tracks("'I Want It That Way'")
print(result)
```
None
RESP TEXT ```python
result = add_to_seed_genres("punk")
print(result)
```
None
RESP TEXT ```python
result = "That sounds like an amazing experience! I hope you had a blast!"
print(result)
```
That sounds like an amazing experience! I hope you had a blast!
RESP TEXT ```python
result = get_recommendations()
print(result)
```
[{'name': 'I Want It That Way', 'artists': ['Backstreet Boys'], 'uri': 'spotify:track:47BBI51FKFwOMlIiX6m8ya'}]
RESP TEXT ```py

In [101]:

# res = agent.say("I like phoebe bridgers. Get me some reccomendations.")
# print(res)
# res = agent.say("I like the hollie col.")
# res = agent.say("I like the artist taylor swif.")
# res = agent.say("I want to listen to punk music")
# res = agent.say("get me some recomendations")
# res = agent.say("add all the first two songs to the playlist")
# res = agent.say('add the song Crazy Train to the playlist')
# res = agent.say('im feeling really jittery and i want to calm down')
# res = agent.say("get me some recomendations")
# res = agent.say("add all songs to playlist")


# ENSURE GRACEFUL ERROR HANDLING ON APP
# MAKE INTERFACE LOOK NICER
# GRACEFULLY HANDLE ERRORS

# TODO: make the bot more conversational
# TODO: figure out to combine prompts... what would that look like
# TODO MODIFY CHATIFY SO THAT THE RIGHT VALUES ARE USED
# TODO: having trouble handling: can you get me some sad songs
# TODO: having trouble handling: i'm feeling sad. can you reccomend me some songs
# TODO: it is having trouble combining prompts!!!! i like noah kahan. get me some reccomendations ... figure out how we want to return this
# TODO: ask for chat gpt 4.0
# NOTES
# TODO: spelling errors are handled by spotify
# TODO: handling artists that aren't so well known
# TODO: handle removing from the playlist
# TODO: add more complex functionality
# Add all the variable stuff similar to the travel agent, also add benchmark tests.
# do we actually need to print anything?
# Getting recommendations is really slow after adding seeds... come back to this later? Too many requests?
# maybe don't override playlist
# TODO figure out deletion and how to display
# if this is the first prompt it would know how to handle it<add all songs to playlist>

In [None]:

def eval_agent(client: "OpenAI", steps: List[dict], spotify: "spotipy.Spotify") -> "EvaluationResult":
    """
    Evaluate a fresh agent on the given list of benchmark steps.

    NOTE: this cell redefines `eval_agent` and replaces any earlier
    definition when it is run.

    Each step has a "prompt" and either its own "expected_type" (plus
    optional "expected_result" / "expected_size") or an "expected_responses"
    list of such specs, one per response the agent should produce.

    Returns an EvaluationResult scoring 1.0 when every step passes, otherwise
    (index of the first failing step) / (number of steps), together with the
    agent's conversation.
    """
    # expected_type -> (required response class, seed attribute compared
    # against "expected_result", or None when only the type is checked)
    checks = {
        "text": (TextResponse, None),
        "get-recommendations": (GetRecommendationsResponse, None),
        "add-artist": (AddArtistResponse, "artist_seeds"),
        "add-tracks": (AddTracksResponse, "track_seeds"),
        "add-genres": (AddGenresResponse, "genre_seeds"),
        "modify-mood": (ModifyMoodResponse, None),
        "add-to-playlist": (AddToPlaylistResponse, None),
    }
    agent = SpotifyAgent(client, spotify)

    def failed_at(step_index):
        return EvaluationResult(step_index / len(steps), agent.conversation)

    for n, step in enumerate(steps):
        responses = agent.say(step["prompt"])
        print('LOOK AT RESPONSES', responses)
        # `say` returns a single response (not a list) when running the code failed
        if not isinstance(responses, list):
            responses = [responses]
        # Ensure that the right number of function stubs were called in a
        # multistep conversation. (This used to compare against the length of
        # the "expected_type" string, which is unrelated to the response count.)
        expected_responses = step.get("expected_responses", [step])
        if len(responses) != len(expected_responses):
            print('IN HERE?')
            return failed_at(n)

        for response, expected_response in zip(responses, expected_responses):
            expected_type = expected_response["expected_type"]
            if expected_type not in checks:
                # Unknown types are not checked (same as before)
                continue
            response_class, seeds_attr = checks[expected_type]
            if not isinstance(response, response_class):
                return failed_at(n)
            if seeds_attr is not None:
                if getattr(response, seeds_attr) != expected_response["expected_result"]:
                    return failed_at(n)
            if expected_type == "add-to-playlist":
                # Prefer the per-response spec; fall back to step-level keys
                spec = {**step, **expected_response}
                if "expected_result" in spec and response.tracks != spec["expected_result"]:
                    return failed_at(n)
                if "expected_size" in spec and len(response.tracks) != spec["expected_size"]:
                    return failed_at(n)
    return EvaluationResult(1.0, agent.conversation)
