# Generate SongSnap summaries using ChatGPT API

In [1]:
!pip install openai
!pip install python-dotenv

Collecting openai
  Downloading openai-1.23.2-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

In [2]:
# import os
# from dotenv import load_dotenv
from openai import OpenAI

# # Load environment variables from .env file
# # OPENAI_API_KEY=__
# load_dotenv()
from google.colab import userdata
# Read the OpenAI key from the Colab secrets store (google.colab.userdata).
api_key = userdata.get('OPENAI_API_KEY')

# # Get API key from environment variable
# api_key = os.getenv("OPENAI_API_KEY")
# Fail fast on a missing or empty key so the problem surfaces here instead of
# on the first API request.
if not api_key:
    raise ValueError("OPENAI_API_KEY is not set in the Colab secrets (userdata)")

# Shared client used by generate_song_summary.
client = OpenAI(api_key=api_key)

In [3]:
def generate_song_summary(song_info):
    """Ask the chat model for a short, playful summary of one song.

    The prompt is assembled from the song's metadata and sent through the
    module-level ``client`` to the ``gpt-3.5-turbo`` model.

    Args:
        song_info: Mapping-like object (for example a dict or a DataFrame
            row) supporting lookup of the keys ``song_name``, ``popularity``,
            ``sentiment``, ``genre``, ``energy`` and ``danceability``.
            ``popularity`` is only tested for truthiness when the prompt
            sentence is built.

    Returns:
        The object returned by ``client.chat.completions.create``; the
        generated text is read from ``.choices[0].message.content``.

    Raises:
        KeyError: If ``song_info`` is a dict that lacks one of the keys above.
    """
    popularity_label = 'popular' if song_info['popularity'] else 'not popular'

    # Build the prompt as a single line of space-separated sentences so that
    # no backslashes, newlines or source-code indentation leak into the text
    # sent to the model.
    prompt = " ".join([
        f"Generate a fun and creative summary of the song {song_info['song_name']}.",
        f"This song is {popularity_label}.",
        f"The sentiment of the song is {song_info['sentiment']}.",
        f"The genre of the song is {song_info['genre']}.",
        f"The energy rating (between 0 and 1) of the song is {song_info['energy']}.",
        f"The danceability rating (between 0 and 1) of the song is {song_info['danceability']}.",
        "Most importantly, state whether this song is popular or not.",
        "Keep the summary to 50 words.",
    ])

    # Raw field values, restated in a trailing assistant-role message.
    # NOTE(review): ending the conversation on an assistant message is unusual;
    # confirm this is intended rather than folding the data into the user turn.
    sample_data = (
        "Sample data:\n"
        f"song_name = \"{song_info['song_name']}\"\n"
        f"song_popularity = {song_info['popularity']}\n"
        f"sentiment = \"{song_info['sentiment']}\"\n"
        f"song_genre = {song_info['genre']}\n"
        f"energy = {song_info['energy']}\n"
        f"danceability = {song_info['danceability']}"
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful music assistant that provides information to users about songs."
            },
            {
                "role": "user",
                "content": prompt
            },
            {
                "role": "assistant",
                "content": sample_data
            }
        ],
        temperature=1,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response

In [4]:
from google.colab import drive
# Mount Google Drive at /content/drive so the CSV paths used below resolve.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import pandas as pd
import numpy as np

In [23]:
# Location on the mounted Drive of the CSV that holds the model outputs
# together with the sentiment column.
path_to_output = "/content/drive/MyDrive/AppliedCV_Spring2024/output_w_sentiment.csv"

# Load that CSV into the working dataframe.
output_df = pd.read_csv(filepath_or_buffer=path_to_output)

In [20]:
# Peek at the first raw danceability value: it is stored as a bracketed
# string, so danceability and energy need to be converted to float.
output_df['danceability_output'].iat[0]

'[0.63750064]'

In [24]:
def get_float(x):
    """Convert a cell such as ``'[0.63750064]'`` to a plain float.

    Strings have any leading and trailing square brackets stripped before
    conversion. Values that are not strings are passed straight to ``float``,
    so applying the conversion to an already-converted column is harmless.

    Args:
        x: A bracketed numeric string, a plain numeric string, or a number.

    Returns:
        The value as a ``float``.

    Raises:
        ValueError: If the (stripped) string is not a valid float literal.
    """
    if isinstance(x, str):
        return float(x.strip('[]'))
    return float(x)
# Turn both bracketed-string rating columns into real floats, one column at a time.
for rating_column in ('danceability_output', 'energy_output'):
    output_df[rating_column] = output_df[rating_column].apply(get_float)

In [26]:
# Map the model-output column names to their short forms (dropping the
# "_output" suffix). The song_name and sentiment entries map each name to
# itself, so those two columns keep their names.
column_renames = {
    'song_name': 'song_name',
    'genre_output': 'genre',
    'popularity_output': 'popularity',
    'danceability_output': 'danceability',
    'energy_output': 'energy',
    'sentiment': 'sentiment',
}
output_df.rename(columns=column_renames, inplace=True)

In [27]:
# Display the dataframe after the float conversion and column renaming.
output_df

Unnamed: 0,song_name,genre,popularity,danceability,energy,sentiment
0,WhatTheWaterGaveMe-Florence+TheMachine,3,0,0.637501,0.611000,sadness
1,thinkingofyou-mommy,3,0,0.527886,0.354076,joy
2,Cantando-VicenteFernández,4,0,0.640459,0.617881,joy
3,Blackbird-LordMelody,3,0,0.706858,0.301884,sadness
4,Toss-Up-N2Deep,3,0,0.742125,0.723912,joy
...,...,...,...,...,...,...
987,BulletWithButterflyWings-TheSmashingPumpkins,4,0,0.643063,0.756215,joy
988,HappyWithYou-SamanthaCole,3,0,0.726647,0.649824,joy
989,IWantCandy-BowWowWow,4,0,0.653321,0.738749,joy
990,SomethingInMyHouse-DeadOrAlive,4,0,0.735446,0.833525,fear


In [38]:
# Pre-fill one empty slot per song, then replace each slot with the text of
# the model's reply for that row (one chat-completion request per row).
n_songs = len(output_df)
songsnaps = [""] * n_songs

for row_pos in range(n_songs):
    completion = generate_song_summary(output_df.iloc[row_pos])
    songsnaps[row_pos] = completion.choices[0].message.content

In [39]:
# Attach the generated summaries as a new "songsnap" column (same row order as output_df).
output_df["songsnap"] = songsnaps

In [40]:
import gzip

# Write the full output dataframe to the mounted Drive as a gzip-compressed
# CSV, leaving out the row index.
filepath = '/content/drive/MyDrive/AppliedCV_Spring2024/output_w_songsnaps.gz'
with gzip.open(filepath, mode='wb') as gz_file:
    output_df.to_csv(gz_file, index=False)

In [None]:
# output_df.to_csv('/content/drive/MyDrive/AppliedCV_Spring2024/output_w_songsnaps.csv', index=False)

In [None]:
# Print every generated summary under its song name. Each entry of
# `songsnaps` is the summary text itself (the generation loop stores
# `.choices[0].message.content`), so it is printed directly.
for song_name, summary in zip(output_df['song_name'], songsnaps):
    print(f"Summary for {song_name}:")
    print(summary)
    print("")

In [None]:
# songs = [
#     {"song_name": "Bohemian Rhapsody",
#      "song_popularity": True,
#      "sentiment": "happy",
#      "energy": 0.8,
#      "danceability": 0.5
#     },
#     {"song_name": "Imagine",
#      "song_popularity": False,
#      "sentiment": "sad",
#      "energy": 0.2,
#      "danceability": 0.3
#     },
#     {"song_name": "Thriller",
#      "song_popularity": True,
#      "sentiment": "angry",
#      "energy": 0.9,
#      "danceability": 0.95
#     },
# ]