In [None]:
import requests
from bs4 import BeautifulSoup
import os
import re
import lyricsgenius
import pandas as pd
from tqdm import tqdm
from openai import OpenAI
from PIL import Image
import requests
import os

In [None]:
# Import the API credentials by name so it is obvious which secrets the notebook needs
# and a missing one fails here instead of as a NameError several cells later.
# NOTE(review): this relies on a project-local secrets.py taking precedence over the
# standard-library `secrets` module on sys.path — confirm.
from secrets import GENIUS_ACCESS_TOKEN, OPEN_AI_TOKEN

# Absolute path of the scraped-lyrics CSV, resolved against the current working directory.
cwd = os.getcwd()
file_name = "data/MacMiller_AD.csv"
file_path = os.path.join(cwd, file_name)

### Preprocess

In [None]:
# Module-level Genius API client, authenticated with GENIUS_ACCESS_TOKEN and used by
# generate_Mac_AD() to look up albums.
# The options below are set as attributes after construction.
# NOTE(review): passing excluded_terms to the constructor instead may merge it with the
# library's default terms rather than replace them — confirm before refactoring.
genius = lyricsgenius.Genius(GENIUS_ACCESS_TOKEN)
genius.verbose = False # Turn off status messages
genius.remove_section_headers = True # Remove section headers (e.g. [Chorus]) from lyrics when searching
genius.skip_non_songs = False # Include hits thought to be non-songs (e.g. track lists)
genius.excluded_terms = ["(Remix)", "(Live)", "Remix"] # Exclude songs with these words in their title

In [None]:
def generate_Mac_AD(output_path="data/MacMiller_AD.csv"):
    """Download the lyrics of every track on the listed albums and save them as a CSV.

    Each album is looked up through the module-level ``genius`` client. Albums
    released under an alias are searched under that alias. Tracks whose title
    contains "remix" are skipped, and titles containing "live" or "remix" are
    filtered out again before the table is written.

    Args:
        output_path: Destination CSV path. Defaults to the location the rest of
            the notebook reads from.

    Returns:
        pandas.DataFrame with one row per track and the columns ``artist``,
        ``album``, ``release_date``, ``track_no``, ``title``, ``lyrics``,
        ``art`` and ``url``, sorted by release date and track number.

    Raises:
        AssertionError: If an album cannot be found, has no tracks, or any
            collected track has missing lyrics.
    """
    default_artist = 'Mac Miller'
    # Albums released under an alias have to be searched under that alias.
    alias_artists = {
        'You': 'Larry Lovestein & The Velvet Revival',
        'Delusional Thomas': 'Delusional Thomas',
    }
    albums = ['Blue Slide Park', 'Watching Movies with the Sound Off', 'Faces',
              'GO:OD AM', 'The Divine Feminine', 'Swimming', 'Circles',
              'K.I.D.S.', 'Best Day Ever', 'I Love Life, Thank You', 'Macadelic', 'Delusional Thomas',
              'You']  # 'Live from Space',
    columns = ['artist', 'album', 'release_date', 'track_no', 'title', 'lyrics', 'art', 'url']

    # Collect plain records and build the frame once instead of concatenating per album.
    records = []
    for album_name in tqdm(albums):
        album = genius.search_album(album_name, alias_artists.get(album_name, default_artist))
        # Guard against a failed lookup as well as an empty track list.
        assert album is not None and len(album.tracks) != 0, f'Empty album: {album_name}'

        # The release date may be unavailable; keep the row and leave the date empty.
        release = album.release_date_components
        release_date = release.strftime('%Y-%m-%d') if release is not None else None

        for track in album.tracks:
            song = track.song
            if "remix" in song.title.lower():
                # Skip only this track; the rest of the album is still wanted.
                continue
            records.append(
                {
                    'artist': song.artist,
                    'album': album.name,
                    'release_date': release_date,
                    'track_no': track.number,
                    'title': song.title,
                    'lyrics': song.lyrics,
                    'art': album.cover_art_url,
                    # Link each row to its own song page, not the album's first track.
                    'url': song.url,
                }
            )

    artist_df = pd.DataFrame(records, columns=columns)
    assert artist_df[artist_df['lyrics'].isna()].shape[0] == 0

    # Strip scraping artefacts: [Section] headers, the "<title> Lyrics" prefix on the
    # first line, and the trailing "<count>Embed" marker (any number of digits).
    artist_df['lyrics'] = artist_df['lyrics'].replace(to_replace=r'\[.*?\]', value='', regex=True)
    artist_df['lyrics'] = artist_df['lyrics'].replace(to_replace=r'^.*?Lyrics', value='', regex=True)
    artist_df['lyrics'] = artist_df['lyrics'].replace(to_replace=r'\d*Embed$', value='', regex=True)
    artist_df['track_no'] = artist_df['track_no'].astype(int)

    # NOTE: this is a substring match, so any title containing "live" or "remix" is dropped.
    artist_df = artist_df[~artist_df['title'].str.contains('live|remix', case=False)].reset_index(drop=True)
    artist_df = artist_df.sort_values(by=['release_date', 'track_no']).reset_index(drop=True)
    artist_df.to_csv(output_path, index=False)

    return artist_df

### GPT Sentiment Analysis

In [None]:
# Location of the cached GPT scoring results; the scoring cell is skipped when it exists.
cwd = os.getcwd()
file_name = "data/GPT_Results.csv"
file_path = os.path.join(cwd, file_name)

# Shared OpenAI client for every chat and image request in this notebook.
openai_client = OpenAI(api_key=OPEN_AI_TOKEN)

In [None]:
def extract_first_word(theme_list):
    """Reduce each theme to its first word.

    Args:
        theme_list: Iterable of theme strings.

    Returns:
        A list of the same length. A theme that contains a space is replaced by
        its first whitespace-separated word. Themes without a space, and
        whitespace-only themes, are returned unchanged.
    """
    first_words = []
    for theme in theme_list:
        words = theme.split()
        # A whitespace-only theme contains a space but has no words; keep it as-is
        # rather than indexing into an empty list.
        first_words.append(words[0] if ' ' in theme and words else theme)
    return first_words

In [None]:
if not os.path.isfile(file_path):
    # Score every song with the chat model, but only when no cached results file exists.
    df = pd.read_csv("data/MacMiller_AD.csv")
    df['lyrics'] = df['lyrics'].astype(str)
    # Series.replace without regex only swaps cells that are exactly '\n';
    # .str.replace is what flattens the newlines inside each lyric.
    df['lyrics'] = df['lyrics'].str.replace('\n', ' ', regex=False)
    df['score'] = ""
    df['themes'] = ""

    # Missing lyrics became the string 'nan' in the astype(str) call above.
    df = df.loc[df['lyrics'] != 'nan'].reset_index(drop=True)

    for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        lyrics = row['lyrics']

        completion = openai_client.chat.completions.create(
          model="gpt-3.5-turbo",

          messages=[
            {"role": "system", "content": '''You are skilled at understanding the nuance of lyrics, 
            especially hip-hop. This means understanding metaphor and slang and knowing if something is 
            said positively or negatively. I will send you lyrics from a song. I would like you to respond 
            with a scaling of sentiment from 1 as most negative to 10 as most positive. Followed by your 
            numerical rating, you'll write // and 5 individual words expressing the overall theme of 
            feeling conveyed by the lyrics. Your final output should be a number // 5 comma separated words'''},

            {"role": "user", "content": lyrics}
          ]
        )

        # Expected reply shape: "<number> // word, word, word, word, word".
        reply = completion.choices[0].message.content or ""
        parts = reply.split("//")
        rating_text = parts[0]
        # An off-format reply without "//" yields an empty theme instead of an IndexError.
        themes_text = parts[1] if len(parts) > 1 else ""

        # Take the first number in the rating part so replies such as "Sentiment: 7"
        # or "7/10" still parse; an unparseable rating becomes NaN.
        rating_match = re.search(r'\d+(?:\.\d+)?', rating_text)
        rating = float(rating_match.group()) if rating_match else float('nan')

        # Split the themes and drop stray digits and surrounding whitespace from each one.
        themes = [
            ''.join(char for char in theme if not char.isdigit()).strip()
            for theme in themes_text.strip().strip(".").split(",")
        ]

        df.at[index, 'score'] = rating
        df.at[index, 'themes'] = themes

    df['score'] = df['score'].astype(float)

    # Keep only the first word of each theme.
    df['themes'] = df['themes'].apply(extract_first_word)

    df.to_csv("data/GPT_Results.csv", index = False)

### Post Processing

In [None]:
# Build the album-level table: mean score, all song themes joined, plus release date and cover art.
df = pd.read_csv("data/GPT_Results.csv")
albums = df.loc[:, ['album', 'score', 'themes']].reset_index(drop=True)

# Mean sentiment score per album.
album_score = albums.groupby(['album'])['score'].mean().reset_index()

# Themes were saved as the text of a Python list ("['a', 'b']"):
# drop the surrounding brackets and the single quotes.
albums['themes'] = albums['themes'].str[1:-1].str.replace('\'', '', regex=False)

# One comma-joined theme string per album.
album_theme = albums.groupby(['album'])['themes'].apply(','.join).reset_index()

# Per-album metadata taken from the song-level rows.
album_info = df[['album','release_date','art']].drop_duplicates().reset_index(drop=True)

albums = (
    album_score
    .merge(album_theme, on=['album'])
    .merge(album_info, on=['album'])
)
albums.to_csv("data/MM_Albums_sentiment.csv", index=False)

In [None]:
# Write the song-level table with themes as plain comma-separated text.
df = pd.read_csv("data/GPT_Results.csv")

# Address the column by name rather than by position, so a change in the CSV's column
# order cannot make this edit the wrong column. Themes were saved as the text of a
# Python list ("['a', 'b']"): drop the surrounding brackets and the single quotes.
df['themes'] = df['themes'].str[1:-1].str.replace('\'', '', regex=False)

df.to_csv("data/MM_AllSongs_sentiment.csv", index=False)

In [None]:
# Add empty placeholders for the generated image URLs; the image cells fill them in
# and treat '' as "not generated yet".
albums = albums.assign(persona='', ai_image='')

In [None]:
# Path of the album table that includes generated image URLs. The cells below only
# generate images and write this file when it does not exist yet.
file_name = "data/MM_Albums_sentiment_with_DallE.csv"
file_path = os.path.join(cwd, file_name)

In [None]:
if not os.path.isfile(file_path):
    # Generate one "persona" image per album from its themes, store the image URL in
    # albums['persona'] and save a JPEG copy under data/persona_img/.
    person_prompt = 'I will share a group of thematic words. Generate the image of a person that fits the overall theme of the group of words: '

    # Make sure the output folder exists before the first save.
    os.makedirs("data/persona_img", exist_ok=True)

    for index, row in tqdm(albums.iterrows(), total=len(albums), desc="Processing rows"):
        if row['persona'] != '':
            # This album already has an image URL.
            continue

        themes = row['themes']
        album_name = row['album']

        response = openai_client.images.generate(
          model="dall-e-3",
          prompt=f"{person_prompt} {themes}",
          size="1024x1024",
          quality="standard",
          n=1,
        )

        image_url = response.data[0].url
        albums.at[index, 'persona'] = image_url

        # Fail loudly on a bad download instead of handing an error page to PIL,
        # and release the connection once the image has been written.
        with requests.get(image_url, stream=True, timeout=60) as image_response:
            image_response.raise_for_status()
            # JPEG cannot store an alpha channel, so normalise the mode before saving.
            Image.open(image_response.raw).convert("RGB").save(f"data/persona_img/{album_name}_Persona.jpg")

In [None]:
if not os.path.isfile(file_path):
    # Generate one free-form concept image per album from its themes, store the image
    # URL in albums['ai_image'] and save a JPEG copy under data/ai_img/.
    any_image_prompt = 'generate an image using the following list of thematic words. Try to encapsulate the overall concept in a picture: '

    # Make sure the output folder exists before the first save.
    os.makedirs("data/ai_img", exist_ok=True)

    for index, row in tqdm(albums.iterrows(), total=len(albums), desc="Processing rows"):
        if row['ai_image'] != '':
            # This album already has an image URL.
            continue

        themes = row['themes']
        album_name = row['album']

        response = openai_client.images.generate(
          model="dall-e-3",
          prompt=f"{any_image_prompt} {themes}",
          size="1024x1024",
          quality="standard",
          n=1,
        )

        image_url = response.data[0].url
        albums.at[index, 'ai_image'] = image_url

        # Fail loudly on a bad download instead of handing an error page to PIL,
        # and release the connection once the image has been written.
        with requests.get(image_url, stream=True, timeout=60) as image_response:
            image_response.raise_for_status()
            # JPEG cannot store an alpha channel, so normalise the mode before saving.
            Image.open(image_response.raw).convert("RGB").save(f"data/ai_img/{album_name}_Img.jpg")

In [None]:
# Write the album table (now carrying the persona / ai_image URL columns) only when
# the file does not exist yet, so an existing file is never overwritten here.
if not os.path.isfile(file_path):
    albums.to_csv("data/MM_Albums_sentiment_with_DallE.csv", index=False)

In [None]:
# Rebind `df` to the album-level table (with image URLs) read back from disk.
df = pd.read_csv("data/MM_Albums_sentiment_with_DallE.csv")

In [None]:
if 1+1 == 3:  # always false: this cell is parked and never runs
    # Ask the chat model to condense each album's theme words into five words.
    consolidation_prompt = '''I will pass you a list of words, each of which represents the theme 
            of a body of art. I want you to consolidate the list into 5 words. Your output should be solely 5 words 
            separated by commas.'''

    df['album_theme'] = ''
    for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        chat_messages = [
            {"role": "system", "content": consolidation_prompt},
            {"role": "user", "content": row['themes']},
        ]
        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
        )
        df.at[index, 'album_theme'] = completion.choices[0].message.content

In [None]:
if 1+1 == 3:  # always false: this cell is parked and never runs
    # Ask the chat model for a one-sentence "ideal place to listen" line per album.
    listening_place_prompt = '''I will pass you a list of words, each of which represents the theme 
            of an album. Respond with "The ideal place to listen to this record would be: " and then share 
            the best place to listen to that album. Limit responses to one sentence.'''

    df['album_summary'] = ''
    for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        chat_messages = [
            {"role": "system", "content": listening_place_prompt},
            {"role": "user", "content": row['themes']},
        ]
        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
        )
        df.at[index, 'album_summary'] = completion.choices[0].message.content

In [None]:
# Disabled (the condition is always false): would overwrite the album CSV with the
# current contents of `df`.
if 1+1 == 3:
    df.to_csv("data/MM_Albums_sentiment_with_DallE.csv", index=False)

In [None]:
# Rebind `df` to the song-level sentiment table written earlier in this notebook.
df = pd.read_csv("data/MM_AllSongs_sentiment.csv")

In [None]:
if 1+1 == 3:  # always false: this cell is parked and never runs
    # Ask the chat model how many death-related mentions each song's lyrics contain.
    death_count_prompt = '''I will pass you song lyrics. I want you to count the number 
            of times that death or death-adjacent themes are mentioned. This should include suicide, homicide, 
            heaven, hell, etc. Pay attention for slang that may indicate death. You should only return a number, no other text.'''

    df['death_counter'] = ''
    for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        chat_messages = [
            {"role": "system", "content": death_count_prompt},
            {"role": "user", "content": row['lyrics']},
        ]
        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
        )
        df.at[index, 'death_counter'] = completion.choices[0].message.content

In [None]:
if 1+1 == 3:  # always false: this cell is parked and never runs
    # Keep only the digits of each model reply. The pattern is a raw string because
    # '\D' in a normal string is an invalid escape that warns at compile time.
    digits_only = df['death_counter'].astype(str).str.replace(r'\D', '', regex=True)

    # A reply with no digits becomes 0 instead of crashing the integer conversion.
    # NOTE(review): treating an unparseable reply as zero mentions is an assumption — confirm.
    df['death_counter'] = pd.to_numeric(digits_only, errors='coerce').fillna(0).astype(int)

    # Total death-related mentions per album.
    album_death_counter = df.groupby(['album'])['death_counter'].sum().reset_index()
    album_death_counter.to_csv("data/album_death_counter.csv", index=False)

In [None]:
##########

### Appendix

In [None]:
# import pandas as pd
# import os
# from nltk.corpus import stopwords
# from textblob import TextBlob
# from textblob import Word
# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# pd.options.display.max_colwidth = 17

# def vader_analysis():
#     '''
#     '''
#     analyzer = SentimentIntensityAnalyzer()

#     artist_df = pd.read_csv("data/MacMiller_AD.csv")
#     mac = artist_df.copy()
#     mac['lyrics'] = mac['lyrics'].astype(str)
#     mac['lyrics'] = mac['lyrics'].replace('\n', ' ')

#     # 2) Basic Pre-Processing
#     mac['polarity'] = mac['lyrics'].apply(lambda x: TextBlob(x).sentiment.polarity*100)
#     mac['polarity'] = mac['polarity'].round(3)

#     mac['subjectivity'] = mac['lyrics'].apply(lambda x: TextBlob(x).sentiment.subjectivity*100)
#     mac['subjectivity'] = mac['subjectivity'].round(3)

#     mac['Positive_Score'] = mac['lyrics'].apply(lambda x: analyzer.polarity_scores(x)['pos']*100)
#     mac['Negative_Score'] = mac['lyrics'].apply(lambda x: analyzer.polarity_scores(x)['neg']*100)
#     mac['Neutral_Score'] = mac['lyrics'].apply(lambda x: analyzer.polarity_scores(x)['neu']*100)
#     mac['Compound_Score'] = mac['lyrics'].apply(lambda x: analyzer.polarity_scores(x)['compound']*100)
#     mac = mac.sort_values('Compound_Score', ascending=False)

#     # Calculate album-level metrics
#     album_metrics = mac.groupby(['album']).agg({'Positive_Score': 'mean', 
#                                                 'Negative_Score': 'mean', 
#                                                 'Neutral_Score': 'mean', 
#                                                 'Compound_Score': 'mean'})

#     # Calculate overall average
#     overall_average = mac[['Positive_Score', 'Negative_Score', 'Neutral_Score', 'Compound_Score']].mean()

#     # Calculate relative difference
#     relative_difference = (album_metrics - overall_average) / overall_average

#     compound_sort = album_metrics.sort_values(by=['Compound_Score'], ascending = False)
#     positive_sort = album_metrics.sort_values(by=['Positive_Score'], ascending = False)
#     negative_sort = album_metrics.sort_values(by=['Negative_Score'], ascending = False)
#     neutral_sort = album_metrics.sort_values(by=['Neutral_Score'], ascending = False)

#     album_metrics['Variability'] = abs(album_metrics['Positive_Score'] - album_metrics['Negative_Score'])

#     mac.to_csv(f"data/MM_AllSongs_sentiment.csv", index = False)
#     album_metrics = album_metrics.reset_index()
#     album_metrics.to_csv(f"data/MM_Albums_sentiment.csv", index = False)

#     return album_metrics