In [None]:
import os
import re
import string
from collections import Counter
from pathlib import Path

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import swiftascmaps  # noqa: F401 -- importing registers the 'swift.*' colormaps with matplotlib
from colorama import Fore, init
from nltk import tokenize
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from tqdm.notebook import tqdm

# download nltk corpus (first time only)
#nltk.download('all')


# Evermore

In [None]:
# Directory holding the per-album lyric CSVs. Set the LYRICS_DATA_DIR
# environment variable to point at a local copy of the dataset; when it is
# unset the original author's location is used, so existing runs are unchanged.
DATA_DIR = Path(os.environ.get(
    'LYRICS_DATA_DIR',
    '/Users/kaylaanderson/CodingProjects/album_sentiment_analysis/taylor_swift_lyrics_dataset',
))
EVERMORE_CSV = DATA_DIR / '09-evermore_deluxe_version.csv'

# `df` is the frame every later cell in this notebook reads from.
df = pd.read_csv(EVERMORE_CSV)
df.head()

In [None]:
def null_count(frame=None):
    """Summarise dtype and missing-value statistics for every column.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to inspect. When omitted, the notebook-level ``df`` is used,
        which keeps the original zero-argument call working.

    Returns
    -------
    pandas.io.formats.style.Styler
        One row per column with its name, dtype, number of missing values and
        the share of missing values in percent (0-100), with a background
        gradient applied to the two numeric columns.

    Notes
    -----
    The ``'swift.evermore'`` colormap is only known to matplotlib once
    ``swiftascmaps`` has been imported, so that import must run first.
    """
    if frame is None:
        frame = df

    # Build the null mask once and derive both statistics from it.
    missing = frame.isnull()
    summary = pd.DataFrame({'features': frame.columns,
                            'dtypes': frame.dtypes.values,
                            'NaN count': missing.sum().values,
                            # mean of a boolean mask is the missing fraction;
                            # scale it so the value matches the column label.
                            'NaN percentage': missing.mean().values * 100})

    # Restrict the gradient to the numeric columns explicitly rather than
    # relying on the styler to skip the text/dtype columns.
    return summary.style.background_gradient(cmap='swift.evermore', low=0.1, high=0.01,
                                             subset=['NaN count', 'NaN percentage'])
null_count()

In [None]:
# Display the column labels of the frame loaded from the CSV.
df.columns

In [None]:
# Print the cardinality of every column. dropna=False keeps NaN counted as a
# value of its own, matching what len(Series.unique()) reports.
for column, num_distinct_values in df.nunique(dropna=False).items():
    print(f"{column}: {num_distinct_values} distinct values")

In [None]:
# Number of rows per track title (value_counts orders by descending count).
df['track_title'].value_counts()

In [None]:
# Cut-offs applied to VADER's compound score: at or above the positive
# threshold is 'positive', at or below the negative one is 'negative',
# everything in between is 'neutral'.
POSITIVE_THRESHOLD = 0.05
NEGATIVE_THRESHOLD = -0.05


def classify_sentiment(score):
    """Map a VADER compound score to 'positive', 'negative' or 'neutral'.

    Both thresholds are inclusive. A NaN score fails both comparisons and is
    therefore labelled 'neutral'.
    """
    if score >= POSITIVE_THRESHOLD:
        return 'positive'
    if score <= NEGATIVE_THRESHOLD:
        return 'negative'
    return 'neutral'


analyzer = SentimentIntensityAnalyzer()

# One compound score per row of the 'lyric' column.
df['Vader_Score'] = df['lyric'].apply(lambda text: analyzer.polarity_scores(text)['compound'])
df['Sentiment'] = df['Vader_Score'].apply(classify_sentiment)

# Leave the preview as the cell's last expression so the notebook renders it
# as a table instead of printing a plain-text dump.
df[['lyric', 'Vader_Score', 'Sentiment']].head()

In [None]:

# Donut chart of how many rows fall into each sentiment class.

# Colour is tied to the sentiment label rather than to wedge position, so a
# class keeps its colour even if another class is absent from the data.
SENTIMENT_COLORS = {'negative': '#cc621b', 'neutral': '#c2dfff', 'positive': '#38a3a5'}

# groupby sorts its keys, so wedges are drawn in alphabetical label order.
sentiment_counts = df.groupby("Sentiment").size()
total_rows = int(sentiment_counts.sum())

# Size colours/explode from the classes actually present: matplotlib raises
# if 'explode' does not have exactly one entry per wedge.
colors = [SENTIMENT_COLORS.get(label, '#999999') for label in sentiment_counts.index]
explode = [0.1 if position == 0 else 0 for position in range(len(sentiment_counts))]  # pull out the first wedge


def format_share(pct):
    """Return the wedge label: percentage on one line, absolute count below.

    The count is recovered from the percentage matplotlib passes in. That
    value is a float, so it is rounded rather than truncated; truncation can
    report one less than the true count (e.g. 6.9999 -> 6).
    """
    return f'{pct:.2f}%\n({int(round(float(pct) * total_rows / 100))})'


fig, ax = plt.subplots()

wedges, texts, autotexts = ax.pie(
    x=sentiment_counts,
    labels=sentiment_counts.index,
    autopct=format_share,
    wedgeprops=dict(width=0.7),
    textprops=dict(size=10, color="b"),
    pctdistance=0.7,
    colors=colors,
    explode=explode,
    shadow=True)

# White disc over the middle of the pie to give the donut its hole.
center_circle = plt.Circle((0, 0), 0.6, color='white', fc='white', linewidth=1.25)
ax.add_artist(center_circle)

ax.text(0, 0, 'Sentiment\nDistribution', ha='center', va='center', fontsize=14, fontweight='bold', color='#333333')

# Pass the wedges explicitly so each legend entry is bound to its own wedge
# instead of whatever artists the axes happens to hold.
ax.legend(wedges, sentiment_counts.index, title="Sentiment", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

ax.axis('equal')  # equal aspect ratio keeps the pie circular

plt.show()

In [None]:
from swiftascmaps import red
from matplotlib.pyplot import imshow
from numpy import random

In [None]:
# Grouped bar chart: number of rows per track title, split by sentiment label.
track_fig, track_ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df, x='track_title', hue='Sentiment',
              palette='swift.evermore', ax=track_ax)
track_ax.set_title('Evermore: Relationship between Track Title and Sentiment')
track_ax.set_xlabel('Track Title')
track_ax.set_ylabel('Count')
# Turn the x tick labels vertical.
plt.setp(track_ax.get_xticklabels(), rotation=90)
plt.show()