In [None]:
#This file shows our code for one country, "Canada"; the same code was duplicated for the other 4 countries.

In [None]:
import pandas as pd
# Load the workbook into `df` (the frame every later cell works on) and
# print its first rows as a quick sanity check.
source_workbook = "matchedcanada.xls"
df = pd.read_excel(source_workbook)
preview = df.head()
print(preview)

In [None]:
# Longest lyric prefix, in characters, that is kept for each song.
MAX_LYRIC_CHARS = 512

# Trim surrounding whitespace from every lyric.
lyrics_clean = df["Lyrics"].str.strip()

# A whitespace-only lyric becomes "" after strip(), which dropna() would keep;
# mark it as missing so blank songs are removed together with the NaN ones.
lyrics_clean = lyrics_clean.mask(lyrics_clean == "")

# Cap each lyric at MAX_LYRIC_CHARS characters; shorter strings are unchanged.
df["Lyrics"] = lyrics_clean.str[:MAX_LYRIC_CHARS]

# Drop songs without usable lyrics. The copy gives later cells an independent
# frame to add columns to.
df = df.dropna(subset=["Lyrics"]).copy()

# Parse the chart date so the `.dt` accessor can be used on it afterwards.
df["Date"] = pd.to_datetime(df["Date"])

In [None]:
from transformers import pipeline

# Build the classification pipeline once; analyze_sentiment() reuses it for every lyric.
SENTIMENT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
sentiment_analyzer = pipeline(task="sentiment-analysis", model=SENTIMENT_MODEL_NAME)

# Star rating -> sentiment class. The nlptown review model documents its labels
# as "1 star" .. "5 stars"; 1-2 are treated as negative, 3 as neutral, 4-5 as positive.
_STAR_CATEGORIES = {
    1: "Negative",
    2: "Negative",
    3: "Neutral",
    4: "Positive",
    5: "Positive",
}


def categorize_sentiment(score, label=None):
    """Map a classifier prediction to "Negative", "Neutral" or "Positive".

    Args:
        score: Numeric score of the prediction. Only used when ``label`` is
            None, in which case the legacy thresholds apply
            (<= 0.4 negative, <= 0.6 neutral, otherwise positive).
        label: Optional predicted label whose first whitespace-separated token
            is the star count, e.g. "1 star" or "5 stars". When given, the
            category is derived from the star rating and ``score`` is ignored,
            because the pipeline score is the confidence of the predicted
            label rather than a polarity value.

    Returns:
        One of "Negative", "Neutral", "Positive".

    Raises:
        ValueError: If ``label`` is given but does not start with a star count
            between 1 and 5.
    """
    if label is not None:
        try:
            stars = int(str(label).split()[0])
            return _STAR_CATEGORIES[stars]
        except (ValueError, IndexError, KeyError):
            raise ValueError(f"Unrecognized sentiment label: {label!r}") from None

    # Legacy behaviour for callers that only have a score.
    if score <= 0.4:
        return "Negative"
    elif score <= 0.6:
        return "Neutral"
    return "Positive"


def analyze_sentiment(lyrics):
    """Classify one lyric text with the module-level ``sentiment_analyzer``.

    Args:
        lyrics: Text to classify.

    Returns:
        A ``(category, score)`` tuple, where ``category`` comes from the
        predicted star label and ``score`` is the pipeline's score for that
        label. Returns ``(None, None)`` when the classifier call or the label
        parsing fails, so one bad row does not abort a whole run.
    """
    try:
        # truncation=True lets the tokenizer cut over-long inputs instead of
        # failing on texts that exceed the model's maximum sequence length.
        result = sentiment_analyzer(lyrics, truncation=True)
        top_prediction = result[0]
        sentiment_score = top_prediction["score"]
        category = categorize_sentiment(sentiment_score, label=top_prediction["label"])
        return category, sentiment_score
    except Exception as exc:
        # Stay best-effort, but make the failure visible instead of silently
        # producing empty rows.
        import warnings

        warnings.warn(f"Sentiment analysis failed for one lyric: {exc!r}")
        return None, None

# Score every lyric once, keeping the (category, score) pairs in row order.
sentiment_results = [analyze_sentiment(text) for text in df["Lyrics"]]

# Fill the two columns separately: unpacking zip(*results) into two targets
# raises ValueError when the frame has no rows, whereas empty lists assign cleanly.
df["Sentiment"] = [category for category, _ in sentiment_results]
df["Score"] = [score for _, score in sentiment_results]

# Print the first few rows of the DataFrame
print(df[["Date", "Rank", "Song", "Artist", "Sentiment", "Score"]].head())

In [None]:
import matplotlib.pyplot as plt

# Yearly sentiment trend: share of each sentiment class per calendar year.
df["Year"] = df["Date"].dt.year

# Rows are years, columns are sentiment classes, values are within-year proportions.
yearly_shares = df.groupby("Year")["Sentiment"].value_counts(normalize=True)
sentiment_trends = yearly_shares.unstack()

# Stacked bars, one bar per year.
ax = sentiment_trends.plot.bar(stacked=True, figsize=(12, 6), colormap="coolwarm")
ax.set_title("Sentiment Trends in Top Songs_Canada (2019-2022)")
ax.set_xlabel("Year")
ax.set_ylabel("Proportion of Songs")
ax.legend(title="Sentiment")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Monthly sentiment trend: share of each sentiment class per calendar month.
df["Month"] = df["Date"].dt.to_period("M")

# Rows are months, columns are sentiment classes, values are within-month proportions.
monthly_shares = df.groupby("Month")["Sentiment"].value_counts(normalize=True)
sentiment_trends = monthly_shares.unstack()

# Stacked bars, one bar per month.
ax = sentiment_trends.plot.bar(stacked=True, figsize=(12, 6), colormap="coolwarm")
ax.set_title("Sentiment Trends in Top Songs in Canada by month (2019-2022)")
ax.set_xlabel("Month")
ax.set_ylabel("Proportion of Songs")
ax.legend(title="Sentiment")
plt.show()

In [None]:
#Plotting a radar chart

import numpy as np
import matplotlib.pyplot as plt

# Mean proportion of each sentiment class over the rows of `sentiment_trends`.
# NOTE: `sentiment_trends` is whatever the most recently run trend cell produced.
sentiment_values = sentiment_trends.mean()

# One spoke per sentiment category
categories = sentiment_values.index
N = len(categories)

# Evenly spaced spoke angles around the circle
angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()

# Repeat the first vertex at the end so the outline returns to its starting
# spoke; without it the line stops at the last category and the shape stays open.
value_list = sentiment_values.tolist()
closed_angles = angles + angles[:1]
closed_values = value_list + value_list[:1]

# Make the plot
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw=dict(polar=True))
ax.fill(closed_angles, closed_values, color="b", alpha=0.25)  # Shaded area
ax.plot(closed_angles, closed_values, color="b", linewidth=2)  # Closed outline

# Hide radial tick labels and put one category label on each spoke
# (the duplicated closing vertex gets no tick of its own).
ax.set_yticklabels([])
ax.set_xticks(angles)
ax.set_xticklabels(categories)

plt.title("Radar Chart: Sentiment Proportions Over Time_Canada (2019-2022)", fontsize=14, y=1.1)
plt.show()

In [None]:
#Plotting a radar chart with bars and line overlay

# Mean proportion of each sentiment class over the rows of `sentiment_trends`.
# NOTE: `sentiment_trends` is whatever the most recently run trend cell produced.
sentiment_values = sentiment_trends.mean()

# One spoke per sentiment category
categories = sentiment_values.index
N = len(categories)

# Evenly spaced spoke angles around the circle
angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()

# Set up the plot
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw=dict(polar=True))

# Plot the bars, one per category, centred on its spoke
ax.bar(angles, sentiment_values, color='b', alpha=0.25, linewidth=2, edgecolor='black', zorder=1)

# Plot the line overlay. The first vertex is repeated at the end so the line
# returns to its starting spoke instead of stopping at the last category.
value_list = sentiment_values.tolist()
closed_angles = angles + angles[:1]
closed_values = value_list + value_list[:1]
ax.plot(closed_angles, closed_values, color='b', linewidth=2, linestyle='-', zorder=2)

# Hide radial tick labels and put one category label on each spoke
ax.set_yticklabels([])
ax.set_xticks(angles)
ax.set_xticklabels(categories)

# Title
plt.title("Radar Chart with Bars and Line Overlay (2019-2022)", fontsize=14, y=1.1)
plt.show()

In [None]:
#Saving new excel files
# Use one filename for both the write and the download, so the download
# requests exactly the file that was just written.
OUTPUT_WORKBOOK = "songs_with_sentiment_Canada.xlsx"
df.to_excel(OUTPUT_WORKBOOK, index=False)

# Imported after saving so the workbook is still written when this runs
# outside Google Colab, where the import fails.
from google.colab import files
files.download(OUTPUT_WORKBOOK)