In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Sentiment trends for the 4 countries: load, label and combine the data

# Read each country's Excel export and tag every row with its country name
usa_df = pd.read_excel("USA_songs_with_sentiment (2).xlsx").assign(Country="USA")
canada_df = pd.read_excel("songs_with_sentiment _canada(3).xlsx").assign(Country="Canada")
uk_df = pd.read_excel("songs_with_sentiment_UK(2).xlsx").assign(Country="UK")
australia_df = pd.read_excel("songs_with_sentiment_australia (2).xlsx").assign(Country="Australia")

# Stack the four frames, parse "Date" as datetime and derive a
# year-month period column ("Month") to group on
combined_df = (
    pd.concat([usa_df, canada_df, uk_df, australia_df], ignore_index=True)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .assign(Month=lambda frame: frame["Date"].dt.to_period("M"))
)
# Map a numeric sentiment score onto a coarse label
def classify_sentiment(score, negative_max=0.4, neutral_max=0.6):
    """Return the sentiment label for a numeric score.

    Parameters
    ----------
    score : float
        Sentiment score to classify.
    negative_max : float, default 0.4
        Scores less than or equal to this value are labelled "Negative".
    neutral_max : float, default 0.6
        Scores above ``negative_max`` and less than or equal to this value
        are labelled "Neutral"; anything higher is "Positive".

    Returns
    -------
    str or None
        "Negative", "Neutral" or "Positive", or ``None`` when the score is
        missing (NaN / None).
    """
    # A NaN fails every comparison below, so without this guard a missing
    # score would fall through and be reported as "Positive".
    if pd.isna(score):
        return None
    if score <= negative_max:
        return "Negative"
    if score <= neutral_max:
        return "Neutral"
    return "Positive"

# Label every song with a sentiment category derived from its score
combined_df['Sentiment_Category'] = combined_df['Score'].apply(classify_sentiment)

# Count songs per (month, category); unstack turns the categories into columns
# and fill_value=0 gives month/category pairs with no songs a count of zero
sentiment_trend = (
    combined_df.groupby(['Month', 'Sentiment_Category'])
    .size()
    .unstack(fill_value=0)
)

# One line per sentiment category, months on the x-axis
sentiment_trend.plot(kind='line', figsize=(12, 6))
plt.title('Sentiment Trends Over Time')
plt.xlabel('Month')
plt.ylabel('Total Number of Songs')
plt.legend(title='Sentiment')
plt.show()

In [None]:
# Define a function to calculate monthly averages
def calculate_monthly_avg(df, date_col, score_col):
    """Return a copy of ``df`` with each row's monthly average score attached.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is not modified.
    date_col : str
        Name of the column holding dates (anything ``pd.to_datetime`` accepts).
    score_col : str
        Name of the numeric column to average.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``date_col`` is datetime, ``'Month'`` holds the
        year-month period of each row, and ``'Monthly_Avg_Score'`` holds the
        mean of ``score_col`` over all rows sharing that month.
    """
    # Work on a copy so the caller's frame is left untouched
    result = df.copy()
    result[date_col] = pd.to_datetime(result[date_col])
    result['Month'] = result[date_col].dt.to_period('M')

    # One row per month with the mean score for that month
    monthly_avg = (
        result.groupby('Month')[score_col]
        .mean()
        .rename('Monthly_Avg_Score')
        .reset_index()
    )

    # If the frame already carries averages from an earlier run, drop them so
    # the merge does not create 'Monthly_Avg_Score_x' / '_y' columns
    if score_col != 'Monthly_Avg_Score':
        result = result.drop(columns='Monthly_Avg_Score', errors='ignore')

    # Left merge keeps every original row and attaches its month's average
    return result.merge(monthly_avg, on='Month', how='left')

# Attach the monthly average score to every country's dataset
usa_df, canada_df, uk_df, australia_df = (
    calculate_monthly_avg(country_frame, 'Date', 'Score')
    for country_frame in (usa_df, canada_df, uk_df, australia_df)
)

# Write each enriched dataset to its own Excel file (row index omitted)
for enriched_frame, output_name in (
    (usa_df, "USA_songs_with_monthly_avg.xlsx"),
    (canada_df, "Canada_songs_with_monthly_avg.xlsx"),
    (uk_df, "UK_songs_with_monthly_avg.xlsx"),
    (australia_df, "Australia_songs_with_monthly_avg.xlsx"),
):
    enriched_frame.to_excel(output_name, index=False)

In [None]:
import matplotlib.pyplot as plt

# Plotting the proportion of monthly average scores over time for the 4 countries

# Prepare the data: on each country's frame, express every row's
# 'Monthly_Avg_Score' as a share of that column's total, and label the country
for country_name, country_frame in (
    ('USA', usa_df),
    ('Canada', canada_df),
    ('UK', uk_df),
    ('Australia', australia_df),
):
    country_frame['Proportion'] = (
        country_frame['Monthly_Avg_Score'] / country_frame['Monthly_Avg_Score'].sum()
    )
    country_frame['Country'] = country_name

# Combine the data into a single DataFrame for easier plotting
combined_df = pd.concat([usa_df, canada_df, uk_df, australia_df])

# Convert the "Month" period column to timestamps so matplotlib can plot it
combined_df['Month'] = combined_df['Month'].dt.to_timestamp()

# Plot the data
plt.figure(figsize=(12, 6))
for country in combined_df['Country'].unique():
    subset = combined_df[combined_df['Country'] == country]
    # The frames hold one row per song and every row of a month carries the
    # same monthly average, so keep a single row per month and order the
    # months chronologically; otherwise the line revisits months in row order.
    monthly = subset.drop_duplicates(subset='Month').sort_values('Month')
    plt.plot(monthly['Month'], monthly['Proportion'], label=country)

# Customize the plot
plt.title('Proportion of Monthly Average Scores Over Time', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Proportion', fontsize=14)
plt.legend(title='Country', fontsize=12)
plt.grid(visible=True, linestyle='--', alpha=0.7)
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
#Wordcloud for each country in its peak month 


import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS

# Re-read the four per-country Excel exports into fresh DataFrames
usa_df, canada_df, uk_df, australia_df = (
    pd.read_excel(excel_name)
    for excel_name in (
        "USA_songs_with_sentiment (2).xlsx",
        "songs_with_sentiment _canada(3).xlsx",
        "songs_with_sentiment_UK(2).xlsx",
        "songs_with_sentiment_australia (2).xlsx",
    )
)

# Define a function to filter data by month and year, and generate a word cloud
def generate_wordcloud(df, year, month, country_name):
    """Display a word cloud of the lyrics dated in one calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with a 'Date' column (anything ``pd.to_datetime`` accepts) and a
        'Lyrics' column. The frame is not modified.
    year : int
        Calendar year to select.
    month : int
        Calendar month (1-12) to select.
    country_name : str
        Label used in the plot title and in the "skipping" message.

    Returns
    -------
    None
        The figure is shown as a side effect. When the selected month has no
        lyrics text, a message is printed and nothing is plotted.
    """
    # Parse the dates into a local Series instead of overwriting df['Date'],
    # so the caller's frame keeps its original column
    dates = pd.to_datetime(df['Date'])
    in_month = (dates.dt.year == year) & (dates.dt.month == month)

    # Combine all lyrics into a single text; astype(str) keeps the join from
    # failing on non-string cells
    all_lyrics = " ".join(df.loc[in_month, 'Lyrics'].dropna().astype(str))

    # With no text there is nothing to draw, so report it and stop here
    if not all_lyrics.strip():
        print(f'No lyrics found for {country_name} in {month}/{year}; skipping word cloud.')
        return

    # Add additional stopwords
    custom_stopwords = set(STOPWORDS).union({"chorus", "verse", "repeat", "oh", "yeah", "na"})

    # Generate a word cloud
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        stopwords=custom_stopwords,
    ).generate(all_lyrics)

    # Plot the word cloud
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(f'Word Cloud for {country_name} - {month}/{year}', fontsize=16)
    plt.show()

# One word cloud per (dataset, year, month, title label) entry, drawn in order
wordcloud_requests = (
    (australia_df, 2020, 11, "Australia"),
    (canada_df, 2020, 11, "Canada"),
    (uk_df, 2020, 8, "UK (August)"),
    (uk_df, 2020, 10, "UK (October)"),
    (usa_df, 2020, 12, "USA"),
)
for request in wordcloud_requests:
    generate_wordcloud(*request)