In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import missingno as msno

# Read the raw Grammy awards CSV, then echo the frame's dimensions, column
# labels and per-column dtypes as this cell's output.
df = pd.read_csv(filepath_or_buffer="data/raw/the_grammy_awards.csv")
(df.shape, df.columns, df.dtypes)


In [None]:

# Structural overview: dtypes and non-null counts (info() prints on its own).
df.info()

# A notebook cell only renders its *final* expression, so the summary table
# and the per-column null counts have to be displayed explicitly; as bare
# mid-cell expressions they were computed and silently thrown away.
# `display` is provided as a builtin by the IPython/Jupyter kernel.
display(df.describe(include='all'))
display(df.isnull().sum())

# Visual map of where the missing values sit across rows and columns.
msno.matrix(df)
plt.show()


In [None]:

# Clean the raw frame:
#   1. parse the two timestamp columns into datetimes,
#   2. store `year` as a string,
#   3. drop rows that lack a nominee or an artist,
#   4. blank out remaining gaps in *text* columns only.
# Everything is done by reassignment rather than `inplace=True`, so the cell
# yields the same `df` no matter how many times it is re-run.
df = (
    df.assign(
        published_at=pd.to_datetime(df['published_at']),
        updated_at=pd.to_datetime(df['updated_at']),
        year=df['year'].astype(str),
    )
    .dropna(subset=['nominee', 'artist'])
)

# A blanket `fillna('')` would also write empty strings into the datetime
# (and any numeric) columns, demoting them to object dtype and undoing the
# conversion above. Restrict the fill to columns that actually hold text.
text_columns = df.select_dtypes(include=['object', 'string']).columns
df = df.fillna({column: '' for column in text_columns})


In [None]:

# Ten most frequent values of `category`, shown as a horizontal bar chart.
top_categories = df['category'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=top_categories.values, y=top_categories.index, ax=ax)
ax.set_title("Top 10 Most Frequent Categories")
ax.set_xlabel("Count")
ax.set_ylabel("Category")
fig.tight_layout()
plt.show()


In [None]:

# Count the non-null `nominee` entries within each year and plot the counts
# as an interactive line chart (rendered as the cell's output).
nominees_per_year = (
    df.groupby('year')
    .agg(nominee=('nominee', 'count'))
    .reset_index()
)
px.line(
    data_frame=nominees_per_year,
    x='year',
    y='nominee',
    title="Number of Nominees per Year",
)


In [None]:

# Ten artists that appear most often in the data, as a horizontal bar chart.
top_artists = df['artist'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=top_artists.values, y=top_artists.index, ax=ax)
ax.set_title("Top 10 Most Nominated Artists")
ax.set_xlabel("Count")
ax.set_ylabel("Artist")
fig.tight_layout()
plt.show()


In [None]:

# Share of rows per `winner` value, drawn as a pie chart.
winner_counts = df['winner'].value_counts()

# value_counts() orders its result by frequency, not by value, and may hold
# a single class. A fixed ['No', 'Yes'] list would therefore mislabel the
# wedges whenever True is the majority, and make plt.pie raise when only one
# class is present. Build the labels from the index that is actually there;
# any unexpected value falls back to its string form.
label_for = {True: 'Yes', False: 'No'}
labels = [label_for.get(value, str(value)) for value in winner_counts.index]

fig, ax = plt.subplots()
ax.pie(winner_counts, labels=labels, autopct='%1.1f%%', startangle=140)
ax.axis('equal')  # equal aspect ratio keeps the pie circular
ax.set_title("Winners Distribution")
plt.show()


In [None]:

# Keep only rows flagged as winners, then rank artists by how many such rows
# they have and chart the top ten.
is_winner = df['winner'].eq(True)
artist_wins = df.loc[is_winner, 'artist'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=artist_wins.values, y=artist_wins.index, ax=ax)
ax.set_title("Top 10 Winning Artists")
ax.set_xlabel("Number of Wins")
ax.set_ylabel("Artist")
fig.tight_layout()
plt.show()


In [None]:

# Number of winning rows in each year, shown as an interactive bar chart.
wins_per_year = (
    df.loc[df['winner'].eq(True)]
    .groupby('year')
    .size()
    .rename('wins')
    .reset_index()
)
fig = px.bar(
    data_frame=wins_per_year,
    x='year',
    y='wins',
    title='Total Wins by Year',
)
fig.show()


In [None]:

# TODO: placeholder for a future merge with the sentiment dataset, joined on track_id or title.
# Example: merged = df.merge(sentiment_df, on='track_id')
