In [8]:
import pandas as pd
df = pd.read_csv("prep_songs.csv")

In [119]:
df.columns

Index(['track_name', 'track_artist', 'track_popularity', 'track_album_name',
       'track_album_release_date', 'playlist_name', 'playlist_genre',
       'playlist_subgenre', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'duration_min'],
      dtype='object')

In [130]:
df['key'].value_counts()


1     4010
0     3454
7     3352
9     3027
11    2994
2     2827
5     2680
6     2668
8     2430
10    2272
4     2201
3      913
Name: key, dtype: int64

In [126]:
int(df.loc[0, ['mode']]) == 1

True

In [27]:
album_series = df.value_counts(subset=['track_artist', 'track_album_name'])
album_counts = album_series.to_frame(name='n_songs')

album_counts.loc[album_counts['n_songs'] > 1]

Unnamed: 0_level_0,Unnamed: 1_level_0,n_songs
track_artist,track_album_name,Unnamed: 2_level_1
Ballin Entertainment,Ultimate Freestyle Mega Mix,42
Miguel Rios,Rock & Rios (Remastered),29
Bad Bunny,X 100PRE,28
Anderson .Paak,Malibu,28
Guns N' Roses,Appetite For Destruction,28
...,...,...
Big Moe,Moe Life,2
Da Brat,Funkdafied,2
Daddy Yankee,King Daddy,2
21 Savage,Without Warning,2


## First Viz --> Song Releases over time per artist

In [98]:
import altair as alt
import pandas as pd

# Assuming 'visualization_data' is your DataFrame containing the artist's songs and release dates
df['track_album_release_date'] = pd.to_datetime(df['track_album_release_date'])
artists_of_interest = ['The Chainsmokers', 'David Guetta', 'Amy Winehouse']  # Example list
filtered_df = df[df['track_artist'].isin(artists_of_interest)]
# filtered_df = filtered_df[~filtered_df['track_name'].str.contains('remix', case=False)]
filtered_df = filtered_df[filtered_df['track_name'].str.contains('remix', case=False)]


# Create a marker for each song release
chart = alt.Chart(filtered_df).mark_circle(size=60).encode(
    x=alt.X('track_album_release_date:T', title='Release Date'),
    y=alt.Y('track_artist:N', title='Artist'),
    color=alt.Color('track_popularity:Q', scale=alt.Scale(scheme='yellowgreenblue'), title='Track Popularity'),  # Using the artist's name as a constant value on the y-axis
    tooltip=[alt.Tooltip('track_name:N', title='Track Name'),
             alt.Tooltip('track_album_name:N', title='Album Name'),
             alt.Tooltip('track_album_release_date:T', title='Release Date'),
             alt.Tooltip('track_popularity:Q', title='Popularity')]
).properties(
    title=f"Song Release Timeline",
    width=800,
    height=100  # Adjust height as needed, considering it's a timeline view
).interactive()

# Display the chart
chart.display()


In [123]:
import plotly.express as px

# Assuming 'filtered_df' is your DataFrame ready for plotting
# Ensure 'track_album_release_date' is a datetime column and 'track_popularity' is numeric
filtered_df['track_album_release_date'] = pd.to_datetime(filtered_df['track_album_release_date'])
filtered_df['track_popularity'] = filtered_df['track_popularity'].astype(float)

# Creating the Plotly visualization
fig = px.scatter(
    filtered_df,
    x='track_album_release_date',
    y='track_artist',
    color='track_popularity',
    color_continuous_scale='deep',  # This matches the Altair's color scheme
    size_max=60,
    hover_name='track_name',
    hover_data={
        'track_album_name': True,
        'track_album_release_date': True,
        'track_popularity': True,
        # Since y-axis is categorical (artist names), it doesn't make sense to have it in the tooltip
        'track_artist': False
    },
    title="Song Release Timeline",
    height=500  # Adjusting height for readability
)

# Update layout for a better match with the Altair chart's appearance
fig.update_layout(
    yaxis_categoryorder='total ascending',  # Sort artists based on the number of songs
    xaxis_title='Release Date',
    yaxis_title='Artist',
    coloraxis_colorbar=dict(title='Track Popularity')
)

# Show the figure
fig.show()


## Second Viz --> Song characteristics

In [118]:
import plotly.graph_objects as go

# Sample data: Characteristics of three songs
categories = ['Danceability', 'Energy', 'Valence', 'Acousticness', 'Popularity']

# Values for each song
song_a = [0.8, 0.7, 0.9, 0.2, 0.85]
song_b = [0.6, 0.9, 0.5, 0.4, 0.75]
song_c = [0.7, 0.6, 0.8, 0.1, 0.65]

songs = [song_a, song_b, song_c]

# Create Radar Chart
fig = go.Figure()
for song in songs:
  fig.add_trace(go.Scatterpolar(
      r=song,
      theta=categories,
      fill='toself',
      name='Song A'
  ))

# Update the layout
fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 1]
    )),
  showlegend=True,
  title=dict(
    text="Comparison of Song Characteristics",  # Title text
    font=dict(
      size=20,  # Optional: Adjust the font size
      color="RebeccaPurple"  # Optional: Adjust the font color
    ),
    x=0.5,  # Position the title in the center
    xanchor='center'  # Ensure the title's center is at x=0.5
  )
)

# Show the figure
fig.show()


In [132]:
song_features = ['danceability', 'energy', 'loudness', 'speechiness']

df[song_features].values.tolist()

[[0.748, 0.916, -2.634, 0.0583],
 [0.726, 0.815, -4.969, 0.0373],
 [0.675, 0.931, -3.432, 0.0742],
 [0.718, 0.93, -3.778, 0.102],
 [0.65, 0.833, -4.672, 0.0359],
 [0.675, 0.919, -5.385, 0.127],
 [0.449, 0.856, -4.788, 0.0623],
 [0.542, 0.903, -2.419, 0.0434],
 [0.594, 0.935, -3.562, 0.0565],
 [0.642, 0.818, -4.552, 0.032],
 [0.679, 0.923, -6.5, 0.181],
 [0.437, 0.774, -4.918, 0.0554],
 [0.744, 0.726, -4.675, 0.0463],
 [0.572, 0.915, -4.451, 0.0625],
 [0.69, 0.78, -4.446, 0.0594],
 [0.805, 0.835, -4.603, 0.0896],
 [0.694, 0.901, -4.322, 0.0948],
 [0.678, 0.747, -5.289, 0.165],
 [0.746, 0.557, -6.722, 0.0542],
 [0.467, 0.821, -5.466, 0.0934],
 [0.572, 0.934, -5.053, 0.0446],
 [0.708, 0.913, -4.329, 0.0348],
 [0.684, 0.818, -4.477, 0.128],
 [0.732, 0.85, -5.999, 0.0444],
 [0.62, 0.889, -3.785, 0.0352],
 [0.675, 0.76, -7.037, 0.0434],
 [0.682, 0.851, -7.117, 0.0363],
 [0.663, 0.905, -3.781, 0.0511],
 [0.625, 0.938, -3.983, 0.0517],
 [0.679, 0.909, -2.929, 0.105],
 [0.641, 0.869, -4.754, 0.

## Third Viz - Bar Chart Race

In [163]:
import pandas as pd
import plotly.express as px

# Selecting the top N artists based on the number of tracks

top_artists = ['Ariana Grande', 'Ed Sheeran', 'The Chainsmokers', '2Pac']
# Filtering the dataset for only the top N artists
top_artists_data = df[df['track_artist'].isin(top_artists)]

# Aggregating the number of tracks per playlist for the top N artists
playlist_distribution = top_artists_data.groupby(['playlist_name', 'track_artist']).size().reset_index(name='track_count')

# Trimming playlist names to a fixed length (e.g., 15 characters)
playlist_distribution['playlist_name'] = playlist_distribution['playlist_name'].apply(lambda x: x[:15] + '...' if len(x) > 15 else x)

# Creating the bar chart with Plotly Express
fig = px.bar(playlist_distribution, x='playlist_name', y='track_count', color='track_artist',
             title='Distribution of Tracks Across Playlists for Top Artists',
             labels={'playlist_name': 'Playlist', 'track_count': 'Number of Tracks'},
             height=600)

# Improving layout
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.update_xaxes(tickangle=-45)

# Display the figure
fig.show()


In [165]:
playlist_appearances = df.groupby('track_artist')['playlist_name'].nunique().reset_index(name='unique_playlists')

# Sorting artists by the number of unique playlist appearances
playlist_appearances_sorted = playlist_appearances.sort_values(by='unique_playlists', ascending=False)

# Creating the bar chart
fig = px.bar(playlist_appearances_sorted, x='track_artist', y='unique_playlists',
             title='Most Playlist Appearances per Artist',
             labels={'track_artist': 'Artist', 'unique_playlists': 'Number of Unique Playlists'},
             height=600)

# Improving layout
fig.update_xaxes(tickangle=-45)

# Display the figure
fig.show()