In [300]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib import font_manager

# Register every font file found under the local 'Lato' directory with
# matplotlib's font manager so the family can be selected by name.
font_dirs = ['Lato']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
for font_file in font_files:
    font_manager.fontManager.addfont(font_file)

# Only switch the default family when matplotlib actually knows a 'Lato' face
# (either registered above or already installed). Selecting a missing family
# makes matplotlib emit a "findfont" warning and fall back to its default font
# for every piece of text it draws.
if any(entry.name == 'Lato' for entry in font_manager.fontManager.ttflist):
    plt.rcParams['font.family'] = 'Lato'
else:
    print("Lato font not found; keeping matplotlib's default font family.")

# IPython magic: ask the inline backend to render figures in 'retina' (high-DPI) format.
%config InlineBackend.figure_format = 'retina'

In [306]:
# Load the dataset from the notebook's working directory; the file is read as
# Latin-1 rather than pandas' default UTF-8.
csv_path = 'spotify.csv'
df = pd.read_csv(csv_path, encoding='latin1')

# Preview the first 20 rows.
df.head(20)

FileNotFoundError: [Errno 2] No such file or directory: 'spotify.csv'

In [None]:
# Print column names, dtypes and non-null counts for the loaded frame.
df.info()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Show the first ten rows (by position).
df.head(10)

In [None]:
# Release dates: parse into datetime values.
df['Release Date'] = pd.to_datetime(df['Release Date'])

# Count-like columns: remove every comma (presumably thousands separators —
# TODO confirm against the raw CSV), then parse each column as numeric.
numeric_columns = [
    'Spotify Streams',
    'Spotify Playlist Count',
    'Spotify Playlist Reach',
    'YouTube Views',
    'YouTube Likes',
    'TikTok Posts',
    'TikTok Likes',
    'TikTok Views',
    'YouTube Playlist Reach',
    'AirPlay Spins',
    'SiriusXM Spins',
    'Deezer Playlist Reach',
    'Pandora Streams',
    'Pandora Track Stations',
    'Soundcloud Streams',
    'Shazam Counts',
]
without_commas = df[numeric_columns].replace(',', '', regex=True)
df[numeric_columns] = without_commas.apply(pd.to_numeric)

# The rank gets the same comma stripping, and is additionally downcast to the
# smallest integer dtype that can hold it.
rank_text = df['All Time Rank'].replace(',', '', regex=True)
df['All Time Rank'] = pd.to_numeric(rank_text, downcast='integer')

# Explicit flag: 0/1 -> False/True, stored as a categorical column.
explicit_flags = df['Explicit Track'].replace({0: False, 1: True})
df['Explicit Track'] = explicit_flags.astype('category')

# Re-inspect dtypes and non-null counts after the conversions.
df.info()

In [None]:
# Preview the first rows of df (head() defaults to five rows).
df.head()

In [None]:
# Per column: does it contain at least one missing value?
df.isna().any()

In [None]:
# Per column: number of missing values.
df.isna().sum()

In [None]:
# Per column: fraction of rows that are missing (mean of the boolean mask).
df.isna().mean()

In [None]:
# NOTE(review): this fills missing values with NaN, which looks like a no-op for
# data that already uses NaN for missing entries — confirm whether a different
# fill value (or dropping rows) was intended.
df = df.fillna(np.nan)

In [None]:
# Build the working frame by removing the columns listed below; df itself is
# left unchanged because drop() returns a new frame.
columns_to_drop = [
    'TIDAL Popularity',
    'AirPlay Spins',
    'SiriusXM Spins',
    'Deezer Playlist Count',
    'Deezer Playlist Reach',
    'Amazon Playlist Count',
    'Pandora Streams',
    'Pandora Track Stations',
    'Soundcloud Streams',
]
df_spotify = df.drop(columns=columns_to_drop)

# Preview the first 20 rows of the reduced frame.
df_spotify.head(20)

### Use df_spotify for dataset

In [None]:
# Descriptive statistics for df_spotify, using describe()'s default column selection.
df_spotify.describe()

### 1. Basic Data: Top 10 Artists / Songs with Most Spotify Streams, YouTube Views, TikTok Posts (Bar Graph), Year Variation (Line Graph), Average Stream per Song (Table?)

### 2. Determine which score (All Time Rank, Track Score, Spotify Popularity) correlates most strongly with Spotify Streams (correlation table)

#### This will help decide which score is most relevant to streams and should therefore be emphasized.

### 3. Correlation between YouTube variables and Spotify vs. TikTok variables and Spotify (correlation table)

#### This will help decide which third-party app to look at when analyzing song performance.

### 4. Compare songs from two artists that were released on the same date vs. on different dates, and look into their performance (bar graph)

### 5. Average popularity (using the score metric with the highest correlation in number 2) for songs released per year

#### Will be able to see which year had the most popular songs

In [None]:
# Top 10 streamed songs on Spotify and their release dates

In [None]:
# Top 10 Streamed Artists on Spotify

In [None]:
# Yearly users: monthly active users, one value per year.
# The chart further down labels these values in millions for 2018–2024.
mau = pd.Series(
    [208, 271, 345, 406, 489, 602, 626]
)
mau

In [None]:
# Premium subscribers, one value per year.
# The chart further down labels these values in millions for 2018–2024.
sub = pd.Series(
    [96, 124, 155, 180, 205, 236, 246]
)
sub

In [None]:
# Integer bar positions 0..n-1, one per entry in mau.
x = np.arange(mau.size)

In [None]:
# Overlaid bar chart: monthly active users vs. premium subscribers per year,
# drawn on a dark background with white axes and text.
fig, ax = plt.subplots(figsize=(8, 6))

# Both series share the same x positions; the subscriber bars are drawn second
# so they sit on top of the MAU bars.
ax.bar(x, mau, width=0.68, label='Monthly Active Users (MAU)', color='#1DB954')
ax.bar(x, sub, width=0.68, label='Premium Subscribers', color='#99FFB9')

# Value labels placed slightly above the top edge of each bar. The -0.25 offset
# shifts the text left so it sits roughly over the bar instead of starting at
# its centre.
for xpos, mau_value, sub_value in zip(x, mau, sub):
    ax.annotate(f"{mau_value}M", (xpos - 0.25, mau_value + 10), size=10,
                annotation_clip=False, color='white', weight='bold')
    ax.annotate(f"{sub_value}M", (xpos - 0.25, sub_value + 10), size=10,
                annotation_clip=False, color='black', weight='bold')

# X axis: one tick per bar, labelled with the corresponding year.
ax.set_xlabel('Year', color='white')
ax.set_xticks(x)
ax.set_xticklabels([2018, 2019, 2020, 2021, 2022, 2023, 2024], color='white')

# Y axis: ticks every 100, with labels derived from the tick values so the two
# cannot drift apart.
y_ticks = np.arange(0, 700, 100)
ax.set_ylabel('Users in Millions', color='white')
ax.set_yticks(y_ticks)
ax.set_yticklabels([f"{tick}M" for tick in y_ticks], color='white')

ax.set_title('Spotify Users from 2018 to 2024 Q2', color='white', fontweight='bold', x=0.27, y=1.03)

# Dark theme: hide the top/right spines and make the remaining spines and the
# tick marks white so they stay visible on the dark background.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_color('white')
ax.spines['left'].set_color('white')
ax.tick_params(axis='both', color='white')

# Manually position the axis labels (axes-fraction coordinates).
ax.xaxis.set_label_coords(0.95, -0.08)
ax.yaxis.set_label_coords(-0.1, 0.88)

ax.set_facecolor(color='#1b212c')
fig.patch.set_facecolor('#1b212c')

# Use the Axes method rather than the pyplot state machine, consistent with
# the rest of this cell.
ax.legend()
plt.show()