In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
import seaborn as sns

In [2]:
# Read the dataset from 'data.csv' (path relative to the working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data.csv')
df.head(n=5)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4.0
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4.0
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6.0
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15.0
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6.0


In [3]:
# Summarize the frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 953 entries, 0 to 952
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   track_name            953 non-null    object 
 1   artist(s)_name        953 non-null    object 
 2   artist_count          953 non-null    int64  
 3   released_year         953 non-null    int64  
 4   released_month        953 non-null    int64  
 5   released_day          953 non-null    int64  
 6   in_spotify_playlists  953 non-null    int64  
 7   in_spotify_charts     953 non-null    int64  
 8   streams               953 non-null    int64  
 9   in_apple_playlists    953 non-null    int64  
 10  in_apple_charts       953 non-null    int64  
 11  in_deezer_playlists   953 non-null    object 
 12  in_deezer_charts      953 non-null    int64  
 13  in_shazam_charts      903 non-null    object 
 14  bpm                   953 non-null    object 
 15  key                   8

In [4]:
# Display the column labels of df.
df.columns

Index(['track_name', 'artist(s)_name', 'artist_count', 'released_year',
       'released_month', 'released_day', 'in_spotify_playlists',
       'in_spotify_charts', 'streams', 'in_apple_playlists', 'in_apple_charts',
       'in_deezer_playlists', 'in_deezer_charts', 'in_shazam_charts', 'bpm',
       'key', 'mode', 'danceability_%', 'valence_%', 'energy_%',
       'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%'],
      dtype='object')

In [5]:
# Print the number of distinct values in each column, one "<column> <count>" line per column.
for col, values in df.items():
    print(col, values.nunique())

    

track_name 943
artist(s)_name 645
artist_count 8
released_year 50
released_month 12
released_day 31
in_spotify_playlists 879
in_spotify_charts 82
streams 949
in_apple_playlists 234
in_apple_charts 172
in_deezer_playlists 348
in_deezer_charts 34
in_shazam_charts 198
bpm 125
key 12
mode 3
danceability_% 72
valence_% 94
energy_% 81
acousticness_% 98
instrumentalness_% 39
liveness_% 68
speechiness_% 48


In [6]:
# Remove the 'key' and 'mode' columns, then show the remaining column labels.
# NOTE: df is rebound here, so re-running this cell without reloading df
# raises KeyError because the two columns are already gone.
df = df.drop(columns=['key', 'mode'])
df.columns

Index(['track_name', 'artist(s)_name', 'artist_count', 'released_year',
       'released_month', 'released_day', 'in_spotify_playlists',
       'in_spotify_charts', 'streams', 'in_apple_playlists', 'in_apple_charts',
       'in_deezer_playlists', 'in_deezer_charts', 'in_shazam_charts', 'bpm',
       'danceability_%', 'valence_%', 'energy_%', 'acousticness_%',
       'instrumentalness_%', 'liveness_%', 'speechiness_%'],
      dtype='object')

In [7]:
# Combine the integer 'released_year', 'released_month' and 'released_day'
# columns into a single datetime column named 'released'.
# pd.to_datetime can assemble dates directly from a frame whose columns are
# named year/month/day, which avoids converting to strings and relying on
# format inference for unpadded values such as "2023-7-14".
# NOTE: the three source columns are dropped afterwards, so re-running this
# cell without reloading df raises KeyError.
date_parts = {'released_year': 'year', 'released_month': 'month', 'released_day': 'day'}
df['released'] = pd.to_datetime(df[list(date_parts)].rename(columns=date_parts))
df = df.drop(columns=list(date_parts))
df.head()

Unnamed: 0,track_name,artist(s)_name,artist_count,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,in_deezer_playlists,in_deezer_charts,in_shazam_charts,bpm,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,released
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,553,147,141381703,43,263,45,10,826,125,80,89,83,31,0,8,4.0,2023-07-14
1,LALA,Myke Towers,1,1474,48,133716286,48,126,58,14,382,92,71,61,74,7,0,10,4.0,2023-03-23
2,vampire,Olivia Rodrigo,1,1397,113,140003974,94,207,91,14,949,138,51,32,53,17,0,31,6.0,2023-06-30
3,Cruel Summer,Taylor Swift,1,7858,100,800840817,116,207,125,12,548,170,55,58,72,11,0,11,15.0,2019-08-23
4,WHERE SHE GOES,Bad Bunny,1,3133,50,303236322,84,133,87,15,425,144,65,23,80,14,63,11,6.0,2023-05-18


In [8]:
# Select the 10 rows with the highest 'streams' values (sorted descending)
# and display them as a table.
top10 = df.sort_values('streams', ascending=False).iloc[:10]
top10

Unnamed: 0,track_name,artist(s)_name,artist_count,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,in_deezer_playlists,in_deezer_charts,in_shazam_charts,bpm,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,released
55,Blinding Lights,The Weeknd,1,43899,69,3703895074,672,199,3421,20,,171,50,38,80,0,0,9,7.0,2019-11-29
179,Shape of You,Ed Sheeran,1,32181,10,3562543890,33,0,6808,7,0.0,96,83,93,65,58,0,9,8.0,2017-01-06
86,Someone You Loved,Lewis Capaldi,1,17836,53,2887241814,440,125,1800,0,,110,50,45,41,75,0,11,3.0,2018-11-08
620,Dance Monkey,Tones and I,1,24529,0,2864791672,533,167,3595,6,,98,82,54,59,69,0,18,10.0,2019-05-10
41,Sunflower - Spider-Man: Into the Spider-Verse,"Post Malone, Swae Lee",2,24094,78,2808096550,372,117,843,4,69.0,90,76,91,50,54,0,7,5.0,2018-10-09
162,One Dance,"Drake, WizKid, Kyla",3,43257,24,2713922350,433,107,3631,0,26.0,104,77,36,63,1,0,36,5.0,2016-04-04
84,STAY (with Justin Bieber),"Justin Bieber, The Kid Laroi",2,17050,36,2665343922,492,99,798,31,0.0,170,59,48,76,4,0,10,5.0,2021-07-09
140,Believer,Imagine Dragons,1,18986,23,2594040133,250,121,2969,10,31.0,125,77,74,78,4,0,23,11.0,2017-01-31
725,Closer,"The Chainsmokers, Halsey",2,28032,0,2591224264,315,159,2179,0,44.0,95,75,64,52,41,0,11,3.0,2016-05-31
48,Starboy,"The Weeknd, Daft Punk",2,29536,79,2565529693,281,137,2445,1,140.0,186,68,49,59,16,0,13,28.0,2016-09-21
