## Kaggle Data Set https://www.kaggle.com/datasets/nelgiriyewithana/most-streamed-spotify-songs-2024


### Step 1: Load and Clean the Data

In [28]:
import pandas as pd

# Load the dataset
df = pd.read_csv('Most Streamed Spotify Songs 2024.csv', encoding='ISO-8859-1')

In [29]:
print(df)

                                Track                        Album Name  \
0                 MILLION DOLLAR BABY      Million Dollar Baby - Single   
1                         Not Like Us                       Not Like Us   
2          i like the way you kiss me        I like the way you kiss me   
3                             Flowers                  Flowers - Single   
4                             Houdini                           Houdini   
...                               ...                               ...   
4595                For the Last Time                 For the Last Time   
4596                 Dil Meri Na Sune  Dil Meri Na Sune (From "Genius")   
4597            Grace (feat. 42 Dugg)                           My Turn   
4598              Nashe Si Chadh Gayi             November Top 10 Songs   
4599  Me Acostumbre (feat. Bad Bunny)   Me Acostumbre (feat. Bad Bunny)   

              Artist Release Date          ISRC All Time Rank  Track Score  \
0      Tommy Richman 

In [30]:
print(df.head())

                        Track                    Album Name          Artist  \
0         MILLION DOLLAR BABY  Million Dollar Baby - Single   Tommy Richman   
1                 Not Like Us                   Not Like Us  Kendrick Lamar   
2  i like the way you kiss me    I like the way you kiss me         Artemas   
3                     Flowers              Flowers - Single     Miley Cyrus   
4                     Houdini                       Houdini          Eminem   

  Release Date          ISRC All Time Rank  Track Score Spotify Streams  \
0    4/26/2024  QM24S2402528             1        725.4     390,470,936   
1     5/4/2024  USUG12400910             2        545.9     323,703,884   
2    3/19/2024  QZJ842400387             3        538.4     601,309,283   
3    1/12/2023  USSM12209777             4        444.9   2,031,280,633   
4    5/31/2024  USUG12403398             5        423.3     107,034,922   

  Spotify Playlist Count Spotify Playlist Reach  ...  SiriusXM Spins  \
0 

In [31]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4600 entries, 0 to 4599
Data columns (total 29 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Track                       4600 non-null   object 
 1   Album Name                  4600 non-null   object 
 2   Artist                      4595 non-null   object 
 3   Release Date                4600 non-null   object 
 4   ISRC                        4600 non-null   object 
 5   All Time Rank               4600 non-null   object 
 6   Track Score                 4600 non-null   float64
 7   Spotify Streams             4487 non-null   object 
 8   Spotify Playlist Count      4530 non-null   object 
 9   Spotify Playlist Reach      4528 non-null   object 
 10  Spotify Popularity          3796 non-null   float64
 11  YouTube Views               4292 non-null   object 
 12  YouTube Likes               4285 non-null   object 
 13  TikTok Posts                3427 

In [32]:
print(df['Release Date'])

0       4/26/2024
1        5/4/2024
2       3/19/2024
3       1/12/2023
4       5/31/2024
          ...    
4595     9/5/2017
4596    7/27/2018
4597    2/28/2020
4598    11/8/2016
4599    4/11/2017
Name: Release Date, Length: 4600, dtype: object


In [22]:
print(pd.to_datetime(df['Release Date']))

0      2024-04-26
1      2024-05-04
2      2024-03-19
3      2023-01-12
4      2024-05-31
          ...    
4595   2017-09-05
4596   2018-07-27
4597   2020-02-28
4598   2016-11-08
4599   2017-04-11
Name: Release Date, Length: 4600, dtype: datetime64[ns]


In [34]:
#Convert 'release_date' to datetime format
df['Release Date'] = pd.to_datetime(df['Release Date'])

In [35]:
print(df['Release Date'])

0      2024-04-26
1      2024-05-04
2      2024-03-19
3      2023-01-12
4      2024-05-31
          ...    
4595   2017-09-05
4596   2018-07-27
4597   2020-02-28
4598   2016-11-08
4599   2017-04-11
Name: Release Date, Length: 4600, dtype: datetime64[ns]


### Step 2: Analyze the Data

#### Calculate Streams Per Artist

In [37]:
# Total streams per artist
total_spotify_streams_per_artist = df.groupby('Artist')['Spotify Streams'].sum()
print(total_spotify_streams_per_artist)

Artist
"XY"                                                                                           0
$OHO BANI                                                                             54,065,563
$uicideboy$                                    44,027,794675,082,623189,588,27073,979,851204,...
&ME                                                                                   34,601,626
(G)I-DLE                                             289,304,213286,016,224242,708,36858,909,647
                                                                     ...                        
ýýýýýýýýýýýýýýý ýýýýýýýýýýýýýýý                                                        2,409,532
ýýýýýýýýýýýýýýý(ýýýýýýýýýýýýýýý)                                                         548,408
ýýýýýýýýýýýýýýýýýý                                                                     4,943,057
ýýýýýýýýýýýýýýýýýýýýý ýýýýýýýýýýýýýýýýýýýýý                                              456,034
ýýýýýýýýýýýýýýýýýýýýýýý

In [39]:
df['Spotify Streams'] = df['Spotify Streams'].str.replace(',', '').astype(float)

In [40]:
most_streamed_songs = df.nlargest(10, 'Spotify Streams')
print(most_streamed_songs)

                                              Track  \
55                                  Blinding Lights   
4254                                Blinding Lights   
53                                     Shape of You   
1637                                   Shape of You   
132                               Someone You Loved   
114   Sunflower - Spider-Man: Into the Spider-Verse   
19                                        As It Was   
3406                                      As It Was   
262                                         Starboy   
519                                       One Dance   

                                         Album Name         Artist  \
55                                  Blinding Lights     The Weeknd   
4254                                Blinding Lights        xSyborg   
53                                     Shape of You     Ed Sheeran   
1637                                   Shape of You        xSyborg   
132                                         