In [1]:
import pandas as pd
import numpy as np
import os

# Read the streaming dataset from the CSV next to this notebook, then preview the first two rows.
df = pd.read_csv(filepath_or_buffer='Top 100 most Streamed - Sheet1.csv')
df.head(n=2)

Unnamed: 0,title,artist,top genre,year,beats.per.minute,energy,danceability,loudness.dB,liveness,valance,length,acousticness,speechiness,popularity
0,Blinding Lights,The Weeknd,canadian contemporary r&b,2020,171,73,51,-6,9,33,200,0,6,91
1,Watermelon Sugar,Harry Styles,pop,2019,95,82,55,-4,34,56,174,12,5,88


### 17. astype() → Type Conversion

Converts a Series or an entire DataFrame to a specific data type. It returns a new object; the original is unchanged unless the result is assigned back.

Common use cases:

- Convert a string column to int/float
- Convert the `year` column to string for plotting
- Convert `popularity` to float if it is not one already

In [5]:
# Show the year column as strings; df itself is not modified.
df['year'].astype(dtype=str)

0     2020
1     2019
2     2021
3     2019
4     2017
      ... 
95    2016
96    2015
97    2021
98    2018
99    2016
Name: year, Length: 100, dtype: object

In [3]:
# Show the popularity column as 64-bit floats; df itself is not modified.
df['popularity'].astype('float64')

0     91.0
1     88.0
2     88.0
3     86.0
4     86.0
      ... 
95    66.0
96    66.0
97    66.0
98    56.0
99    53.0
Name: popularity, Length: 100, dtype: float64

In [4]:
# Multiple conversions in one call: pass a {column: dtype} mapping.
df.astype({
    'year': int,
    'popularity': float,
})

Unnamed: 0,title,artist,top genre,year,beats.per.minute,energy,danceability,loudness.dB,liveness,valance,length,acousticness,speechiness,popularity
0,Blinding Lights,The Weeknd,canadian contemporary r&b,2020,171,73,51,-6,9,33,200,0,6,91.0
1,Watermelon Sugar,Harry Styles,pop,2019,95,82,55,-4,34,56,174,12,5,88.0
2,Mood (feat. iann dior),24kGoldn,cali rap,2021,91,72,70,-4,32,73,141,17,4,88.0
3,Someone You Loved,Lewis Capaldi,pop,2019,110,41,50,-6,11,45,182,75,3,86.0
4,Perfect,Ed Sheeran,pop,2017,95,45,60,-6,11,17,263,16,2,86.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,One Dance,Drake,canadian hip hop,2016,104,61,79,-6,32,43,174,1,6,66.0
96,Sugar,Maroon 5,pop,2015,120,79,75,-7,9,88,235,6,3,66.0
97,Emotions,Mark Mendy,pop dance,2021,126,83,66,-5,40,74,172,5,29,66.0
98,Cold Water,Major Lazer,dance pop,2018,93,80,61,-5,16,50,185,7,4,56.0


### 18. replace() → Value Replacement

Replaces specific values with others.

Works with strings, numbers, lists, dictionaries, or even regular expressions (with `regex=True`).

In [6]:
# Replace a single value: every artist equal to 'Drake' becomes 'Champagne Papi'.
df['artist'].replace(to_replace='Drake', value='Champagne Papi')

0         The Weeknd
1       Harry Styles
2           24kGoldn
3      Lewis Capaldi
4         Ed Sheeran
           ...      
95    Champagne Papi
96          Maroon 5
97        Mark Mendy
98       Major Lazer
99       Mike Posner
Name: artist, Length: 100, dtype: object

In [8]:
# Replace multiple values: both 2018 and 2019 are mapped to 2020.
df['year'].replace(to_replace=[2018, 2019], value=2020)

0     2020
1     2020
2     2021
3     2020
4     2017
      ... 
95    2016
96    2015
97    2021
98    2020
99    2016
Name: year, Length: 100, dtype: int64

In [11]:
# Dictionary replacement: keys are the old values, values are their substitutes.
df['title'].replace(to_replace={'Blinding Lights': 'BL'})

0                                      BL
1                        Watermelon Sugar
2                  Mood (feat. iann dior)
3                       Someone You Loved
4                                 Perfect
                     ...                 
95                              One Dance
96                                  Sugar
97                               Emotions
98                             Cold Water
99    I Took A Pill In Ibiza - Seeb Remix
Name: title, Length: 100, dtype: object

### Practice Questions

In [12]:
# Practice: convert the year column from integer to string.
# The converted Series is only displayed; df keeps its integer year column.
df['year'].astype(dtype=str)

0     2020
1     2019
2     2021
3     2019
4     2017
      ... 
95    2016
96    2015
97    2021
98    2018
99    2016
Name: year, Length: 100, dtype: object

In [13]:
# Practice: convert the popularity column to float.
# The converted Series is only displayed; df is left as it was.
df['popularity'].astype(dtype=float)

0     91.0
1     88.0
2     88.0
3     86.0
4     86.0
      ... 
95    66.0
96    66.0
97    66.0
98    56.0
99    53.0
Name: popularity, Length: 100, dtype: float64

In [16]:
# Practice: convert the beats.per.minute column to integer type.
# The converted Series is only displayed; df is left as it was.
df['beats.per.minute'].astype(dtype=int)

0     171
1      95
2      91
3     110
4      95
     ... 
95    104
96    120
97    126
98     93
99    102
Name: beats.per.minute, Length: 100, dtype: int64

In [19]:
# Practice: change two columns in one go (year → str, popularity → float)
# by passing a {column: dtype} mapping to DataFrame.astype.
df.astype({
    'year': str,
    'popularity': float,
})

Unnamed: 0,title,artist,top genre,year,beats.per.minute,energy,danceability,loudness.dB,liveness,valance,length,acousticness,speechiness,popularity
0,Blinding Lights,The Weeknd,canadian contemporary r&b,2020,171,73,51,-6,9,33,200,0,6,91.0
1,Watermelon Sugar,Harry Styles,pop,2019,95,82,55,-4,34,56,174,12,5,88.0
2,Mood (feat. iann dior),24kGoldn,cali rap,2021,91,72,70,-4,32,73,141,17,4,88.0
3,Someone You Loved,Lewis Capaldi,pop,2019,110,41,50,-6,11,45,182,75,3,86.0
4,Perfect,Ed Sheeran,pop,2017,95,45,60,-6,11,17,263,16,2,86.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,One Dance,Drake,canadian hip hop,2016,104,61,79,-6,32,43,174,1,6,66.0
96,Sugar,Maroon 5,pop,2015,120,79,75,-7,9,88,235,6,3,66.0
97,Emotions,Mark Mendy,pop dance,2021,126,83,66,-5,40,74,172,5,29,66.0
98,Cold Water,Major Lazer,dance pop,2018,93,80,61,-5,16,50,185,7,4,56.0


In [18]:
# Inspect the dtype of every column. year and popularity still report int64 because
# the astype() results in the cells above were displayed but never assigned back to df.
df.dtypes

title               object
artist              object
top genre           object
year                 int64
beats.per.minute     int64
energy               int64
danceability         int64
loudness.dB          int64
liveness             int64
valance              int64
length               int64
acousticness         int64
speechiness          int64
popularity           int64
dtype: object

In [21]:
# Practice: replace multiple artists at once ("The Weeknd" → "Weeknd", "Ed Sheeran" → "Sheeran").
# The nested {column: {old: new}} mapping limits the replacement to the artist column;
# a flat mapping on the DataFrame would also rewrite an identical string in any other
# column (for example a title). The result is a new DataFrame; df is not modified.
df.replace({'artist': {'The Weeknd': 'Weeknd', 'Ed Sheeran': 'Sheeran'}})

Unnamed: 0,title,artist,top genre,year,beats.per.minute,energy,danceability,loudness.dB,liveness,valance,length,acousticness,speechiness,popularity
0,Blinding Lights,Weeknd,canadian contemporary r&b,2020,171,73,51,-6,9,33,200,0,6,91
1,Watermelon Sugar,Harry Styles,pop,2019,95,82,55,-4,34,56,174,12,5,88
2,Mood (feat. iann dior),24kGoldn,cali rap,2021,91,72,70,-4,32,73,141,17,4,88
3,Someone You Loved,Lewis Capaldi,pop,2019,110,41,50,-6,11,45,182,75,3,86
4,Perfect,Sheeran,pop,2017,95,45,60,-6,11,17,263,16,2,86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,One Dance,Drake,canadian hip hop,2016,104,61,79,-6,32,43,174,1,6,66
96,Sugar,Maroon 5,pop,2015,120,79,75,-7,9,88,235,6,3,66
97,Emotions,Mark Mendy,pop dance,2021,126,83,66,-5,40,74,172,5,29,66
98,Cold Water,Major Lazer,dance pop,2018,93,80,61,-5,16,50,185,7,4,56


In [22]:
# Practice: in the top genre column, turn every value equal to 'pop' into 'Pop Music'.
# replace() matches whole cell values here, so genres that merely contain the word
# (e.g. 'dance pop') are not affected. The result is written back into df.
df['top genre'] = df['top genre'].replace(to_replace='pop', value='Pop Music')