# Billboard CSV Cleaning

Clean billboard dataset for BigQuery - remove multi-line lyrics and normalize text

In [11]:
library(tidyverse)
library(readr)

In [12]:
# load data
input <- '../../data/billboard_24years_lyrics_spotify.csv'
output <- '../../data/cleaned/billboard_24years_lyrics_spotify_bigquery.csv'

df <- read_csv(input, na=c("", "NA"))
sprintf("Loaded %d rows", nrow(df))

[1mRows: [22m[34m3397[39m [1mColumns: [22m[34m26[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ","
[31mchr[39m (11): song, band_singer, songurl, titletext, url, lyrics, uri, type, id,...
[32mdbl[39m (15): ranking, year, danceability, energy, key, loudness, mode, speechin...

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [13]:
glimpse(df)

Rows: 3,397
Columns: 26
$ ranking          [3m[90m<dbl>[39m[23m 1[90m, [39m2[90m, [39m2[90m, [39m3[90m, [39m3[90m, [39m4[90m, [39m5[90m, [39m6[90m, [39m7[90m, [39m8[90m, [39m9[90m, [39m10[90m, [39m11[90m, [39m12[90m, [39m13[90m, [39m14[90m, [39m~
$ song             [3m[90m<chr>[39m[23m "Breathe"[90m, [39m"Smooth"[90m, [39m"Smooth"[90m, [39m"Maria Maria"[90m, [39m"Maria ~
$ band_singer      [3m[90m<chr>[39m[23m "Faith Hill"[90m, [39m"Santana"[90m, [39m"Rob Thomas"[90m, [39m"Santana"[90m, [39m"Th~
$ songurl          [3m[90m<chr>[39m[23m "/wiki/Breathe_(Faith_Hill_song)"[90m, [39m"/wiki/Smooth_(San~
$ titletext        [3m[90m<chr>[39m[23m "Breathe"[90m, [39m"Smooth"[90m, [39m"Smooth"[90m, [39m"Maria Maria"[90m, [39m"Maria ~
$ url              [3m[90m<chr>[39m[23m "/wiki/Faith_Hill"[90m, [39m"/wiki/Santana_(band)"[90m, [39m"/wiki/Ro~
$ year             [3m[90m<dbl>[39m[23m 2000[90m, [39m2000[90m, 

In [14]:
# check spotify features
spotify_cols <- c('danceability', 'energy', 'tempo', 'duration_ms', 'valence')
sapply(df[spotify_cols], function(x) sum(is.na(x)))

In [15]:
# clean text fields
df2 <- df %>% 
  mutate(across(where(is.character), function(x) {
    x <- str_replace_all(x, "\\n", " ")
    x <- str_replace_all(x, "\\r", "")
    x <- str_replace_all(x, "\\s+", " ")
    str_trim(x)
  }))

sprintf("Cleaned %d rows", nrow(df2))

In [16]:
# verify
check <- df2 %>% filter(str_detect(lyrics, "\\n"))
if(nrow(check) == 0) {
  print("âœ“ No newlines")
}

[1] "<U+2713> No newlines"


In [17]:
# export
dir.create("../../data/cleaned", showWarnings=F, recursive=T)
write_csv(df2, output, na="", quote="all", eol="\n")
sprintf("Wrote: %s", output)

In [18]:
# check file
lines <- length(readLines(output))
expected <- nrow(df2) + 1
sprintf("Lines: %d (expected %d)", lines, expected)

In [19]:
# preview final output
df2 %>% 
  select(song, band_singer, year, danceability, energy, tempo) %>% 
  head(10)

song,band_singer,year,danceability,energy,tempo
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Breathe,Faith Hill,2000,0.529,0.496,136.859
Smooth,Santana,2000,0.609,0.923,115.996
Smooth,Rob Thomas,2000,0.59,0.637,115.983
Maria Maria,Santana,2000,0.777,0.601,97.911
Maria Maria,The Product G&B,2000,,,
I Wanna Know,Joe,2000,0.725,0.487,136.086
Everything You Want,Vertical Horizon,2000,0.636,0.761,93.896
Say My Name,Destiny's Child,2000,,,
I Knew I Loved You,Savage Garden,2000,0.61,0.497,85.031
Amazed,Lonestar,2000,0.561,0.543,139.803


In [20]:
# lyrics preview
df2 %>% 
  select(song, band_singer, lyrics) %>% 
  mutate(lyrics <- str_trunc(lyrics, 80)) %>% 
  head(5)

song,band_singer,lyrics,"lyrics <- str_trunc(lyrics, 80)"
<chr>,<chr>,<chr>,<chr>
Breathe,Faith Hill,"I can feel the magic floating in the air Being with you gets me that way I watch the sunlight dance across your face and I've Never been this swept away All my thoughts just seem to settle on the breeze When I'm lying wrapped up in your arms The whole world just fades away The only thing I hear Is the beating of your heart And I can feel you breathe, it's washing over me And suddenly, I'm melting into you There's nothing left to prove Baby, all we need is just to be Caught up in the touch, slow and steady rush Baby, isn't that the way that love's supposed to be? I can feel you breathe Just breathe In a way, I know my heart is waking up As all the walls come tumbling down I'm closer than I've ever felt before And I know, and you know There's no need for words right now You might also like And I can feel you breathe, it's washing over me And suddenly, I'm melting into you There's nothing left to prove Baby, all we need is just to be Caught up in the touch, slow and steady rush Baby, isn't that the way that love's supposed to be? I can feel you breathe Just breathe Caught up in the touch The slow and steady rush Baby, isn't that the way that love's supposed to be? I can feel you breathe Just breathe I can feel the magic floating in the air Being with you gets me that way7Embed",I can feel the magic floating in the air Being with you gets me that way I wa...
Smooth,Santana,"Man, it's a hot one Like seven inches from the midday sun Well, I hear you whisper and the words melt everyone But you stay so cool My mu<U+00F1>equita, my Spanish Harlem Mona Lisa You're my reason for reason, the step in my groove, yeah And if you say, ""This life ain't good enough"" I would give my world to lift you up I could change my life to better suit your mood 'Cause you're so smooth And it's just like the ocean under the moon Well, it's the same as the emotion that I get from you You got the kind of lovin' that can be so smooth Gimme your heart, make it real, or else forget about it Well, I'll tell you one thing If you would leave it'd be a crying shame In every breath and every word I hear your name callin' me out Out from the barrio, you hear my rhythm on the radio You feel the turning of the world so soft and slow Turnin' you round and round You might also like And if you say, ""This life ain't good enough"" I would give my world to lift you up I could change my life to better suit your mood 'Cause you're so smooth Oh, and it's just like the ocean under the moon Well, it's the same as the emotion that I get from you You got the kind of loving that can be so smooth, yeah Gimme your heart, make it real, or else forget about it, yeah And it's just like the ocean under the moon Well, it's the same as the emotion that I get from you You got the kind of loving that can be so smooth, yeah Gimme your heart, make it real, or else forget about it, damn Or else forget about it Oh, or else forget about it Oh, let's don't forget about it (Gimme your heart, make it real) Or else forget about it, hey Let's don't forget about it, no, oh, no, oh Let's don't forget about it, no, no, no, oh Let's don't forget about it, hey, no, no, oh Let's don't forget about it, hey, hey, hey24Embed","Man, it's a hot one Like seven inches from the midday sun Well, I hear you wh..."
Smooth,Rob Thomas,"Man, it's a hot one Like seven inches from the midday sun Well, I hear you whisper and the words melt everyone But you stay so cool My mu<U+00F1>equita, my Spanish Harlem Mona Lisa You're my reason for reason, the step in my groove, yeah And if you say, ""This life ain't good enough"" I would give my world to lift you up I could change my life to better suit your mood 'Cause you're so smooth And it's just like the ocean under the moon Well, it's the same as the emotion that I get from you You got the kind of lovin' that can be so smooth Gimme your heart, make it real, or else forget about it Well, I'll tell you one thing If you would leave it'd be a crying shame In every breath and every word I hear your name callin' me out Out from the barrio, you hear my rhythm on the radio You feel the turning of the world so soft and slow Turnin' you round and round You might also like And if you say, ""This life ain't good enough"" I would give my world to lift you up I could change my life to better suit your mood 'Cause you're so smooth Oh, and it's just like the ocean under the moon Well, it's the same as the emotion that I get from you You got the kind of loving that can be so smooth, yeah Gimme your heart, make it real, or else forget about it, yeah And it's just like the ocean under the moon Well, it's the same as the emotion that I get from you You got the kind of loving that can be so smooth, yeah Gimme your heart, make it real, or else forget about it, damn Or else forget about it Oh, or else forget about it Oh, let's don't forget about it (Gimme your heart, make it real) Or else forget about it, hey Let's don't forget about it, no, oh, no, oh Let's don't forget about it, no, no, no, oh Let's don't forget about it, hey, no, no, oh Let's don't forget about it, hey, hey, hey24Embed","Man, it's a hot one Like seven inches from the midday sun Well, I hear you wh..."
Maria Maria,Santana,"Ladies and gents, turn up your sound systems To the sound of Carlos Santana And the G&B It's the product of Ghetto Blues From the Refugee Camp Oh, Maria, Maria She remind me of a West Side Story Growing up in Spanish Harlem She living the life just like a movie star Oh, Maria, Maria She fell in love in East L.A To the sounds of the guitar, yeah, yeah Played by Carlos Santana Stop the looting, stop the shooting Pickpocketing on the corner See, as the rich is getting richer The poor is getting poorer See me and Maria on the corner Thinking of ways to make it better In my mailbox, there's an eviction letter Signed by the judge, said, ""See you later"", yeah You might also like Ahora vengo mama chula, mama chula Ahora vengo mama chula (East Coast) Ahora vengo mama chula, mama chula Ahora vengo mama chula (West Coast) Oh, Maria, Maria She remind me of a West Side Story Growing up in Spanish Harlem She living the life just like a movie star Oh, Maria, Maria She fell in love in East L.A I said, to the sounds of the guitar, yeah, yeah Played by Carlos Santana I said a la favela los colores The streets are getting hotter There is no water to put out the fire Ni gota de esperanza See me and Maria on the corner Thinking of ways to make it better Then I looked up in the sky Hoping that there's a paradise, yeah, yeah Ahora vengo mama chula, mama chula Ahora vengo mama chula (North side) Ahora vengo mama chula, mama chula Ahora vengo mama chula (South side) Ahora vengo mama chula, mama chula Ahora vengo mama chula (Worldwide) Ahora vengo mama chula, mama chula Ahora vengo mama chula (Open up your eyes) Maria, you know you're my lover When the wind blows, I can feel you Through the weather And even when we are apart Still feels like we're together, Maria, yeah She remind me of a West Side Story Growing up in Spanish Harlem She living the life just like a movie star Oh, Maria, Maria Oh, she fell in love in East L.A To the sounds of the guitar Played by Carlos Santana Put 'em up y'all Carlos Santana with the Refugee Camp Wyclef Jerry Wonda Mr. Santana G&B Yo Carlos, man, you're making that guitar cry22Embed","Ladies and gents, turn up your sound systems To the sound of Carlos Santana A..."
Maria Maria,The Product G&B,"Turn up this sound system To the sound of Carlos Santana And the G-M-B's Ghetto blues from the refugee camp Oh Maria Maria She reminds me of a westside story Growing up in Spanish Harlem She's livin' her life just like a movie star Oh, Maria Maria She fell in love in East L.A To the sounds of the guitar Played by Carlos Santan Stop the looting, stop the shooting Pick pocking on the corner See as the rich is getting richer The poor is getting poorer Se mira Maria on the corner Thinkin' of ways to make it better In my mailbox there's an eviction letter Somebody just said see you later Ahora vengo mama chola mama chola Ahora vengo mama chola (East coast) Ahora vengo mama chola mama chola Ahora vengo mama chola (West coast) You might also likeOh Maria Maria She reminds me of a westside story Growing up in Spanish Harlem She's livin' her life just like a movie star Oh, Maria Maria She fell in love in East L.A To the sounds of the guitar Played by Carlos Santan I said a la favella los colores The streets are getting hotter There is no water To put out the fire Mi canto la esperanza Se mira Maria on the corner Thinkin' of ways to make it better Then I looked up in the sky Hoping of days of paradise Ahora vengo mama chola mama chola Ahora vengo mama chola (North side) Ahora vengo mama chola mama chola Ahora vengo mama chola (South side) Ahora vengo mama chola mama chola Ahora vengo mama chola (World wide) Ahora vengo mama chola mama chola Ahora vengo mama chola (Open up your eyes) Maria you know your my lover When the wind blows I can feel you through the weather And even when we are apart It feels like we are together Maria, yeah She reminds me of a westside story Growing up in Spanish Harlem She's livin' her life just like a movie star Oh Maria Maria She fell in love in East L.A To the sounds of the guitar Played by Carlos Santana Put 'em up y'all Carlos Santana with the refugee camp Wyclef Jerry Wonder Mr. Santana Gmb Yo, Carlos, man you makin' the guitar cryEmbed",Turn up this sound system To the sound of Carlos Santana And the G-M-B's Ghet...
