<a href="https://colab.research.google.com/github/makaylalerner/swiftiedata/blob/main/ts_data_exp_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Taylor Swift Eras Project**: Data Cleaning, Exploration, and Transformation
This file is part of a larger project to visualize Spotify data from Taylor Swift's albums/eras. This file is for cleaning, exploring, and transforming the data as needed; ultimately the data will be visualized in D3 as part of a React web app. See the repo for more information: https://github.com/makaylalerner/swiftiedata

In [None]:
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib as mpl


In [None]:
# Read the track table straight from the raw GitHub CSV. The 'Unnamed: 0'
# column is just a saved copy of the index, so it is dropped right away.
data = pd.read_csv(
    "https://raw.githubusercontent.com/makaylalerner/swiftiedata/main/taylor_swift_spotify.csv",
    header=0,
).drop(columns=['Unnamed: 0'])

data


Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms
0,Lavender Haze,Midnights (3am Edition),2022-10-22,1,4g2c7NoTWAOSYDy44l9nub,spotify:track:4g2c7NoTWAOSYDy44l9nub,0.204000,0.735,0.444,0.001200,0.1700,-10.519,0.0684,97.038,0.0984,76,202395
1,Maroon,Midnights (3am Edition),2022-10-22,2,199E1RRrVmVTQqBXih5qRC,spotify:track:199E1RRrVmVTQqBXih5qRC,0.059300,0.658,0.378,0.000000,0.0976,-8.300,0.0379,108.034,0.0382,75,218270
2,Anti-Hero,Midnights (3am Edition),2022-10-22,3,02Zkkf2zMkwRGQjZ7T4p8f,spotify:track:02Zkkf2zMkwRGQjZ7T4p8f,0.133000,0.638,0.634,0.000001,0.1520,-6.582,0.0457,96.953,0.5190,77,200690
3,Snow On The Beach (feat. Lana Del Rey),Midnights (3am Edition),2022-10-22,4,6ADDIJxxqzM9LMpm78yzQG,spotify:track:6ADDIJxxqzM9LMpm78yzQG,0.735000,0.659,0.323,0.003210,0.1160,-13.425,0.0436,110.007,0.1540,74,256124
4,"You're On Your Own, Kid",Midnights (3am Edition),2022-10-22,5,7gVWKBcfIW93YxNBi3ApIE,spotify:track:7gVWKBcfIW93YxNBi3ApIE,0.416000,0.694,0.380,0.000008,0.1260,-10.307,0.0614,120.044,0.3760,75,194206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1260,Mary's Song (Oh My My My) - Instrumental w/ BG...,Taylor Swift Karaoke,2006-10-24,10,5YluKOG2VGcJMO8XVMpg9h,spotify:track:5YluKOG2VGcJMO8XVMpg9h,0.000081,0.475,0.529,0.562000,0.1460,-10.802,0.0317,151.000,0.3010,6,216226
1261,Our Song - Instrumental w/ BG vocals,Taylor Swift Karaoke,2006-10-24,11,0PHdWHKV69ZNQyfYLBlVAT,spotify:track:0PHdWHKV69ZNQyfYLBlVAT,0.032600,0.528,0.484,0.001650,0.1400,-9.266,0.0316,178.299,0.5850,11,204306
1262,I'm Only Me When I'm With You - Instrumental w...,Taylor Swift Karaoke,2006-10-24,12,4Vg8MqpDQFDfKmXdpO1jD3,spotify:track:4Vg8MqpDQFDfKmXdpO1jD3,0.000053,0.541,0.796,0.820000,0.3260,-8.833,0.0306,144.004,0.8470,6,223386
1263,Invisible - Instrumental w/ BG vocals,Taylor Swift Karaoke,2006-10-24,13,7Fg8MxumrT8axFZVzN1MtT,spotify:track:7Fg8MxumrT8axFZVzN1MtT,0.259000,0.575,0.279,0.572000,0.0941,-12.066,0.0286,96.007,0.1180,5,206480


In [None]:
# the same track can appear once per album edition -- inspect one title to see the duplicates
data.loc[data['name'].eq('Lavender Haze')]

Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms
0,Lavender Haze,Midnights (3am Edition),2022-10-22,1,4g2c7NoTWAOSYDy44l9nub,spotify:track:4g2c7NoTWAOSYDy44l9nub,0.204,0.735,0.444,0.0012,0.17,-10.519,0.0684,97.038,0.0984,76,202395
20,Lavender Haze,Midnights (3am Edition),2022-10-21,1,2KkIiEt1WIHOYItqkD30kR,spotify:track:2KkIiEt1WIHOYItqkD30kR,0.253,0.735,0.46,0.00167,0.184,-10.501,0.0804,96.962,0.106,54,202395
40,Lavender Haze,Midnights,2022-10-21,1,5jQI2r1RdgtuT8S3iG8zFC,spotify:track:5jQI2r1RdgtuT8S3iG8zFC,0.258,0.733,0.436,0.000573,0.157,-10.489,0.08,96.985,0.0976,88,202395
53,Lavender Haze,Midnights,2022-10-21,1,5E36pd9opG8oT1GkKYv7qz,spotify:track:5E36pd9opG8oT1GkKYv7qz,0.253,0.743,0.463,0.000634,0.164,-10.485,0.0782,96.999,0.101,53,202395


In [None]:
# every distinct album title present in the original set
pd.unique(data['album'])

array(['Midnights (3am Edition)', 'Midnights', "Red (Taylor's Version)",
       "Fearless (Taylor's Version)", 'evermore (deluxe version)',
       'evermore',
       'folklore: the long pond studio sessions (from the Disney+ special) [deluxe edition]',
       'folklore (deluxe version)', 'folklore', 'Lover',
       'Taylor Swift Karaoke: reputation', 'reputation',
       'reputation (Big Machine Radio Release Special)',
       'reputation Stadium Tour Surprise Song Playlist',
       'Taylor Swift Karaoke: 1989 (Deluxe)', '1989',
       '1989 (Big Machine Radio Release Special)',
       'Taylor Swift Karaoke: 1989 (Deluxe Edition)',
       'Taylor Swift Karaoke: 1989', '1989 (Deluxe Edition)',
       '1989 (Deluxe)', 'Red', 'Red (Deluxe Edition)',
       'Red (Big Machine Radio Release Special)', 'Red (Karaoke Version)',
       'Taylor Swift Karaoke: Red', 'Speak Now (Japanese Version)',
       'Speak Now World Tour Live', 'Speak Now',
       'Speak Now (Big Machine Radio Release Specia

In [None]:
# a single album, shown for reference
data.loc[data['album'].eq("Midnights")]

Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms
40,Lavender Haze,Midnights,2022-10-21,1,5jQI2r1RdgtuT8S3iG8zFC,spotify:track:5jQI2r1RdgtuT8S3iG8zFC,0.258,0.733,0.436,0.000573,0.157,-10.489,0.08,96.985,0.0976,88,202395
41,Maroon,Midnights,2022-10-21,2,3eX0NZfLtGzoLUxPNvRfqm,spotify:track:3eX0NZfLtGzoLUxPNvRfqm,0.0573,0.637,0.398,1e-06,0.101,-8.294,0.0564,108.075,0.0374,81,218270
42,Anti-Hero,Midnights,2022-10-21,3,0V3wPSX9ygBnCm8psDIegu,spotify:track:0V3wPSX9ygBnCm8psDIegu,0.13,0.637,0.643,2e-06,0.142,-6.571,0.0519,97.008,0.533,94,200690
43,Snow On The Beach (feat. Lana Del Rey),Midnights,2022-10-21,4,1wtOxkiel43cVs0Yux5Q4h,spotify:track:1wtOxkiel43cVs0Yux5Q4h,0.69,0.663,0.319,0.000993,0.117,-13.481,0.0375,109.957,0.193,81,256124
44,"You're On Your Own, Kid",Midnights,2022-10-21,5,4D7BCuvgdJlYvlX5WlN54t,spotify:track:4D7BCuvgdJlYvlX5WlN54t,0.401,0.696,0.396,5e-06,0.125,-10.289,0.0656,120.041,0.38,82,194206
45,Midnight Rain,Midnights,2022-10-21,6,3rWDp9tBPQR9z6U5YyRSK4,spotify:track:3rWDp9tBPQR9z6U5YyRSK4,0.69,0.643,0.363,5.2e-05,0.115,-11.738,0.0767,139.865,0.23,85,174782
46,Question...?,Midnights,2022-10-21,7,0heeNYlwOGuUSe7TgUD27B,spotify:track:0heeNYlwOGuUSe7TgUD27B,0.2,0.751,0.502,0.0,0.296,-8.763,0.167,108.943,0.106,76,210556
47,Vigilante Shit,Midnights,2022-10-21,8,1xwAWUI6Dj0WGC3KiUPN0O,spotify:track:1xwAWUI6Dj0WGC3KiUPN0O,0.173,0.798,0.277,5.3e-05,0.121,-11.096,0.39,79.846,0.163,78,164801
48,Bejeweled,Midnights,2022-10-21,9,3qoftcUZaUOncvIYjFSPdE,spotify:track:3qoftcUZaUOncvIYjFSPdE,0.0618,0.696,0.559,5.6e-05,0.0887,-9.19,0.0693,163.999,0.433,82,194165
49,Labyrinth,Midnights,2022-10-21,10,0A1JLUlkZkp2EFrosoNQi0,spotify:track:0A1JLUlkZkp2EFrosoNQi0,0.785,0.406,0.306,0.488,0.122,-15.48,0.0517,110.014,0.122,75,247962


In [None]:
# Albums excluded from the analysis: mostly karaoke versions, radio release
# specials, live/tour sets and regional editions rather than her own album releases.
albums_to_remove = [
    # folklore
    "folklore: the long pond studio sessions (from the Disney+ special) [deluxe edition]",
    # reputation
    "Taylor Swift Karaoke: reputation",
    "reputation (Big Machine Radio Release Special)",
    "reputation Stadium Tour Surprise Song Playlist",
    # 1989
    "Taylor Swift Karaoke: 1989",
    "Taylor Swift Karaoke: 1989 (Deluxe)",
    "1989 (Big Machine Radio Release Special)",
    "Taylor Swift Karaoke: 1989 (Deluxe Edition)",
    # Red
    "Red (Big Machine Radio Release Special)",
    "Red (Karaoke Version)",
    "Taylor Swift Karaoke: Red",
    # Speak Now
    "Speak Now (Japanese Version)",
    "Speak Now World Tour Live",
    "Speak Now (Big Machine Radio Release Special)",
    "Speak Now (Karaoke Version)",
    "Taylor Swift Karaoke: Speak Now",
    "Speak Now (US Version)",
    # Fearless
    "Fearless (International Version)",
    "Fearless (Big Machine Radio Release Special)",
    "Fearless (Karaoke Version)",
    "Fearless Karaoke",
    "Live From Clear Channel Stripped 2008",
    # Taylor Swift (debut)
    "Taylor Swift (Big Machine Radio Release Special)",
    "Taylor Swift (Karaoke Version)",
    "Taylor Swift Karaoke",
]

# keep only the rows whose album is NOT on the exclusion list
cleaned_data = data.loc[~data['album'].isin(albums_to_remove)]

cleaned_data

Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms
0,Lavender Haze,Midnights (3am Edition),2022-10-22,1,4g2c7NoTWAOSYDy44l9nub,spotify:track:4g2c7NoTWAOSYDy44l9nub,0.20400,0.735,0.444,0.001200,0.1700,-10.519,0.0684,97.038,0.0984,76,202395
1,Maroon,Midnights (3am Edition),2022-10-22,2,199E1RRrVmVTQqBXih5qRC,spotify:track:199E1RRrVmVTQqBXih5qRC,0.05930,0.658,0.378,0.000000,0.0976,-8.300,0.0379,108.034,0.0382,75,218270
2,Anti-Hero,Midnights (3am Edition),2022-10-22,3,02Zkkf2zMkwRGQjZ7T4p8f,spotify:track:02Zkkf2zMkwRGQjZ7T4p8f,0.13300,0.638,0.634,0.000001,0.1520,-6.582,0.0457,96.953,0.5190,77,200690
3,Snow On The Beach (feat. Lana Del Rey),Midnights (3am Edition),2022-10-22,4,6ADDIJxxqzM9LMpm78yzQG,spotify:track:6ADDIJxxqzM9LMpm78yzQG,0.73500,0.659,0.323,0.003210,0.1160,-13.425,0.0436,110.007,0.1540,74,256124
4,"You're On Your Own, Kid",Midnights (3am Edition),2022-10-22,5,7gVWKBcfIW93YxNBi3ApIE,spotify:track:7gVWKBcfIW93YxNBi3ApIE,0.41600,0.694,0.380,0.000008,0.1260,-10.307,0.0614,120.044,0.3760,75,194206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1202,Our Song,Taylor Swift,2006-10-24,11,15DeqWWQB4dcEWzJg15VrN,spotify:track:15DeqWWQB4dcEWzJg15VrN,0.11100,0.668,0.672,0.000000,0.3290,-4.931,0.0303,89.011,0.5390,72,201106
1203,I'm Only Me When I'm With You,Taylor Swift,2006-10-24,12,0JIdBrXGSJXS72zjF9ss9u,spotify:track:0JIdBrXGSJXS72zjF9ss9u,0.00452,0.563,0.934,0.000807,0.1030,-3.629,0.0646,143.964,0.5180,58,213053
1204,Invisible,Taylor Swift,2006-10-24,13,5OOd01o2YS1QFwdpVLds3r,spotify:track:5OOd01o2YS1QFwdpVLds3r,0.63700,0.612,0.394,0.000000,0.1470,-5.723,0.0243,96.001,0.2330,54,203226
1205,A Perfectly Good Heart,Taylor Swift,2006-10-24,14,1spLfUJxtyVyiKKTegQ2r4,spotify:track:1spLfUJxtyVyiKKTegQ2r4,0.00349,0.483,0.751,0.000000,0.1280,-5.726,0.0365,156.092,0.2680,53,220146


In [None]:
# album titles that remain after the exclusion filter
pd.unique(cleaned_data['album'])

array(['Midnights (3am Edition)', 'Midnights', "Red (Taylor's Version)",
       "Fearless (Taylor's Version)", 'evermore (deluxe version)',
       'evermore', 'folklore (deluxe version)', 'folklore', 'Lover',
       'reputation', '1989', '1989 (Deluxe Edition)', '1989 (Deluxe)',
       'Red', 'Red (Deluxe Edition)', 'Speak Now',
       'Speak Now (Deluxe Edition)', 'Speak Now (Deluxe Package)',
       'Fearless', 'Fearless (Platinum Edition)',
       'Fearless Platinum Edition', 'Taylor Swift'], dtype=object)

In [None]:
#function definition for classifying era in its own column for use later, combining deluxe and special editions with the original album

# (era label, album titles belonging to that era) pairs, checked in order.
# Deluxe and special editions are listed alongside the original album.
_ERA_ALBUMS = (
  ('Midnights', ('Midnights (3am Edition)', 'Midnights')),
  ('TaylorsVersion', ("Red (Taylor's Version)", "Fearless (Taylor's Version)")),
  ('Evermore', ('evermore (deluxe version)', 'evermore')),
  ('Folklore', ('folklore (deluxe version)', 'folklore')),
  ('Lover', ('Lover',)),
  ('Reputation', ('reputation',)),
  ('1989', ('1989 (Deluxe Edition)', '1989 (Deluxe)', '1989')),
  ('Red', ('Red (Deluxe Edition)', 'Red')),
  ('SpeakNow', ('Speak Now (Deluxe Edition)', 'Speak Now (Deluxe Package)', 'Speak Now')),
  ('Fearless', ('Fearless (Platinum Edition)', 'Fearless Platinum Edition', 'Fearless')),
  ('Debut', ('Taylor Swift',)),
)


def classify_era(row):
  """Return the era label for a row, based on its 'album' value.

  `row` is anything indexable by 'album' (e.g. a DataFrame row). Album titles
  that match no known era yield 'Other'.
  """
  album = row['album']
  for era, albums in _ERA_ALBUMS:
    if album in albums:
      return era
  return 'Other'


In [None]:
# work on a copy so cleaned_data stays untouched, then label every row with its era
clean_copy = cleaned_data.copy()
clean_copy['era'] = clean_copy.apply(classify_era, axis='columns')


In [None]:
# sanity check: rows that were labelled with the SpeakNow era
clean_copy.loc[clean_copy['era'].eq('SpeakNow')]

Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,era
860,Mine,Speak Now,2010-10-25,1,0dBW6ZsW8skfvoRfgeerBF,spotify:track:0dBW6ZsW8skfvoRfgeerBF,0.00265,0.624,0.757,0.000002,0.1890,-2.940,0.0296,121.070,0.658,67,230706,SpeakNow
861,Sparks Fly,Speak Now,2010-10-25,2,6d9IiDcFxtFVIvt9pCqyGH,spotify:track:6d9IiDcFxtFVIvt9pCqyGH,0.03960,0.605,0.787,0.000001,0.1630,-3.002,0.0308,114.987,0.374,65,260933,SpeakNow
862,Back To December,Speak Now,2010-10-25,3,3DrjZArsPsoqbLzUZZV1Id,spotify:track:3DrjZArsPsoqbLzUZZV1Id,0.11700,0.529,0.670,0.000000,0.3340,-4.663,0.0303,141.893,0.286,69,293026,SpeakNow
863,Speak Now,Speak Now,2010-10-25,4,24DefNCFiWTP8OjYWiXuYe,spotify:track:24DefNCFiWTP8OjYWiXuYe,0.09500,0.709,0.599,0.000000,0.0973,-3.734,0.0304,118.975,0.735,64,240760,SpeakNow
864,Dear John,Speak Now,2010-10-25,5,7hZuICN5eaCuQyp443RCt6,spotify:track:7hZuICN5eaCuQyp443RCt6,0.16600,0.589,0.470,0.000001,0.1120,-5.320,0.0280,119.386,0.102,61,403920,SpeakNow
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Back To December - Acoustic,Speak Now (Deluxe Package),2010-01-01,18,7k7BOXE4u6luKTwcLEAyrd,spotify:track:7k7BOXE4u6luKTwcLEAyrd,0.73100,0.541,0.451,0.000000,0.1970,-6.522,0.0270,141.713,0.333,60,292533,SpeakNow
996,Haunted - Acoustic Version,Speak Now (Deluxe Package),2010-01-01,19,6SdBf0ZsJZoKaH5zcDaz9X,spotify:track:6SdBf0ZsJZoKaH5zcDaz9X,0.84100,0.574,0.462,0.000000,0.2800,-5.124,0.0252,80.858,0.314,53,217626,SpeakNow
997,Mine,Speak Now (Deluxe Package),2010-01-01,20,67Io3gxHwfbUreBf114c0u,spotify:track:67Io3gxHwfbUreBf114c0u,0.00327,0.621,0.780,0.000005,0.1840,-2.934,0.0297,121.038,0.672,67,230773,SpeakNow
998,Back To December,Speak Now (Deluxe Package),2010-01-01,21,5aNkiOMxQXkpoEY5bTYoCh,spotify:track:5aNkiOMxQXkpoEY5bTYoCh,0.11300,0.525,0.676,0.000000,0.2940,-4.684,0.0294,141.950,0.281,49,293040,SpeakNow


In [None]:
#using regex to normalize song names 
import re

def normalize_song_name(song_name):
    """Reduce a track title to a key shared by all versions of the same song.

    Removes any "(...)" or "[...]" qualifier and any trailing " - suffix"
    (e.g. " - Acoustic"), then trims surrounding whitespace and lowercases.

    Args:
        song_name: the track title as a string.

    Returns:
        The normalized, lowercase title.
    """
    # The hyphen must be surrounded by whitespace to count as a suffix
    # separator; otherwise hyphenated titles such as "Anti-Hero" would be
    # truncated to "anti".
    cleaned_name = re.sub(r"[\(\[].*?[\)\]]|\s+-\s+.*$", "", song_name)
    # Trim leftover edge whitespace and lowercase the title
    return cleaned_name.strip().lower()

# add the normalized title so that every version of a song shares one key
clean_copy['normalized_song_name'] = clean_copy['name'].map(normalize_song_name)

# numeric Spotify metrics that are averaged over all versions of the same song
columns_to_average = [
    "acousticness", "danceability", "energy", "instrumentalness",
    "liveness", "loudness", "speechiness", "tempo", "valence",
    "popularity", "duration_ms",
]

# one row per (era, song): the mean of each metric across that song's versions
grouped_data = (
    clean_copy
    .groupby(['era', 'normalized_song_name'])[columns_to_average]
    .mean()
    .reset_index()
)


In [None]:
# release dates had to be dropped for grouping; since only the years are needed for the timeline, this function adds a release date back for each era
def add_year(row):
  """Return the release date of the row's era as an 'MM/DD/YYYY' string.

  `row` is anything indexable by 'era' (e.g. a DataFrame row). Despite the
  function name, the full date is returned, not just the year. Eras without
  an entry (such as 'Other') yield None.
  """
  release_dates = {
    # the source data lists the "Taylor Swift" album as released 2006-10-24
    'Debut': '10/24/2006',
    'Fearless': '11/11/2008',
    'SpeakNow': '10/25/2010',
    'Red': '10/22/2012',
    '1989': '10/27/2014',
    'Reputation': '11/10/2017',
    'Lover': '08/23/2019',
    'Folklore': '07/24/2020',
    'Evermore': '12/11/2020',
    'TaylorsVersion': '04/09/2021',
    'Midnights': '10/21/2022',
  }
  return release_dates.get(row['era'])

In [None]:
# run add_year on every row to build the year column
grouped_data['year'] = grouped_data.apply(add_year, axis='columns')

In [None]:
# keep the row index as a regular column as well
grouped_data['myindex'] = grouped_data.index.to_numpy()

In [None]:
grouped_data.loc[grouped_data['era'].eq('Debut')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
16,Debut,a perfectly good heart,0.00349,0.483,0.751,0.0,0.128,-5.726,0.0365,156.092,0.268,52.5,220146.0,10/26/2006,16
17,Debut,a place in this world,0.051,0.576,0.777,0.0,0.32,-2.881,0.0324,115.028,0.428,56.0,199200.0,10/26/2006,17
18,Debut,cold as you,0.217,0.418,0.482,0.0,0.123,-5.769,0.0266,175.558,0.261,56.0,239013.0,10/26/2006,18
19,Debut,i'm only me when i'm with you,0.00452,0.563,0.934,0.000807,0.103,-3.629,0.0646,143.964,0.518,56.5,213053.0,10/26/2006,19
20,Debut,invisible,0.637,0.612,0.394,0.0,0.147,-5.723,0.0243,96.001,0.233,53.5,203226.0,10/26/2006,20
21,Debut,mary's song,0.0177,0.403,0.627,0.0,0.182,-5.28,0.0292,74.9,0.374,56.0,213080.0,10/26/2006,21
22,Debut,our song,0.111,0.668,0.672,0.0,0.329,-4.931,0.0303,89.011,0.539,67.0,201106.0,10/26/2006,22
23,Debut,picture to burn,0.173,0.658,0.877,0.0,0.0962,-2.098,0.0323,105.586,0.821,64.5,173066.0,10/26/2006,23
24,Debut,should've said no,0.0103,0.476,0.777,0.0,0.196,-3.771,0.0289,167.964,0.472,63.0,242200.0,10/26/2006,24
25,Debut,stay beautiful,0.0868,0.594,0.629,0.0,0.137,-4.919,0.0246,131.597,0.504,53.5,236053.0,10/26/2006,25


In [None]:
grouped_data.loc[grouped_data['era'].eq('Fearless')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
47,Fearless,beautiful eyes,0.383,0.529,0.7,0.0,0.0967,-3.303,0.026,96.019,0.372,11.0,179240.0,11/11/2008,47
48,Fearless,breathe,0.380833,0.496,0.4815,0.0,0.119,-7.3825,0.027317,135.676167,0.168167,34.0,264372.833333,11/11/2008,48
49,Fearless,change,0.004393,0.5435,0.761,0.0,0.1125,-4.187167,0.036383,96.009333,0.225,30.666667,280584.166667,11/11/2008,49
50,Fearless,come in with the rain,0.09145,0.527,0.468,3.405e-06,0.244,-6.1885,0.0257,143.951,0.227,44.0,238099.5,11/11/2008,50
51,Fearless,fearless,0.0376,0.563667,0.707833,3.466667e-07,0.307167,-4.399,0.02875,116.6735,0.4555,37.5,242550.833333,11/11/2008,51
52,Fearless,fifteen,0.067333,0.5585,0.636667,0.0,0.143833,-4.425833,0.026417,95.447833,0.208667,36.666667,294741.833333,11/11/2008,52
53,Fearless,forever & always,0.276587,0.602,0.658,0.0,0.108125,-6.43575,0.051412,125.750375,0.463125,37.25,236176.25,11/11/2008,53
54,Fearless,hey stephen,0.177333,0.842167,0.550333,2.478333e-06,0.100183,-7.349167,0.031517,115.998667,0.821333,34.166667,254979.666667,11/11/2008,54
55,Fearless,i heart ?,0.0472,0.706,0.818,0.0,0.327,-3.335,0.0267,105.995,0.64,16.0,197346.0,11/11/2008,55
56,Fearless,i'm only me when i'm with you,0.00498,0.557,0.935,0.000611,0.101,-3.694,0.0675,143.997,0.543,20.0,214320.0,11/11/2008,56


In [None]:
grouped_data.loc[grouped_data['era'].eq('SpeakNow')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
161,SpeakNow,back to december,0.266629,0.528429,0.590286,0.0,0.286714,-5.512143,0.028771,141.866286,0.307857,61.714286,292895.0,10/25/2010,161
162,SpeakNow,better than revenge,0.0153,0.5175,0.917,1.3485e-05,0.3595,-3.1855,0.0857,145.8515,0.6435,63.75,217166.5,10/25/2010,162
163,SpeakNow,dear john,0.1745,0.586,0.469,1.55e-06,0.1115,-5.349,0.0279,119.3805,0.114,61.75,403915.0,10/25/2010,163
164,SpeakNow,enchanted,0.07275,0.495,0.6205,0.000406,0.1575,-3.8955,0.02805,122.934,0.218,73.25,352193.0,10/25/2010,164
165,SpeakNow,haunted,0.333667,0.4795,0.782167,3.6e-07,0.1955,-3.456333,0.046683,135.006,0.342167,58.0,233930.833333,10/25/2010,165
166,SpeakNow,if this was a movie,0.16,0.513,0.7215,4.295e-06,0.3045,-3.4775,0.0266,147.82,0.273,58.5,234546.0,10/25/2010,166
167,SpeakNow,innocent,0.194,0.5525,0.606,0.0,0.125,-5.2925,0.02585,134.0195,0.178,54.0,302259.5,10/25/2010,167
168,SpeakNow,last kiss,0.5755,0.365,0.335,3.42e-05,0.09945,-9.503,0.0306,86.698,0.202,60.75,367139.5,10/25/2010,168
169,SpeakNow,long live,0.0393,0.415,0.681,7.53e-05,0.1085,-4.3095,0.034275,204.04075,0.144,59.25,317829.75,10/25/2010,169
170,SpeakNow,mean,0.4485,0.569,0.754,0.0,0.218,-3.9825,0.04425,163.989,0.7985,61.5,237739.5,10/25/2010,170


In [None]:
grouped_data.loc[grouped_data['era'].eq('Red')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
127,Red,22,0.001632,0.649,0.72,0.185725,0.06505,-6.95525,0.036425,103.99975,0.66275,44.75,231347.25,10/22/2012,127
128,Red,all too well,0.030322,0.60075,0.61675,0.1864,0.1325,-8.35725,0.02525,93.046,0.32725,46.75,328928.0,10/22/2012,128
129,Red,begin again,0.16335,0.5115,0.52875,0.229503,0.17875,-8.9095,0.026775,79.014,0.336,39.5,238063.0,10/22/2012,129
130,Red,come back...be here,0.00471,0.483,0.548,2e-06,0.0997,-6.291,0.0254,79.926,0.217,48.5,223313.0,10/22/2012,130
131,Red,everything has changed,0.30075,0.6125,0.4625,0.186503,0.3155,-7.67075,0.037775,80.00925,0.39775,41.0,245021.5,10/22/2012,131
132,Red,girl at home,0.272,0.733,0.59,0.0,0.135,-5.86,0.0284,125.048,0.633,42.0,220580.0,10/22/2012,132
133,Red,holy ground,0.01285,0.62725,0.80375,0.18731,0.094725,-7.12375,0.07035,157.03975,0.65475,35.0,202838.5,10/22/2012,133
134,Red,i almost do,0.016575,0.56925,0.484,0.176031,0.1735,-7.84725,0.0279,145.915,0.2205,36.5,243789.25,10/22/2012,134
135,Red,i knew you were trouble.,0.003626,0.638,0.44,0.017352,0.039675,-7.33225,0.038875,77.01875,0.64875,47.25,219129.5,10/22/2012,135
136,Red,red,0.076607,0.628833,0.893833,0.061623,0.084533,-4.7265,0.039,124.987833,0.645333,44.5,223633.833333,10/22/2012,136


In [None]:
grouped_data.loc[grouped_data['era'].eq('1989')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
0,1989,all you had to do was stay,0.001981,0.603286,0.731,3.534286e-05,0.103286,-5.757,0.033129,96.969429,0.500429,43.857143,193293.0,10/27/2014,0
1,1989,bad blood,0.0877,0.648857,0.798571,6.351429e-06,0.170714,-6.109714,0.184857,170.179143,0.291571,48.428571,211933.0,10/27/2014,1
2,1989,blank space,0.3052,0.7315,0.5523,6.56e-07,0.27649,-8.3112,0.2583,105.3909,0.5776,41.3,201634.0,10/27/2014,2
3,1989,clean,0.237429,0.812143,0.378429,0.0,0.107429,-7.759143,0.0348,103.976286,0.213571,41.428571,271000.0,10/27/2014,3
4,1989,how you get the girl,0.0042,0.764429,0.658,0.006391429,0.091629,-6.125714,0.049771,119.990143,0.533286,42.285714,247533.0,10/27/2014,4
5,1989,i know places,0.4146,0.5964,0.5701,0.0,0.2915,-8.8731,0.22444,135.6213,0.3963,32.6,201894.1,10/27/2014,5
6,1989,i wish you would,0.22624,0.6896,0.7342,5.447e-05,0.1297,-9.0932,0.31022,118.115,0.5484,34.1,177347.9,10/27/2014,6
7,1989,new romantics,0.005055,0.63325,0.88775,0.000398,0.0635,-5.8635,0.0717,121.95675,0.5835,50.25,230462.75,10/27/2014,7
8,1989,out of the woods,0.000907,0.552429,0.841286,1.195714e-05,0.338714,-6.937571,0.038257,91.998857,0.340571,44.857143,235800.0,10/27/2014,8
9,1989,shake it off,0.059786,0.647286,0.791429,0.0,0.227714,-5.401143,0.165,160.043429,0.942571,51.857143,219200.0,10/27/2014,9


In [None]:
grouped_data.loc[grouped_data['era'].eq('Reputation')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
146,Reputation,...ready for it?,0.0527,0.613,0.764,0.0,0.197,-6.509,0.136,160.015,0.417,79.0,208186.0,11/10/2017,146
147,Reputation,call it what you want,0.186,0.598,0.504,0.000221,0.34,-9.874,0.0731,163.954,0.252,76.0,203506.0,11/10/2017,147
148,Reputation,dancing with our hands tied,0.0604,0.624,0.691,1.1e-05,0.138,-6.686,0.196,160.024,0.284,73.0,211506.0,11/10/2017,148
149,Reputation,delicate,0.216,0.75,0.404,0.000357,0.0911,-10.178,0.0682,95.045,0.0499,81.0,232253.0,11/10/2017,149
150,Reputation,don’t blame me,0.106,0.615,0.534,1.8e-05,0.0607,-6.719,0.0386,135.917,0.193,87.0,236413.0,11/10/2017,150
151,Reputation,dress,0.0329,0.719,0.469,0.0,0.169,-8.792,0.0533,120.085,0.0851,75.0,230373.0,11/10/2017,151
152,Reputation,end game,0.00845,0.649,0.589,0.0,0.108,-6.237,0.0558,159.073,0.151,75.0,244826.0,11/10/2017,152
153,Reputation,getaway car,0.00465,0.562,0.689,2e-06,0.0888,-6.745,0.127,172.054,0.351,81.0,233626.0,11/10/2017,153
154,Reputation,gorgeous,0.0713,0.8,0.535,9e-06,0.213,-6.684,0.135,92.027,0.451,79.0,209680.0,11/10/2017,154
155,Reputation,i did something bad,0.0679,0.696,0.602,2.1e-05,0.0696,-6.156,0.159,82.989,0.305,77.0,238253.0,11/10/2017,155


In [None]:
grouped_data.loc[grouped_data['era'].eq('Lover')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
89,Lover,afterglow,0.13,0.756,0.449,0.0,0.114,-8.746,0.0344,111.011,0.399,79.0,223293.0,08/23/2019,89
90,Lover,cornelia street,0.781,0.824,0.624,0.000189,0.1,-9.728,0.0827,102.012,0.248,75.0,287266.0,08/23/2019,90
91,Lover,cruel summer,0.117,0.552,0.702,2.1e-05,0.105,-5.707,0.157,169.994,0.564,86.0,178426.0,08/23/2019,91
92,Lover,daylight,0.808,0.557,0.496,0.000173,0.0772,-9.602,0.0563,149.983,0.265,75.0,293453.0,08/23/2019,92
93,Lover,death by a thousand cuts,0.454,0.712,0.732,0.0,0.319,-6.754,0.0629,94.071,0.313,75.0,198533.0,08/23/2019,93
94,Lover,false god,0.736,0.739,0.32,0.000147,0.111,-10.862,0.239,79.97,0.351,73.0,200306.0,08/23/2019,94
95,Lover,i forgot that you existed,0.298,0.664,0.316,2e-06,0.0812,-10.345,0.519,92.875,0.541,74.0,170640.0,08/23/2019,95
96,Lover,i think he knows,0.00889,0.897,0.366,0.000353,0.0715,-8.029,0.0569,100.003,0.416,74.0,173386.0,08/23/2019,96
97,Lover,it’s nice to have a friend,0.971,0.737,0.175,0.000337,0.171,-9.912,0.0401,70.008,0.545,69.0,150440.0,08/23/2019,97
98,Lover,london boy,0.0246,0.695,0.71,0.000104,0.133,-6.639,0.05,157.925,0.557,75.0,190240.0,08/23/2019,98


In [None]:
grouped_data.loc[grouped_data['era'].eq('Folklore')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
72,Folklore,august,0.54175,0.49575,0.62575,7.2e-05,0.09265,-9.2115,0.0339,89.8445,0.40725,56.75,261921.5,07/24/2020,72
73,Folklore,betty,0.56925,0.592,0.37775,0.0,0.098275,-8.7405,0.025475,95.98575,0.478,53.75,294520.75,07/24/2020,73
74,Folklore,cardigan,0.534,0.61275,0.58075,0.000362,0.2525,-8.584,0.042275,130.036,0.54675,60.5,239560.0,07/24/2020,74
75,Folklore,epiphany,0.73,0.3,0.26375,0.000383,0.086025,-13.68725,0.02875,94.1435,0.10875,47.75,289748.25,07/24/2020,75
76,Folklore,exile,0.77375,0.301,0.3785,4.8e-05,0.11,-8.4355,0.0284,75.686,0.154,55.75,285635.5,07/24/2020,76
77,Folklore,hoax,0.9655,0.6655,0.18025,5e-06,0.1335,-15.05125,0.040975,118.82425,0.42275,48.0,220041.5,07/24/2020,77
78,Folklore,illicit affairs,0.87625,0.5515,0.3085,0.0,0.1065,-10.4755,0.03425,119.77875,0.455,53.25,190896.75,07/24/2020,78
79,Folklore,invisible string,0.84325,0.6525,0.45375,7.5e-05,0.10725,-11.13925,0.053925,83.4385,0.451,54.25,252880.0,07/24/2020,79
80,Folklore,mad woman,0.65575,0.59425,0.70425,1.1e-05,0.1145,-8.97175,0.045625,141.9425,0.50575,49.5,237260.0,07/24/2020,80
81,Folklore,mirrorball,0.68675,0.552,0.414,2e-06,0.05965,-10.04375,0.0338,110.1145,0.371,54.0,208976.0,07/24/2020,81


In [None]:
grouped_data.loc[grouped_data['era'].eq('Evermore')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
30,Evermore,champagne problems,0.92,0.4624,0.2432,0.0,0.113,-12.0606,0.03758,171.3298,0.322,54.6,244000.0,12/11/2020,30
31,Evermore,closure,0.8338,0.6878,0.7044,5e-06,0.134,-10.817,0.2466,151.8776,0.9196,45.0,180653.0,12/11/2020,31
32,Evermore,coney island,0.8218,0.5338,0.5366,0.00087,0.1416,-11.2672,0.0621,107.8862,0.2968,47.8,275320.0,12/11/2020,32
33,Evermore,cowboy like me,0.7704,0.6044,0.5154,0.000164,0.123,-8.9932,0.0343,127.9746,0.5114,47.0,275040.0,12/11/2020,33
34,Evermore,dorothea,0.6952,0.6058,0.488,0.0,0.129,-8.3248,0.0264,119.9608,0.3568,47.4,225880.0,12/11/2020,34
35,Evermore,evermore,0.937,0.392,0.27,0.002358,0.111,-10.6726,0.03084,123.4822,0.3176,49.2,304106.0,12/11/2020,35
36,Evermore,gold rush,0.8224,0.5064,0.4656,0.1448,0.121,-10.457,0.04092,112.0372,0.3522,49.4,185320.0,12/11/2020,36
37,Evermore,happiness,0.8664,0.5586,0.3328,0.0,0.1136,-10.6994,0.0378,122.0614,0.207,46.2,315146.0,12/11/2020,37
38,Evermore,it’s time to go,0.801,0.592,0.41,1.4e-05,0.09,-12.426,0.0397,151.923,0.416,53.0,254640.0,12/11/2020,38
39,Evermore,ivy,0.8522,0.4406,0.5458,1.6e-05,0.09046,-9.3718,0.03706,87.9652,0.5298,47.4,260440.0,12/11/2020,39


In [None]:
grouped_data.loc[grouped_data['era'].eq('TaylorsVersion')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
178,TaylorsVersion,22,0.000443,0.642,0.695,1.02e-05,0.0753,-5.62,0.0281,103.984,0.642,70.5,230960.0,04/09/2021,178
179,TaylorsVersion,all too well,0.1468,0.5355,0.524,0.001015,0.160875,-8.2595,0.030975,139.49175,0.1705,66.0,471093.0,04/09/2021,179
180,TaylorsVersion,babe,0.0538,0.584,0.743,2.83e-06,0.121,-7.075,0.0931,167.844,0.746,54.5,224240.0,04/09/2021,180
181,TaylorsVersion,begin again,0.075,0.519,0.527,0.0,0.132,-7.673,0.0274,78.915,0.267,54.0,238866.0,04/09/2021,181
182,TaylorsVersion,better man,0.214,0.473,0.579,0.0,0.0877,-5.824,0.0384,73.942,0.255,64.5,297013.0,04/09/2021,182
183,TaylorsVersion,breathe,0.156,0.506,0.626,0.0,0.228,-6.066,0.0287,148.035,0.321,65.0,263377.0,04/09/2021,183
184,TaylorsVersion,bye bye baby,0.334,0.624,0.624,0.0,0.0995,-7.86,0.0539,80.132,0.527,63.0,242157.0,04/09/2021,184
185,TaylorsVersion,change,0.000191,0.499,0.815,0.0,0.181,-4.063,0.0341,95.999,0.344,62.0,279359.0,04/09/2021,185
186,TaylorsVersion,come back...be here,0.0158,0.46,0.632,0.0,0.0822,-6.031,0.0302,79.846,0.399,54.0,223333.0,04/09/2021,186
187,TaylorsVersion,come in with the rain,0.0406,0.476,0.564,0.0,0.102,-5.677,0.0269,143.929,0.167,61.0,237338.0,04/09/2021,187


In [None]:
# Row label 107 is the Midnights "Anti-Hero" entry; give it a readable name.
# NOTE: 107 depends on the current grouping/sort order -- re-check it if the
# upstream cleaning steps change.
# A single .loc call writes into grouped_data itself. Chained indexing
# (df[col][row] = ...) raises SettingWithCopyWarning and may only modify a
# temporary copy.
grouped_data.loc[107, 'normalized_song_name'] = "anti hero"
grouped_data.loc[107, 'normalized_song_name']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grouped_data['normalized_song_name'][107] = "anti hero"


'anti hero'

In [None]:
grouped_data.loc[grouped_data['era'].eq('Midnights')]

Unnamed: 0,era,normalized_song_name,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,year,myindex
107,Midnights,anti hero,0.1285,0.638,0.63225,1.855e-06,0.15875,-6.584,0.04855,96.958,0.508,69.0,200690.0,10/21/2022,107
108,Midnights,bejeweled,0.063875,0.69625,0.553,7.195e-05,0.0881,-9.1525,0.067175,164.00725,0.40875,63.75,194165.0,10/21/2022,108
109,Midnights,bigger than the whole sky,0.8275,0.4085,0.2305,0.00485,0.115,-12.445,0.0543,165.6165,0.0668,67.5,218502.0,10/21/2022,109
110,Midnights,dear reader,0.484,0.6215,0.392,0.001415,0.117,-12.0985,0.0596,107.8505,0.158,64.0,225194.0,10/21/2022,110
111,Midnights,glitch,0.337,0.623,0.4715,0.0,0.11,-9.7375,0.2235,140.799,0.339,64.0,148781.0,10/21/2022,111
112,Midnights,high infidelity,0.7175,0.649,0.5325,0.0,0.08695,-10.195,0.08725,87.9125,0.7565,66.0,231475.0,10/21/2022,112
113,Midnights,karma,0.0714,0.64825,0.62975,0.0,0.51825,-7.07025,0.0662,90.01575,0.09975,64.25,204852.0,10/21/2022,113
114,Midnights,labyrinth,0.7905,0.462,0.3095,0.37975,0.123,-15.436,0.04665,110.0005,0.138,59.0,247962.0,10/21/2022,114
115,Midnights,lavender haze,0.242,0.7365,0.45075,0.00101925,0.16875,-10.4985,0.07675,96.996,0.10075,67.75,202395.0,10/21/2022,115
116,Midnights,maroon,0.058225,0.65075,0.39525,5.575e-07,0.098825,-8.3435,0.04165,108.047,0.037975,64.25,218270.0,10/21/2022,116


In [None]:
from google.colab import files

# write the per-song averages to a CSV (UTF-8 with BOM) and hand the file to Colab's download helper
export_name = 'data_taylors_version.csv'
grouped_data.to_csv(export_name, encoding='utf-8-sig')
files.download(export_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>