# Billboard Hot 100 (songs) Updated Dataframe Cleaning and Wrangling
## Based on newly released data (January 1st, 2023)

## Contents
## 01. Import libraries and datasets
## 02. Cleaning and wrangling
## 03. EDA to prepare for time series and visualization
## 04. Data export

In [1]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import os

In [2]:
# Create path

path = r"C:\Users\leobs\OneDrive\Desktop\LF_TS_Analysis"

In [3]:
# Import billboard hot 100 dataframe with updates

df = pd.read_csv(os.path.join(path,'01 Sourced Data', 'Main Dataframes', 'hot-100-current (3).csv'), index_col = False)

In [4]:
# Import song stream for merging song info

df_songs = pd.read_csv(os.path.join(path,'02 Manipulated Data', 'song_stream_alb_final.csv'), index_col = 0)

# 02. Cleaning

In [5]:
df.head(20)

Unnamed: 0,chart_week,current_week,title,performer,last_week,peak_pos,wks_on_chart
0,2022-01-01,1,All I Want For Christmas Is You,Mariah Carey,1.0,1,50
1,2022-01-01,2,Rockin' Around The Christmas Tree,Brenda Lee,2.0,2,44
2,2022-01-01,3,Jingle Bell Rock,Bobby Helms,4.0,3,41
3,2022-01-01,4,A Holly Jolly Christmas,Burl Ives,5.0,4,25
4,2022-01-01,5,Easy On Me,Adele,3.0,1,11
5,2022-01-01,6,It's The Most Wonderful Time Of The Year,Andy Williams,7.0,5,26
6,2022-01-01,7,Last Christmas,Wham!,9.0,7,24
7,2022-01-01,8,Feliz Navidad,Jose Feliciano,11.0,6,19
8,2022-01-01,9,Stay,The Kid LAROI & Justin Bieber,6.0,1,24
9,2022-01-01,10,Sleigh Ride,The Ronettes,13.0,10,15


In [6]:
df.tail(20)

Unnamed: 0,chart_week,current_week,title,performer,last_week,peak_pos,wks_on_chart
341280,2021-11-13,81,Rap Freaks,Yung Miami,,81,1
341281,2021-11-13,82,Eat It,Megan Thee Stallion,,82,1
341282,2021-11-13,83,Esta Danada,Ivan Cornejo,72.0,61,5
341283,2021-11-13,84,Freedom Was A Highway,Jimmie Allen & Brad Paisley,82.0,82,4
341284,2021-11-13,85,Whole Lotta Money,BIA Featuring Nicki Minaj,75.0,16,17
341285,2021-11-13,86,Switches & Dracs,Moneybagg Yo Featuring Lil Durk & EST Gee,69.0,69,2
341286,2021-11-13,87,Half Of My Hometown,Kelsea Ballerini Featuring Kenny Chesney,89.0,87,2
341287,2021-11-13,88,Get Into It (Yuh),Doja Cat,86.0,68,13
341288,2021-11-13,89,Pissed Me Off,Lil Durk,62.0,39,3
341289,2021-11-13,90,Lo Siento BB:/,"Tainy, Bad Bunny & Julieta Venegas",81.0,51,4


In [7]:
df.shape

(341300, 7)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 341300 entries, 0 to 341299
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   chart_week    341300 non-null  object 
 1   current_week  341300 non-null  int64  
 2   title         341300 non-null  object 
 3   performer     341300 non-null  object 
 4   last_week     308840 non-null  float64
 5   peak_pos      341300 non-null  int64  
 6   wks_on_chart  341300 non-null  int64  
dtypes: float64(1), int64(3), object(3)
memory usage: 18.2+ MB


In [9]:
df.describe()

Unnamed: 0,current_week,last_week,peak_pos,wks_on_chart
count,341300.0,308840.0,341300.0,341300.0
mean,50.5,47.292355,40.687764,9.287767
std,28.866032,28.175538,29.333028,7.862138
min,1.0,0.0,1.0,1.0
25%,26.0,23.0,13.0,4.0
50%,51.0,47.0,38.0,7.0
75%,75.0,71.0,65.0,13.0
max,100.0,100.0,100.0,91.0


### Column renames

In [10]:
df.columns

Index(['chart_week', 'current_week', 'title', 'performer', 'last_week',
       'peak_pos', 'wks_on_chart'],
      dtype='object')

In [11]:
df.rename(columns = {'current_week': 'chart_position', 'title': 'song_title', 'performer': 'artist', 'last_week': 'prior_wk_position'}, inplace=True)

In [12]:
df.columns

Index(['chart_week', 'chart_position', 'song_title', 'artist',
       'prior_wk_position', 'peak_pos', 'wks_on_chart'],
      dtype='object')

### Duplicates

In [13]:
df_dupes = df[df.duplicated()]

In [14]:
df_dupes

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart


### Data types

In [15]:
# Check for mixed data types

for col in df.columns.tolist():
    weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)                                                                             
    if len (df[weird]) > 0:
        print (col)

  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)
  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)
  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)
  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)
  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)
  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)
  weird = (df[[col]].applymap(type) !=df[[col]].iloc[0].apply(type)).any(axis = 1)


In [16]:
df.dtypes

chart_week            object
chart_position         int64
song_title            object
artist                object
prior_wk_position    float64
peak_pos               int64
wks_on_chart           int64
dtype: object

In [17]:
# Convert the chart_week column to datetime

df['chart_week'] = pd.to_datetime(df['chart_week'])

In [18]:
df['chart_week'].dtype

dtype('<M8[ns]')

In [19]:
# Get all the unique values in the chart week columns

unique_weeks_sub = df['chart_week'].unique()

In [20]:
unique_weeks_sub.shape

(3413,)

In [21]:
df.tail(20)

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart
341280,2021-11-13,81,Rap Freaks,Yung Miami,,81,1
341281,2021-11-13,82,Eat It,Megan Thee Stallion,,82,1
341282,2021-11-13,83,Esta Danada,Ivan Cornejo,72.0,61,5
341283,2021-11-13,84,Freedom Was A Highway,Jimmie Allen & Brad Paisley,82.0,82,4
341284,2021-11-13,85,Whole Lotta Money,BIA Featuring Nicki Minaj,75.0,16,17
341285,2021-11-13,86,Switches & Dracs,Moneybagg Yo Featuring Lil Durk & EST Gee,69.0,69,2
341286,2021-11-13,87,Half Of My Hometown,Kelsea Ballerini Featuring Kenny Chesney,89.0,87,2
341287,2021-11-13,88,Get Into It (Yuh),Doja Cat,86.0,68,13
341288,2021-11-13,89,Pissed Me Off,Lil Durk,62.0,39,3
341289,2021-11-13,90,Lo Siento BB:/,"Tainy, Bad Bunny & Julieta Venegas",81.0,51,4


## Create a subset with only chart weeks from Taylor's first album onwards

In [22]:
# Create dataframe with values 10/24/2006 onward

df_sub = df.loc[(df['chart_week'] > '10/24/2006')]

In [23]:
df_sub = df_sub.sort_values(by='chart_week', ascending=False)

In [24]:
df_sub.head(10)

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart
10427,2023-12-30,28,I Remember Everything,Zach Bryan Featuring Kacey Musgraves,22.0,1,17
10436,2023-12-30,37,Thinkin' Bout Me,Morgan Wallen,32.0,7,42
10425,2023-12-30,26,Water,Tyla,21.0,10,12
10426,2023-12-30,27,Run Rudolph Run,Chuck Berry,38.0,10,21
10428,2023-12-30,29,Agora Hills,Doja Cat,30.0,14,13
10429,2023-12-30,30,Santa Baby,Eartha Kitt With Henri Rene And His Orchestra,31.0,30,8
10430,2023-12-30,31,Little Saint Nick,The Beach Boys,40.0,29,7
10431,2023-12-30,32,Please Come Home For Christmas,Eagles,45.0,18,14
10432,2023-12-30,33,Last Night,Morgan Wallen,29.0,1,47
10433,2023-12-30,34,"You're A Mean One, Mr. Grinch",Thurl Ravenscroft,41.0,31,11


In [25]:
df_sub.tail(20)

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart
260753,2006-10-28,54,Boston,Augustana,88.0,54,2
260754,2006-10-28,55,Money In The Bank,Lil Scrappy Featuring Young Buck,65.0,55,5
260755,2006-10-28,56,I Wanna Love You,Akon Featuring Snoop Dogg,79.0,56,3
260756,2006-10-28,57,Chicken Noodle Soup.,Webstar & Young B Featuring The Voice Of Harlem,45.0,45,6
260757,2006-10-28,58,It Ends Tonight,The All-American Rejects,84.0,58,3
260758,2006-10-28,59,Hurt,Christina Aguilera,75.0,59,3
260759,2006-10-28,60,Every Mile A Memory,Dierks Bentley,58.0,58,10
260761,2006-10-28,62,Everytime Tha Beat Drop,Monica Featuring Dem Franchize Boyz,48.0,48,11
260762,2006-10-28,63,We Fly High,Jim Jones,87.0,63,2
260763,2006-10-28,64,Welcome To The Black Parade,My Chemical Romance,78.0,64,5


In [26]:
df_sub.shape

(89700, 7)

In [27]:
df_sub['artist'].value_counts(dropna=False)

artist
Taylor Swift                                     1381
Drake                                             896
Morgan Wallen                                     659
Jason Aldean                                      587
Luke Bryan                                        568
                                                 ... 
David Guetta Featuring Jennifer Hudson              1
Game Featuring Lil Wayne & Tyler, The Creator       1
Kehlani Featuring Jhene Aiko                        1
Nicole Scherzinger                                  1
Nick Lachey                                         1
Name: count, Length: 3827, dtype: int64

### Check missing values

In [28]:
df_sub.isnull().sum()

chart_week              0
chart_position          0
song_title              0
artist                  0
prior_wk_position    8834
peak_pos                0
wks_on_chart            0
dtype: int64

In [29]:
df_sub['prior_wk_position'].value_counts(dropna=False)

prior_wk_position
NaN      8834
0.0      1603
6.0       896
2.0       895
5.0       895
         ... 
96.0      505
97.0      471
98.0      449
99.0      410
100.0     380
Name: count, Length: 102, dtype: int64

In [30]:
# Fill NA with 0

df_sub['prior_wk_position'].fillna(0, inplace=True)

In [31]:
df_sub['prior_wk_position'].value_counts(dropna=False)

prior_wk_position
0.0      10437
6.0        896
7.0        895
2.0        895
5.0        895
         ...  
96.0       505
97.0       471
98.0       449
99.0       410
100.0      380
Name: count, Length: 101, dtype: int64

In [32]:
# Change data type to integer now that no NaN values

df_sub['prior_wk_position'] = df_sub['prior_wk_position'].astype(int)

In [33]:
df_sub.head(50)

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart
10427,2023-12-30,28,I Remember Everything,Zach Bryan Featuring Kacey Musgraves,22,1,17
10436,2023-12-30,37,Thinkin' Bout Me,Morgan Wallen,32,7,42
10425,2023-12-30,26,Water,Tyla,21,10,12
10426,2023-12-30,27,Run Rudolph Run,Chuck Berry,38,10,21
10428,2023-12-30,29,Agora Hills,Doja Cat,30,14,13
10429,2023-12-30,30,Santa Baby,Eartha Kitt With Henri Rene And His Orchestra,31,30,8
10430,2023-12-30,31,Little Saint Nick,The Beach Boys,40,29,7
10431,2023-12-30,32,Please Come Home For Christmas,Eagles,45,18,14
10432,2023-12-30,33,Last Night,Morgan Wallen,29,1,47
10433,2023-12-30,34,"You're A Mean One, Mr. Grinch",Thurl Ravenscroft,41,31,11


# EDA with all artists

In [34]:
df_sub['song_title'].value_counts(dropna=False)

song_title
Stay                      172
Let It Go                 129
Rockstar                  129
Home                      125
See You Again             119
                         ... 
Livin It Up                 1
Rich N***a Shit             1
I'm Not Gonna Miss You      1
Peepin Out The Window       1
Life Is A Highway           1
Name: count, Length: 7553, dtype: int64

In [35]:
unique_artists = df_sub['artist'].nunique()

In [36]:
unique_artists

3827

In [37]:
artist_counts = df_sub['artist'].value_counts(dropna=False)

In [38]:
artist_counts.head(20)

artist
Taylor Swift        1381
Drake                896
Morgan Wallen        659
Jason Aldean         587
Luke Bryan           568
The Weeknd           537
Ed Sheeran           503
Carrie Underwood     502
Beyonce              495
Rihanna              492
Luke Combs           446
Blake Shelton        442
Imagine Dragons      442
Keith Urban          433
Kenny Chesney        418
Chris Brown          413
Katy Perry           394
P!nk                 387
Maroon 5             385
Ariana Grande        381
Name: count, dtype: int64

## Isolate Taylor only songs

In [39]:
df_sub.shape

(89700, 7)

In [40]:
# Using loc to conditionally assign a value based on whether a column includes
# string "Taylor Swift"

df_sub['result'] = 0 # Initialize the 'result' column with zeros
df_sub.loc[df_sub['artist'].str.contains('Taylor Swift'), 'result'] = 1

In [41]:
df_sub.head(15)

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart,result
10427,2023-12-30,28,I Remember Everything,Zach Bryan Featuring Kacey Musgraves,22,1,17,0
10436,2023-12-30,37,Thinkin' Bout Me,Morgan Wallen,32,7,42,0
10425,2023-12-30,26,Water,Tyla,21,10,12,0
10426,2023-12-30,27,Run Rudolph Run,Chuck Berry,38,10,21,0
10428,2023-12-30,29,Agora Hills,Doja Cat,30,14,13,0
10429,2023-12-30,30,Santa Baby,Eartha Kitt With Henri Rene And His Orchestra,31,30,8,0
10430,2023-12-30,31,Little Saint Nick,The Beach Boys,40,29,7,0
10431,2023-12-30,32,Please Come Home For Christmas,Eagles,45,18,14,0
10432,2023-12-30,33,Last Night,Morgan Wallen,29,1,47,0
10433,2023-12-30,34,"You're A Mean One, Mr. Grinch",Thurl Ravenscroft,41,31,11,0


In [42]:
df_sub['result'].value_counts(dropna=False)

result
0    88087
1     1613
Name: count, dtype: int64

In [43]:
# Curioous about the difference being with collabs
# Will run again and use == rather than includes

df_sub['result_taylor_only'] = 0
df_sub.loc[df_sub['artist'] == 'Taylor Swift', 'result_taylor_only'] = 1

In [44]:
df_sub['result_taylor_only'].value_counts(dropna=False)

result_taylor_only
0    88319
1     1381
Name: count, dtype: int64

In [45]:
# Delineate songs that are primarily Taylors that feature other artists vs.
# songs that are from other artists and feature Taylor
# Will keep all Taylor only songs and all songs Taylor made with featured artists


df_sub['all_taylor_and_with_collabs'] = 0
df_sub.loc[(df_sub['result_taylor_only'] == 1) | (df_sub['artist'].str.contains('Taylor Swift Featuring')), 'all_taylor_and_with_collabs'] = 1

In [46]:
df_sub['all_taylor_and_with_collabs'].value_counts(dropna=False)

all_taylor_and_with_collabs
0    88176
1     1524
Name: count, dtype: int64

In [47]:
# Create a subset to show Taylor Swift collaborations

taylor_collab_songs = df_sub[(df_sub['result'] == 1) & (df_sub['result_taylor_only'] == 0)]

In [48]:
taylor_collab_songs.shape

(232, 10)

## Taylor Swift songs (only TS and with featured artists) made the Hot 100 List 1524 times since the launch of her first album, on October 24th 2006

In [49]:
# Create a billboard chart with only the Taylor Swift + collabs songs

bill_songs_ts_oct_06_dec_23 = df_sub[df_sub['all_taylor_and_with_collabs'] == 1].reset_index(drop=True)

In [50]:
bill_songs_ts_oct_06_dec_23.head(100)

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart,result,result_taylor_only,all_taylor_and_with_collabs
0,2023-12-30,18,Cruel Summer,Taylor Swift,13,1,33,1,1,1
1,2023-12-30,40,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift,36,1,8,1,1,1
2,2023-12-30,77,You're Losing Me (From The Vault),Taylor Swift,68,27,4,1,1,1
3,2023-12-23,36,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift,30,1,7,1,1,1
4,2023-12-23,13,Cruel Summer,Taylor Swift,7,1,32,1,1,1
...,...,...,...,...,...,...,...,...,...,...
95,2023-09-02,4,Cruel Summer,Taylor Swift,4,3,16,1,1,1
96,2023-09-02,15,Karma,Taylor Swift Featuring Ice Spice,14,2,24,1,0,1
97,2023-09-02,18,Anti-Hero,Taylor Swift,20,1,44,1,1,1
98,2023-09-02,74,I Can See You (Taylor's Version) (From The Vault),Taylor Swift,71,5,7,1,1,1


In [51]:
bill_songs_ts_oct_06_dec_23.shape

(1524, 10)

In [52]:
top_songs= bill_songs_ts_oct_06_dec_23['song_title'].value_counts(dropna=False)

In [53]:
top_songs.head(10)

song_title
Anti-Hero                   53
Shake It Off                50
You Belong With Me          50
Love Story                  49
Teardrops On My Guitar      48
Blank Space                 38
Our Song                    36
I Knew You Were Trouble.    36
Delicate                    35
Cruel Summer                33
Name: count, dtype: int64

In [54]:
bill_songs_ts_oct_06_dec_23.columns

Index(['chart_week', 'chart_position', 'song_title', 'artist',
       'prior_wk_position', 'peak_pos', 'wks_on_chart', 'result',
       'result_taylor_only', 'all_taylor_and_with_collabs'],
      dtype='object')

In [55]:
# Drop columns not needed

bill_songs_ts_oct_06_dec_23.drop(columns=['result',
       'result_taylor_only', 'all_taylor_and_with_collabs'], axis=1, inplace=True)

In [56]:
bill_songs_ts_oct_06_dec_23.columns

Index(['chart_week', 'chart_position', 'song_title', 'artist',
       'prior_wk_position', 'peak_pos', 'wks_on_chart'],
      dtype='object')

# Prepare for merge with songs df

In [57]:
df_songs.head()

Unnamed: 0,song_title,album_title,song_release_date,track,acou,dance,ener,instru,live,loud,...,alb_r_yr,album_release_date,w1_sales,top_alb_count,top_alb_normal,alb_track_cnt,tvflag,sales_group,tot_streams_group,alb_genre
0,Welcome To New York (Taylor's Version),1989 (Taylor's Version)[Deluxe],10/27/2023,1,0.00942,0.757,0.61,3.7e-05,0.367,-4.84,...,2023,2023-10-27,1359000,1,0.076923,22,True,High first week sales,Low total streams,Synth/Dream/Dance pop
1,Blank Space (Taylor's Version),1989 (Taylor's Version)[Deluxe],10/27/2023,2,0.0885,0.733,0.733,0.0,0.168,-5.376,...,2023,2023-10-27,1359000,1,0.076923,22,True,High first week sales,Low total streams,Synth/Dream/Dance pop
2,Style (Taylor's Version),1989 (Taylor's Version)[Deluxe],10/27/2023,3,0.000421,0.511,0.822,0.0197,0.0899,-4.785,...,2023,2023-10-27,1359000,1,0.076923,22,True,High first week sales,Low total streams,Synth/Dream/Dance pop
3,Out Of The Woods (Taylor's Version),1989 (Taylor's Version)[Deluxe],10/27/2023,4,0.000537,0.545,0.885,5.6e-05,0.385,-5.968,...,2023,2023-10-27,1359000,1,0.076923,22,True,High first week sales,Low total streams,Synth/Dream/Dance pop
4,All You Had To Do Was Stay (Taylor's Version),1989 (Taylor's Version)[Deluxe],10/27/2023,5,0.000656,0.588,0.721,0.0,0.131,-5.579,...,2023,2023-10-27,1359000,1,0.076923,22,True,High first week sales,Low total streams,Synth/Dream/Dance pop


In [58]:
df_songs.columns

Index(['song_title', 'album_title', 'song_release_date', 'track', 'acou',
       'dance', 'ener', 'instru', 'live', 'loud', 'speech', 'temp', 'val',
       'pop', 'dur', 'album_id', 'song_id', 'days', 'tot_streams',
       'last_stream_update', 'streams_n', 'top_song', 'top_song_flag',
       'top_s_cnt', 'tv', 'alb_num', 'album_announce_mo', 'alb_ann_day',
       'alb_ann_yr', 'album_announce_date', 'album_release_mo', 'alb_r_day',
       'alb_r_yr', 'album_release_date', 'w1_sales', 'top_alb_count',
       'top_alb_normal', 'alb_track_cnt', 'tvflag', 'sales_group',
       'tot_streams_group', 'alb_genre'],
      dtype='object')

In [59]:
df_songs.shape

(285, 42)

In [60]:
# Merge dataframes on song_title

bill_songs_merge = bill_songs_ts_oct_06_dec_23.merge(df_songs, how='left', on='song_title', indicator=True)

In [61]:
bill_songs_merge['_merge'].value_counts(dropna=False)

_merge
both          1249
left_only      275
right_only       0
Name: count, dtype: int64

In [62]:
unknown_alb= bill_songs_merge[bill_songs_merge['_merge'] == 'left_only']

In [63]:
pd.set_option('display.max_rows', 275)

In [64]:
unknown_alb

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart,album_title,song_release_date,track,...,album_release_date,w1_sales,top_alb_count,top_alb_normal,alb_track_cnt,tvflag,sales_group,tot_streams_group,alb_genre,_merge
1,2023-12-30,40,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift,36,1,8,,,,...,,,,,,,,,,left_only
2,2023-12-30,77,You're Losing Me (From The Vault),Taylor Swift,68,27,4,,,,...,,,,,,,,,,left_only
3,2023-12-23,36,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift,30,1,7,,,,...,,,,,,,,,,left_only
5,2023-12-23,68,You're Losing Me (From The Vault),Taylor Swift,27,27,3,,,,...,,,,,,,,,,left_only
6,2023-12-16,30,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift,21,1,6,,,,...,,,,,,,,,,left_only
7,2023-12-16,27,You're Losing Me (From The Vault),Taylor Swift,46,27,2,,,,...,,,,,,,,,,left_only
9,2023-12-16,84,Now That We Don't Talk (Taylor's Version) [Fro...,Taylor Swift,74,2,6,,,,...,,,,,,,,,,left_only
10,2023-12-09,46,You're Losing Me,Taylor Swift,0,46,1,,,,...,,,,,,,,,,left_only
12,2023-12-09,21,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift,9,1,5,,,,...,,,,,,,,,,left_only
13,2023-12-09,98,Slut! (Taylor's Version) [From The Vault],Taylor Swift,76,3,5,,,,...,,,,,,,,,,left_only


In [65]:
unique_songs_unknown = unknown_alb['song_title'].value_counts()

In [66]:
unique_songs_unknown

song_title
Teardrops On My Guitar                                          48
ME!                                                             20
Willow                                                          20
Today Was A Fairytale                                           18
Safe & Sound                                                    17
Cardigan                                                        14
Eyes Open                                                       13
Snow On The Beach                                                8
Crazier                                                          8
Is It Over Now? (Taylor's Version) [From The Vault]              8
The 1                                                            6
Now That We Don't Talk (Taylor's Version) [From The Vault]       6
All Of The Girls You Loved Before                                6
Christmas Tree Farm (Old Timey Version)                          5
Slut! (Taylor's Version) [From The Vault]          

In [67]:
unknown_alb.shape

(275, 49)

In [68]:
bill_songs_merge['song_title'].value_counts()

song_title
Anti-Hero                                                       53
Shake It Off                                                    50
You Belong With Me                                              50
Love Story                                                      49
Teardrops On My Guitar                                          48
Blank Space                                                     38
Our Song                                                        36
I Knew You Were Trouble.                                        36
Delicate                                                        35
Cruel Summer                                                    33
Style                                                           32
Karma                                                           30
Lavender Haze                                                   29
Wildest Dreams                                                  27
Bad Blood                                          

### Edit formatting for bill_songs_ts_oct_06_dec_23 to retry merge

In [69]:
# Display unique values in song_title

unique_songs = bill_songs_ts_oct_06_dec_23['song_title'].unique()

In [70]:
unique_songs

array(['Cruel Summer',
       "Is It Over Now? (Taylor's Version) [From The Vault]",
       "You're Losing Me (From The Vault)",
       "Now That We Don't Talk (Taylor's Version) [From The Vault]",
       "You're Losing Me", "Slut! (Taylor's Version) [From The Vault]",
       "Say Don't Go (Taylor's Version) [From The Vault]",
       "Style (Taylor's Version)", "Bad Blood (Taylor's Version)",
       "Out Of The Woods (Taylor's Version)",
       "Wildest Dreams (Taylor's Version)",
       "Suburban Legends (Taylor's Version) [From The Vault]",
       "Blank Space (Taylor's Version)",
       "Shake It Off (Taylor's Version)",
       "All You Had To Do Was Stay (Taylor's Version)",
       "New Romantics (Taylor's Version)", "Clean (Taylor's Version)",
       "Welcome To New York (Taylor's Version)",
       "I Know Places (Taylor's Version)",
       "I Wish You Would (Taylor's Version)",
       "Wonderland (Taylor's Version)",
       "How You Get The Girl (Taylor's Version)",
       "This 

In [71]:
unique_songs.shape

(226,)

In [72]:
# Remove non-studio album songs (8)

songs_to_drop = ['Breathless', 'Carolina', 'Christmas Tree Farm (Old Timey Version)', 'Only The Young', 'Christmas Tree Farm', 
    'Sweeter Than Fiction', 'Eyes Open', 'Crazier']

In [73]:
bill_songs_ts_oct_06_dec_23 = bill_songs_ts_oct_06_dec_23[~bill_songs_ts_oct_06_dec_23['song_title'].isin(songs_to_drop)]

In [74]:
unique_songs2 = bill_songs_ts_oct_06_dec_23['song_title'].unique()

In [75]:
unique_songs2.shape

(218,)

In [76]:
# Replace capital values with lowercase for evermore and folklore albums to match for merge

ever_folk_values_to_change = {'Willow': 'willow', 'Champagne Problems': 'champagne problems', "'Tis The Damn Season": "'tis the damn season'", 'Gold Rush': 'gold rush',
       'Tolerate It': 'tolerate it', 'Happiness': 'happiness', 'Ivy': 'ivy', 'Dorothea': 'dorthea', 'Long Story Short': 'long story short',
       'Cowboy Like Me': 'cowboy like me', 'Marjorie': 'marjorie', 'Closure': 'closure', 'Cardigan': 'cardigan', 'The 1': 'the 1',
       'The Last Great American Dynasty':'the last great american dynasty', 'My Tears Ricochet': 'my tears ricochet', 'August': 'august','Invisible String': 'invisible string', 'Betty': 'betty', "Mirrorball":"mirrorball", 'Seven': 'seven',
       'This Is Me Trying': 'this is me trying', 'Illicit Affairs': 'illicit affairs', 'Mad Woman': 'mad woman', 'Epiphany': 'epiphany', 'Peace': 'peace', 'Hoax': 'hoax'}


In [77]:
ever_folk_values_to_change

# 27 changes

{'Willow': 'willow',
 'Champagne Problems': 'champagne problems',
 "'Tis The Damn Season": "'tis the damn season'",
 'Gold Rush': 'gold rush',
 'Tolerate It': 'tolerate it',
 'Happiness': 'happiness',
 'Ivy': 'ivy',
 'Dorothea': 'dorthea',
 'Long Story Short': 'long story short',
 'Cowboy Like Me': 'cowboy like me',
 'Marjorie': 'marjorie',
 'Closure': 'closure',
 'Cardigan': 'cardigan',
 'The 1': 'the 1',
 'The Last Great American Dynasty': 'the last great american dynasty',
 'My Tears Ricochet': 'my tears ricochet',
 'August': 'august',
 'Invisible String': 'invisible string',
 'Betty': 'betty',
 'Mirrorball': 'mirrorball',
 'Seven': 'seven',
 'This Is Me Trying': 'this is me trying',
 'Illicit Affairs': 'illicit affairs',
 'Mad Woman': 'mad woman',
 'Epiphany': 'epiphany',
 'Peace': 'peace',
 'Hoax': 'hoax'}

In [78]:
bill_songs_ts_oct_06_dec_23['song_title'].replace(ever_folk_values_to_change, inplace=True)

In [79]:
unique_songs3 = bill_songs_ts_oct_06_dec_23['song_title'].unique()

In [80]:
# Recheck unique songs with lowercase shift

unique_songs3

array(['Cruel Summer',
       "Is It Over Now? (Taylor's Version) [From The Vault]",
       "You're Losing Me (From The Vault)",
       "Now That We Don't Talk (Taylor's Version) [From The Vault]",
       "You're Losing Me", "Slut! (Taylor's Version) [From The Vault]",
       "Say Don't Go (Taylor's Version) [From The Vault]",
       "Style (Taylor's Version)", "Bad Blood (Taylor's Version)",
       "Out Of The Woods (Taylor's Version)",
       "Wildest Dreams (Taylor's Version)",
       "Suburban Legends (Taylor's Version) [From The Vault]",
       "Blank Space (Taylor's Version)",
       "Shake It Off (Taylor's Version)",
       "All You Had To Do Was Stay (Taylor's Version)",
       "New Romantics (Taylor's Version)", "Clean (Taylor's Version)",
       "Welcome To New York (Taylor's Version)",
       "I Know Places (Taylor's Version)",
       "I Wish You Would (Taylor's Version)",
       "Wonderland (Taylor's Version)",
       "How You Get The Girl (Taylor's Version)",
       "This 

In [81]:
# Need to reformat the 1989TV tracks- replace [] with () for 1989 TV songs

bill_songs_ts_oct_06_dec_23['song_title'].replace({"Now That We Don't Talk (Taylor's Version) [From The Vault]": "Now That We Don't Talk (Taylor's Version)(From The Vault)", 
                           "Slut! (Taylor's Version) [From The Vault]": '''"Slut!"(Taylor's Version)(From The Vault)''', 
                           "Say Don't Go (Taylor's Version) [From The Vault]": "Say Don't Go (Taylor's Version)(From The Vault)", 
                           "Suburban Legends (Taylor's Version) [From The Vault]": "Suburban Legends (Taylor's Version)(From The Vault)"}, 
                                                  inplace=True)

In [82]:
unique_songs_unknown

song_title
Teardrops On My Guitar                                          48
ME!                                                             20
Willow                                                          20
Today Was A Fairytale                                           18
Safe & Sound                                                    17
Cardigan                                                        14
Eyes Open                                                       13
Snow On The Beach                                                8
Crazier                                                          8
Is It Over Now? (Taylor's Version) [From The Vault]              8
The 1                                                            6
Now That We Don't Talk (Taylor's Version) [From The Vault]       6
All Of The Girls You Loved Before                                6
Christmas Tree Farm (Old Timey Version)                          5
Slut! (Taylor's Version) [From The Vault]          

In [83]:
# Remove 'Ronan', 'You're Losing Me (From The Vault)', 'All Of The Girls You Loved Before'
# since they are single releases
# 3 songs

singles_remove = ['Ronan', "You're Losing Me (From The Vault)", 'All Of The Girls You Loved Before']

In [84]:
bill_songs_ts_oct_06_dec_23 = bill_songs_ts_oct_06_dec_23[~bill_songs_ts_oct_06_dec_23['song_title'].isin(singles_remove)]

In [85]:
# Check 
unique_songs4 = bill_songs_ts_oct_06_dec_23['song_title'].unique()

In [86]:
unique_songs4

array(['Cruel Summer',
       "Is It Over Now? (Taylor's Version) [From The Vault]",
       "Now That We Don't Talk (Taylor's Version)(From The Vault)",
       "You're Losing Me", '"Slut!"(Taylor\'s Version)(From The Vault)',
       "Say Don't Go (Taylor's Version)(From The Vault)",
       "Style (Taylor's Version)", "Bad Blood (Taylor's Version)",
       "Out Of The Woods (Taylor's Version)",
       "Wildest Dreams (Taylor's Version)",
       "Suburban Legends (Taylor's Version)(From The Vault)",
       "Blank Space (Taylor's Version)",
       "Shake It Off (Taylor's Version)",
       "All You Had To Do Was Stay (Taylor's Version)",
       "New Romantics (Taylor's Version)", "Clean (Taylor's Version)",
       "Welcome To New York (Taylor's Version)",
       "I Know Places (Taylor's Version)",
       "I Wish You Would (Taylor's Version)",
       "Wonderland (Taylor's Version)",
       "How You Get The Girl (Taylor's Version)",
       "This Love (Taylor's Version)",
       "You Are In L

In [87]:
# Rename 'Teardrops On My Guitar'

bill_songs_ts_oct_06_dec_23['song_title'].replace({'Teardrops On My Guitar':"Teardrops On My Guitar - Radio Single Remix"}, inplace=True)

In [88]:
# Need to reformat songs that have featured artists:

bill_songs_ts_oct_06_dec_23['song_title'].replace({
    "Castles Crumbling (Taylor's Version) (From The Vault)":"Castles Crumbling (feat. Hayley Williams) (Taylor's Version) (From The Vault)" ,
    "Electric Touch (Taylor's Version) (From The Vault)":"Electric Touch (feat. Fall Out Boy) (Taylor's Version) (From The Vault)",
    "Everything Has Changed (Taylor's Version)":"Everything Has Changed (feat. Ed Sheeran) (Taylor's Version)",
    "I Bet You Think About Me (Taylor's Version) (From The Vault)":"I Bet You Think About Me (feat. Chris Stapleton) (Taylor's Version) (From The Vault)",
    "Nothing New (Taylor's Version) (From The Vault)":"Nothing New (feat. Phoebe Bridgers) (Taylor's Version) (From The Vault)",
    "Snow On The Beach":"Snow On The Beach (feat. Lana Del Rey)",
    "Soon You'll Get Better":"Soon You'll Get Better (feat. The Chicks)",
    "The Last Time (Taylor's Version)":"The Last Time (feat. Gary Lightbody of Snow Patrol) (Taylor's Version)"}
     ,inplace=True)

### Changes tracker
8 removals due to non studio
27 lowercase because of folklore/evermore
4 1989 TV title shifts
3 removals due to singles
1 rename Teardrops
8 rename due to featured artists

---
51 changes


## Re-attempt merge with updates

In [89]:
bill_songs_ts_oct_06_dec_23.shape

(1482, 7)

In [90]:
# Count of songs
unique_songs5 = bill_songs_ts_oct_06_dec_23['song_title'].unique()

In [91]:
unique_songs5.shape

(215,)

In [92]:
df_songs.shape

(285, 42)

In [93]:
# Merge on song title, with updates

bill_songs_merge2 = bill_songs_ts_oct_06_dec_23.merge(df_songs, how='left', on='song_title', indicator=True)

In [94]:
bill_songs_merge2.shape

(1482, 49)

In [95]:
bill_songs_merge2.columns

Index(['chart_week', 'chart_position', 'song_title', 'artist',
       'prior_wk_position', 'peak_pos', 'wks_on_chart', 'album_title',
       'song_release_date', 'track', 'acou', 'dance', 'ener', 'instru', 'live',
       'loud', 'speech', 'temp', 'val', 'pop', 'dur', 'album_id', 'song_id',
       'days', 'tot_streams', 'last_stream_update', 'streams_n', 'top_song',
       'top_song_flag', 'top_s_cnt', 'tv', 'alb_num', 'album_announce_mo',
       'alb_ann_day', 'alb_ann_yr', 'album_announce_date', 'album_release_mo',
       'alb_r_day', 'alb_r_yr', 'album_release_date', 'w1_sales',
       'top_alb_count', 'top_alb_normal', 'alb_track_cnt', 'tvflag',
       'sales_group', 'tot_streams_group', 'alb_genre', '_merge'],
      dtype='object')

In [96]:
bill_songs_merge2['_merge'].value_counts(dropna=False)

_merge
both          1383
left_only       99
right_only       0
Name: count, dtype: int64

In [97]:
unknown_alb_2 = bill_songs_merge2[bill_songs_merge2['_merge'] == 'left_only']

In [98]:
unknown_alb_2 = unknown_alb_2['song_title']

In [99]:
unknown_alb_2

1       Is It Over Now? (Taylor's Version) [From The V...
2       Is It Over Now? (Taylor's Version) [From The V...
4       Is It Over Now? (Taylor's Version) [From The V...
6       Now That We Don't Talk (Taylor's Version)(From...
7                                        You're Losing Me
9       Is It Over Now? (Taylor's Version) [From The V...
10              "Slut!"(Taylor's Version)(From The Vault)
11        Say Don't Go (Taylor's Version)(From The Vault)
12      Now That We Don't Talk (Taylor's Version)(From...
14      Is It Over Now? (Taylor's Version) [From The V...
15        Say Don't Go (Taylor's Version)(From The Vault)
16              "Slut!"(Taylor's Version)(From The Vault)
18      Now That We Don't Talk (Taylor's Version)(From...
19      Now That We Don't Talk (Taylor's Version)(From...
20              "Slut!"(Taylor's Version)(From The Vault)
22      Is It Over Now? (Taylor's Version) [From The V...
26      Suburban Legends (Taylor's Version)(From The V...
28        Say 

Drop: "You're Losing Me", "Safe & Sound", "Today Was A Fairytale"

Rename: "'tis the damn season'": "'tis the damn season", "Exhile":"exile",
"Evermore":"evermore", "Coney Island":"coney island", "ME !": "ME! (feat. Brendon Urie of Panic! At The Disco)", "Run (Taylor's Version) (From The Vault)": "Run (feat. Ed Sheeran) (Taylor's Version) (From The Vault)"


Issues:
- x You're Losing Me
- X Me!
- xToday Was A Fairytale
- X Exile
- X Evermore
- X Coney Island
- X 'tis the damn season'
- x "Run (Taylor's Version) (From The Vault)"

Start here, will revisit Taylor's Version issues after

In [100]:
drop_again = ["You're Losing Me", "Safe & Sound", "Today Was A Fairytale"]

In [101]:
bill_songs_ts_oct_06_dec_23 = bill_songs_ts_oct_06_dec_23[~bill_songs_ts_oct_06_dec_23['song_title'].isin(drop_again)]

In [102]:
# Renames

bill_songs_ts_oct_06_dec_23['song_title'].replace({
                            "'tis the damn season'": "'tis the damn season", 
                            "Exhile":"exile",
                            "Evermore":"evermore", 
                            "Coney Island":"coney island", 
                            "ME!": "ME! (feat. Brendon Urie of Panic! At The Disco)",
                            "Run (Taylor's Version) (From The Vault)": "Run (feat. Ed Sheeran) (Taylor's Version) (From The Vault)"
                             }, inplace=True)

- Renames based on formatting () and spaces

'"Slut!"(Taylor\'s Version)(From The Vault)':'"Slut!" (Taylor\'s Version) (From The Vault)',
"Is It Over Now? (Taylor's Version) [From The Vault]": "Is It Over Now (Taylor's Version) (From The Vault)",
"Now That We Don't Talk (Taylor's Version)(From the Vault)":"Now That We Don't Talk (Taylor's Version) (From the Vault)",
"Say Don't Go (Taylor's Version)(From The Vault)":"Say Don't Go (Taylor's Version) (From The Vault)",
"Suburban Legends (Taylor's Version)(From The Vault)":"Suburban Legends (Taylor's Version) (From The Vault)",
"No Body, No Crime": "no body, no crime (feat. HAIM)"

In [103]:
# Renames based on formatting () and spaces

bill_songs_ts_oct_06_dec_23['song_title'].replace({
    '"Slut!"(Taylor\'s Version)(From The Vault)': '"Slut!" (Taylor\'s Version) (From The Vault)',
"Is It Over Now? (Taylor's Version) [From The Vault]": "Is It Over Now (Taylor's Version) (From The Vault)",
"Now That We Don't Talk (Taylor's Version)(From the Vault)":"Now That We Don't Talk (Taylor's Version) (From the Vault)",
"Say Don't Go (Taylor's Version)(From The Vault)":"Say Don't Go (Taylor's Version) (From The Vault)",
"Suburban Legends (Taylor's Version)(From The Vault)":"Suburban Legends (Taylor's Version) (From The Vault)",
"No Body, No Crime": "no body, no crime (feat. HAIM)"},
    inplace=True)

### Attempt merge again

In [104]:
# Merge on song title, with updates

bill_songs_merge3 = bill_songs_ts_oct_06_dec_23.merge(df_songs, how='left', on='song_title', indicator=True)

In [105]:
bill_songs_merge3['_merge'].value_counts(dropna=False)

_merge
both          1419
left_only       27
right_only       0
Name: count, dtype: int64

In [106]:
unknown_alb_3 = bill_songs_merge3[bill_songs_merge3['_merge'] == 'left_only']

In [107]:
unknown_alb_3['song_title'].value_counts(dropna=False)

song_title
Is It Over Now (Taylor's Version) (From The Vault)             8
Now That We Don't Talk (Taylor's Version)(From The Vault)      6
Exile                                                          5
You All Over Me (Taylor's Version) (From The Vault)            2
'tis the damn season                                           2
When Emma Falls In Love (Taylor's Version) (From The Vault)    1
coney island                                                   1
evermore                                                       1
dorthea                                                        1
Name: count, dtype: int64

In [108]:
unknown_alb_3['song_title'].unique()

array(["Is It Over Now (Taylor's Version) (From The Vault)",
       "Now That We Don't Talk (Taylor's Version)(From The Vault)",
       "When Emma Falls In Love (Taylor's Version) (From The Vault)",
       "You All Over Me (Taylor's Version) (From The Vault)",
       "'tis the damn season", 'coney island', 'evermore', 'dorthea',
       'Exile'], dtype=object)

In [109]:
# Make more formatting shifts 

bill_songs_ts_oct_06_dec_23['song_title'].replace({
                            "Is It Over Now (Taylor's Version) (From The Vault)": "Is It Over Now? (Taylor's Version) (From The Vault)",
                            "Now That We Don't Talk (Taylor's Version)(From The Vault)":"Now That We Don't Talk (Taylor's Version) (From The Vault)",
                            "When Emma Falls In Love (Taylor's Version) (From The Vault)": "When Emma Falls in Love (Taylor's Version) (From The Vault)",
                            "You All Over Me (Taylor's Version) (From The Vault)": "You All Over Me (feat. Maren Morris) (Taylor's Version) (From The Vault)",
                            "'tis the damn season": "tis the damn season",
                            "coney island": "coney island (feat. The National)",
                            'evermore': "evermore (feat. Bon Iver)",
                            "dorthea": "dorothea",
                            "Exile": "exile (feat. Bon Iver)"},
                        inplace=True)

## Merge attempt 4

In [110]:
# Merge on song title, with updates

bill_songs_merge4 = bill_songs_ts_oct_06_dec_23.merge(df_songs, how='left', on='song_title', indicator=True)

In [111]:
bill_songs_merge4['_merge'].value_counts(dropna=False)

_merge
both          1444
left_only        2
right_only       0
Name: count, dtype: int64

In [112]:
last_issues = bill_songs_merge4[bill_songs_merge4['_merge'] == 'left_only']

In [113]:
last_issues

Unnamed: 0,chart_week,chart_position,song_title,artist,prior_wk_position,peak_pos,wks_on_chart,album_title,song_release_date,track,...,album_release_date,w1_sales,top_alb_count,top_alb_normal,alb_track_cnt,tvflag,sales_group,tot_streams_group,alb_genre,_merge
414,2021-01-02,91,tis the damn season,Taylor Swift,39,39,2,,,,...,,,,,,,,,,left_only
418,2020-12-26,39,tis the damn season,Taylor Swift,0,39,1,,,,...,,,,,,,,,,left_only


In [114]:
df_songs.columns

Index(['song_title', 'album_title', 'song_release_date', 'track', 'acou',
       'dance', 'ener', 'instru', 'live', 'loud', 'speech', 'temp', 'val',
       'pop', 'dur', 'album_id', 'song_id', 'days', 'tot_streams',
       'last_stream_update', 'streams_n', 'top_song', 'top_song_flag',
       'top_s_cnt', 'tv', 'alb_num', 'album_announce_mo', 'alb_ann_day',
       'alb_ann_yr', 'album_announce_date', 'album_release_mo', 'alb_r_day',
       'alb_r_yr', 'album_release_date', 'w1_sales', 'top_alb_count',
       'top_alb_normal', 'alb_track_cnt', 'tvflag', 'sales_group',
       'tot_streams_group', 'alb_genre'],
      dtype='object')

In [115]:
tis_check = df_songs[df_songs['album_id'] == "EVER"]

In [116]:
tis_check

Unnamed: 0,song_title,album_title,song_release_date,track,acou,dance,ener,instru,live,loud,...,alb_r_yr,album_release_date,w1_sales,top_alb_count,top_alb_normal,alb_track_cnt,tvflag,sales_group,tot_streams_group,alb_genre
123,willow,evermore (deluxe version),12/11/2020,1,0.833,0.392,0.574,0.00179,0.145,-9.195,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,High total streams,Alternative/Folk/Indie
124,champagne problems,evermore (deluxe version),12/11/2020,2,0.92,0.462,0.24,0.0,0.113,-12.077,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,High total streams,Alternative/Folk/Indie
125,gold rush,evermore (deluxe version),12/11/2020,3,0.83,0.512,0.462,0.166,0.121,-10.491,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
126,‘tis the damn season,evermore (deluxe version),12/11/2020,4,0.735,0.575,0.434,6.6e-05,0.105,-8.193,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
127,tolerate it,evermore (deluxe version),12/11/2020,5,0.878,0.316,0.361,2.7e-05,0.0797,-10.381,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
128,"no body, no crime (feat. HAIM)",evermore (deluxe version),12/11/2020,6,0.418,0.546,0.613,0.0,0.103,-7.589,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
129,happiness,evermore (deluxe version),12/11/2020,7,0.87,0.559,0.334,0.0,0.114,-10.733,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
130,dorothea,evermore (deluxe version),12/11/2020,8,0.696,0.605,0.488,0.0,0.129,-8.322,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
131,coney island (feat. The National),evermore (deluxe version),12/11/2020,9,0.819,0.537,0.537,0.000904,0.142,-11.266,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie
132,ivy,evermore (deluxe version),12/11/2020,10,0.855,0.515,0.545,2e-05,0.0921,-9.277,...,2020,2020-12-11,154500,151,0.142051,17,False,Low first week sales,Medium total streams,Alternative/Folk/Indie


In [117]:
# Make more formatting shifts 

bill_songs_ts_oct_06_dec_23['song_title'].replace({
                            "tis the damn season":"‘tis the damn season"},
                        inplace=True)

In [118]:
# Merge on song title, with updates

bill_songs_merge5 = bill_songs_ts_oct_06_dec_23.merge(df_songs, how='left', on='song_title', indicator=True)

In [119]:
bill_songs_merge5['_merge'].value_counts(dropna=False)

_merge
both          1446
left_only        0
right_only       0
Name: count, dtype: int64

## Succesfully merged billboard song data with all song information!
#### Exported below

# Data export

In [120]:
# Export billboard 100 with all artists from Taylor's career time window
# Note: some formatting issues for Taylor's songs (corrected in below df)

df_sub.to_csv(os.path.join(path, '02 Manipulated Data', 'df_hot_100_all_artists_dec_23.csv'))

In [121]:
# Export billboard 100 songs with Taylor Swift only, during her career
# Includes all song information

bill_songs_merge5.to_csv(os.path.join(path, '02 Manipulated Data', 'df_hot_100_ts_only_dec_23.csv'))