# Spotify Music Popularity Capstone - Data Collection & Wrangling

Greg Welliver   

In [1]:
# Import relevant libraries and packages.
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import statsmodels.api as sm
from statsmodels.graphics.api import abline_plot
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn import linear_model, preprocessing 
import warnings
from scipy import stats
import re

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
#import sys

credentials = []
f = open('credentials.txt', 'r')
for line in f:
    #add line to credentials list (remove trailing whitespace characters)
    credentials.append(line.rstrip())

# create Spotify object with credentials
client_credentials_manager = SpotifyClientCredentials(credentials[0], credentials[1])
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [2]:
### Links to Top Hits Spotify playlists

topLink2021 = "https://open.spotify.com/playlist/5GhQiRkGuqzpWZSE7OU4Se"
topLink2020 = "https://open.spotify.com/playlist/2fmTTbBkXi8pewbUvG3CeZ"
topLink2019 = "https://open.spotify.com/playlist/37i9dQZF1DWVRSukIED0e9"
topLink2018 = "https://open.spotify.com/playlist/37i9dQZF1DXe2bobNYDtW8"
topLink2017 = "https://open.spotify.com/playlist/37i9dQZF1DWTE7dVUebpUW"
topLink2016 = "https://open.spotify.com/playlist/37i9dQZF1DX8XZ6AUo9R4R"
topLink2015 = "https://open.spotify.com/playlist/37i9dQZF1DX9ukdrXQLJGZ"
topLink2014 = "https://open.spotify.com/playlist/37i9dQZF1DX0h0QnLkMBl4"
topLink2013 = "https://open.spotify.com/playlist/37i9dQZF1DX3Sp0P28SIer"
topLink2012 = "https://open.spotify.com/playlist/37i9dQZF1DX0yEZaMOXna3"
topLink2011 = "https://open.spotify.com/playlist/37i9dQZF1DXcagnSNtrGuJ"
topLink2010 = "https://open.spotify.com/playlist/37i9dQZF1DXc6IFF23C9jj"
topLink2009 = "https://open.spotify.com/playlist/37i9dQZF1DX4UkKv8ED8jp"
topLink2008 = "https://open.spotify.com/playlist/37i9dQZF1DWYuGZUE4XQXm"
topLink2007 = "https://open.spotify.com/playlist/37i9dQZF1DX3j9EYdzv2N9"
topLink2006 = "https://open.spotify.com/playlist/37i9dQZF1DX1vSJnMeoy3V"
topLink2005 = "https://open.spotify.com/playlist/37i9dQZF1DWWzQTBs5BHX9"
topLink2004 = "https://open.spotify.com/playlist/37i9dQZF1DWTWdbR13PQYH"
topLink2003 = "https://open.spotify.com/playlist/37i9dQZF1DXaW8fzPh9b08"
topLink2002 = "https://open.spotify.com/playlist/37i9dQZF1DX0P7PzzKwEKl"
topLink2001 = "https://open.spotify.com/playlist/37i9dQZF1DX9Ol4tZWPH6V"
topLink2000 = "https://open.spotify.com/playlist/37i9dQZF1DWUZv12GM5cFk"

allLinks = [topLink2000, topLink2001, topLink2002, topLink2003, topLink2004, topLink2005, topLink2006, topLink2007, topLink2008, topLink2009, topLink2010, topLink2011, topLink2012, topLink2013, topLink2014, topLink2015, topLink2016, topLink2017, topLink2018, topLink2019, topLink2020, topLink2021]

allLinks2 = ["https://open.spotify.com/playlist/5GhQiRkGuqzpWZSE7OU4Se", "https://open.spotify.com/playlist/2fmTTbBkXi8pewbUvG3CeZ"]

allLinks3 = [topLink2018, topLink2019, topLink2020, topLink2021]

In [3]:
#YearDictionary = {"https://open.spotify.com/playlist/5GhQiRkGuqzpWZSE7OU4Se": 2021, "https://open.spotify.com/playlist/2fmTTbBkXi8pewbUvG3CeZ": 2020, "https://open.spotify.com/playlist/37i9dQZF1DWVRSukIED0e9": 2019}
YearDictionary = {"https://open.spotify.com/playlist/5GhQiRkGuqzpWZSE7OU4Se": 2021, "https://open.spotify.com/playlist/2fmTTbBkXi8pewbUvG3CeZ": 2020, "https://open.spotify.com/playlist/37i9dQZF1DWVRSukIED0e9": 2019, "https://open.spotify.com/playlist/37i9dQZF1DXe2bobNYDtW8": 2018, "https://open.spotify.com/playlist/37i9dQZF1DWTE7dVUebpUW": 2017, "https://open.spotify.com/playlist/37i9dQZF1DX8XZ6AUo9R4R": 2016, "https://open.spotify.com/playlist/37i9dQZF1DX9ukdrXQLJGZ": 2015, "https://open.spotify.com/playlist/37i9dQZF1DX0h0QnLkMBl4": 2014, "https://open.spotify.com/playlist/37i9dQZF1DX3Sp0P28SIer": 2013, "https://open.spotify.com/playlist/37i9dQZF1DX0yEZaMOXna3": 2012, "https://open.spotify.com/playlist/37i9dQZF1DXcagnSNtrGuJ": 2011, "https://open.spotify.com/playlist/37i9dQZF1DXc6IFF23C9jj": 2010, "https://open.spotify.com/playlist/37i9dQZF1DX4UkKv8ED8jp": 2009, "https://open.spotify.com/playlist/37i9dQZF1DWYuGZUE4XQXm": 2008, "https://open.spotify.com/playlist/37i9dQZF1DX3j9EYdzv2N9": 2007, "https://open.spotify.com/playlist/37i9dQZF1DX1vSJnMeoy3V": 2006, "https://open.spotify.com/playlist/37i9dQZF1DWWzQTBs5BHX9": 2005, "https://open.spotify.com/playlist/37i9dQZF1DWTWdbR13PQYH": 2004, "https://open.spotify.com/playlist/37i9dQZF1DXaW8fzPh9b08": 2003, "https://open.spotify.com/playlist/37i9dQZF1DX0P7PzzKwEKl": 2002, "https://open.spotify.com/playlist/37i9dQZF1DX9Ol4tZWPH6V": 2001, "https://open.spotify.com/playlist/37i9dQZF1DWUZv12GM5cFk": 2000}

## Data Collection

- Data collection was quite the challenge for this project!  I leveraged the spotipy library, on-the-fly learning, and previous work done by github community members to assist.  I specify each step below to get to the end product: detailed track information on 255,096 tracks from the years 2000 - 2021.  This includes 1,863 songs that placed in the Top Hits on spotify for a given year.

#### WORKING CODE, MARKDOWN FOR NOW
# function to get information on top tracks, passing in the playlist URI

track_names = []
track_ids = []
track_uris = []
artist_names = []
album_release_dates = []
track_years = []
popular_years = []

def tophitsyear(url):
    for track in sp.playlist_tracks(url)["items"]:
        track_name = track["track"]["name"]
        track_names.append(track_name)
        track_id = track["track"]["id"]
        track_ids.append(track_id)
        track_uri = track["track"]["uri"]
        track_uris.append(track_uri)
        #artist_name = track["track"]["artists"] # too difficult to get
        #artist_names.append(artist_name)
        album_release_date = track["track"]["album"]["release_date"]
        album_release_dates.append(album_release_date)
        track_year = track["track"]["album"]["release_date"].split("-")[0]
        track_years.append(track_year)
        popular_year = YearDictionary[(url)]
        popular_years.append(popular_year)
        #print(popular_year)
        #track_years.append(track_year)
        #print(track_name, track_id, track_uri)
        #return track_name, track_id, track_uri
    #return track_names, track_ids, track_years, popular_year
    #return track_name, track_id, track_uri, album_release_date, popular_year

#tophitsyear(allLinks2)
#empty_list = pd.DataFrame(empty_list)
#print(popular_years)

#use list comprehension with function
final_list1 = [tophitsyear(url) for url in allLinks]

print(final_list1)

#### WORKING CODE, MARKDOWN FOR NOW
# code to create dataframe from populated lists
df = pd.DataFrame(list(zip(track_uris, track_ids, track_names, album_release_dates, track_years, popular_years)), columns=['track_uri', 'track_id', 'track_name', 'album_release_date', 'track_year', 'popular_year'])
df.head()
#df.shape

#### WORKING CODE, MARKDOWN FOR NOW
track_uris2 = df['track_uri']
identifier_list = track_uris2.values.tolist()
print(identifier_list)

#### WORKING CODE, MARKDOWN FOR NOW
AudioFeatures2 = [sp.audio_features(identifier)[0] for identifier in track_uris2]
print(AudioFeatures2)

#### WORKING CODE, MARKDOWN FOR NOW
# Use to read AudrioFeatures2 that was created from list comprehension
df5 = pd.json_normalize(AudioFeatures2)
print(df5)

#### Steps below were to get all song audio features for years 2000-2021

#### WORKING CODE, MARKDOWN FOR NOW
# Initializing lists for basic track details
artist_name = []
artist_id = []
album_name = []
album_id = []
track_name = []
track_id = []
track_pop = []
track_year = []
track_spotify_genre = []
track_uri = []

# Initiating lists for track audio features
key = []
acousticness = []
danceability = []
duration_ms = []
energy = []
instrumentalness = []
liveness = []
loudness = []
mode = []
speechiness = []
tempo = []
time_signature = []
valence = []

# Initializing genre
artist_genre = []

#### Below code is to retrieve collection of song ids and basic track details - 50 tracks per. I had to run two years at a time to prevent it from timing out 

#### WORKING CODE, MARKDOWN FOR NOW
for genre in ['hip-hop', 'house','indie','pop','r&b','rock', 'soundtrack', 'jazz', 'classical', 'country', 'dance', 'alternative', 'movie', 'world', 'blues']:
  for year in range(2020,2022): 
      query = "genre:" + genre + " year:" + str(year)
      for i in range(0,1000,50):
          track_results = sp.search(q=query, type='track', limit=50, offset=i) # change 'q=' argument for year
          for item in track_results['tracks']['items']:
              artist_name.append(item['album']['artists'][0]['name'])
              artist_id.append(item['album']['artists'][0]['id'])
              track_name.append(item['name'])
              track_id.append(item['id'])
              track_pop.append(item['popularity'])
              track_year.append(year)
              track_spotify_genre.append(genre)
              #album_name.append(item['album']['name'])
              #album_id.append(item['album']['id'])
              track_uri.append(item['uri'])

len(artist_name)

In [4]:
#dataset = pd.read_csv("../Data/AllSongs 2000-2021.csv")

#### WORKING CODE, MARKDOWN FOR NOW
track_ids = dataset.track_id
track_ids.head()

### Code below retrieves all audio features for the given track ids

#### WORKING CODE, MARKDOWN FOR NOW

# this gets all audio features for the given track ids
for i in range(0,len(track_ids),50):
  track_features = sp.audio_features(track_ids[i:i+50]) # returns features of first 50 tracks
  for j in range(0,len(track_features)): # iterate over those 50 tracks
    if track_features[j] is None:  # if track does not have audio features from spotify
      track_features[j] = {}
    key.append(track_features[j].get('key', np.nan))
    acousticness.append(track_features[j].get('acousticness', np.nan))
    danceability.append(track_features[j].get('danceability', np.nan))
    duration_ms.append(track_features[j].get('duration_ms', np.nan))
    energy.append(track_features[j].get('energy', np.nan))
    instrumentalness.append(track_features[j].get('instrumentalness', np.nan))
    liveness.append(track_features[j].get('liveness', np.nan))
    loudness.append(track_features[j].get('loudness', np.nan))
    mode.append(track_features[j].get('mode', np.nan))
    speechiness.append(track_features[j].get('speechiness', np.nan))
    tempo.append(track_features[j].get('tempo', np.nan))
    time_signature.append(track_features[j].get('time_signature', np.nan))
    valence.append(track_features[j].get('valence', np.nan))

### Code below retrieves second artists on tracks

I'll use this data to create a "collaboration" variable

#### WORKING CODE, MARKDOWN FOR NOW

# get second artist on tracks
for genre in ['hip-hop', 'house','indie','pop','r&b','rock', 'soundtrack', 'jazz', 'classical', 'country', 'dance', 'alternative', 'movie', 'world', 'blues']:
    for year in range(2020,2023): 
          query = "genre:" + genre + " year:" + str(year)
          for i in range(0,1000,50):
              track_results = sp.search(q=query, type='track', limit=50, offset=i) # change 'q=' argument for year
              for item in track_results['tracks']['items']:
                    try:
                        second_artist_name.append(item['album']['artists'][1]['name'])
                        track_id.append(item['id'])
                        track_uri.append(item['uri'])
                    except:
                        pass



Below code creates dataframe of audio features

#### WORKING CODE, MARKDOWN FOR NOW

# create dataframe of audio features
AllTracksAudio = pd.DataFrame({"track_id":track_ids,
                   "key":key,
                    "acousticness":acousticness,
                    "danceability":danceability,
                    "duration_ms":duration_ms,
                    "energy":energy,
                    "instrumentalness":instrumentalness,
                    "liveness":liveness,
                    "loudness":loudness,
                    "mode":mode,
                    "speechiness":speechiness,
                    "tempo":tempo,
                    "time_signature":time_signature,
                    "valence":valence,
                    })

## Combining datasets

At this point, I now have data collected, but disperesed among different files.

- AllSongsAudioFeatures.csv: songs with all audio features from 2000-2021. contains audio features on 315,618 songs. I will use this as the primary file to merge data into.  There are about 60,000 duplicate track_ids that we want to drop
- AllSongs.csv: high-level informational data (track_name, artist_name, track_genre, popularity) on 315,618 songs
- AllCharts 2000-2021.csv: all of the Top 100 songs in each year from 2000-2021
- SecondArtists.csv: second artists on tracks from 2000-2021
- artists.csv: used this file to pull artist countries
- collab_columns.csv: additional file I worked on to come up with collaborations
- album_ratings.cvs: file for retrieving record label information for tracks


In [5]:
# load AllSongsAudioFeatures
AllSongsAudioFeatures = pd.read_csv("../Data/AllSongsAudioFeatures.csv", index_col =[0])
AllSongsAudioFeatures.drop_duplicates(keep='first', inplace=True) # drop ~60,000 rows that don't have unique track_ID

In [6]:
#confirm that dupclicates are removed
AllSongsAudioFeatures.track_id.value_counts()

3yfqSUWxFvZELEM4PmlwIR    1
0r3ur5jBMpjkcrqbVVvgs7    1
4lXBIshfa0juWxi6odtLHt    1
4iddUbzyhbLEqhsVcZmc7u    1
4OfCkjpcpqKgr9igYyOSHm    1
                         ..
5yqO0ETHCWHhy1Jf4ltayp    1
7A5uV4KrkkOO7ZipSPCAI6    1
6rZwfQkFFafuyU1SDSBx6u    1
5elDLvpi4MB1NT02PjbgtE    1
0IUtfOQZrk5NTuor7XvAW0    1
Name: track_id, Length: 255098, dtype: int64

In [7]:
len(AllSongsAudioFeatures)

255098

In [8]:
AllSongsAudioFeatures.shape

(255098, 14)

In [9]:
# load AllSongs
AllSongs = pd.read_csv("../Data/AllSongs 2000-2021.csv", index_col =[0])
AllSongs.drop_duplicates(subset='track_id', keep='first', inplace=True) # drop ~60,000 rows that don't have unique track_ID

In [10]:
AllSongs.shape

(267210, 8)

In [11]:
# load AllCharts
AllCharts = pd.read_csv("../Data/AllCharts 2000-2021.csv", index_col =[0])
AllCharts.drop_duplicates(subset='track_id', keep='first', inplace=True) # drop second value of 96 songs that appeared in the Top 100 list in two different years

In [12]:
AllCharts.shape

(2104, 7)

In [13]:
#check for missing values
AllCharts.isna().sum()

track_uri             0
track_id              0
track_name            0
album_release_date    0
track_year            0
popular_year          0
Top100                0
dtype: int64

In [14]:
# load second artists
SecondArtists = pd.read_csv("../Data/SecondArtists.csv", index_col =[0])
SecondArtists.drop_duplicates(keep='first', inplace=True) # 

In [15]:
AllSongs.track_name.value_counts()

Home                                 122
Silent Night                         120
Winter Wonderland                    112
Intro                                 92
O Holy Night                          88
                                    ... 
Theme From 27 Dresses                  1
Nim's Island                           1
Rooftop Camp Out - Score               1
Formation Of The Terra Cotta Army      1
Cheek To Cheek - Live                  1
Name: track_name, Length: 196942, dtype: int64

In [16]:
# load artist deatil
artist_detail = pd.read_csv("../Data/artists.csv", index_col =[0])
artist_detail.drop_duplicates(keep='first', inplace=True) # 

  artist_detail = pd.read_csv("../Data/artists.csv", index_col =[0])


In [17]:
index_names = AllSongs[ (AllSongs['track_name'] == AllSongs['track_name']) & (AllSongs['art_name'] == AllSongs['art_name'])].index

In [18]:
len(index_names)

267208

AllSongs.drop(index_names, inplace = True)

len(AllSongs)

In [19]:
AllSongs.track_name.value_counts()

Home                                 122
Silent Night                         120
Winter Wonderland                    112
Intro                                 92
O Holy Night                          88
                                    ... 
Theme From 27 Dresses                  1
Nim's Island                           1
Rooftop Camp Out - Score               1
Formation Of The Terra Cotta Army      1
Cheek To Cheek - Live                  1
Name: track_name, Length: 196942, dtype: int64

In [20]:
AllSongsAudioFeatures.head(5)

Unnamed: 0,track_id,key,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,3yfqSUWxFvZELEM4PmlwIR,5,0.0302,0.949,284200,0.661,0.0,0.0454,-4.244,0,0.0572,104.504,4,0.76
1,0I3q5fE6wg7LIfHGngUTnV,4,0.143,0.843,270507,0.806,0.0,0.0771,-5.946,0,0.269,94.948,4,0.613
2,3UmaczJpikHgJFyBTAJVoz,6,0.0371,0.78,404107,0.768,2e-06,0.518,-4.325,0,0.238,80.063,4,0.507
3,3Gf5nttwcX9aaSQXRWidEZ,7,0.0616,0.85,291782,0.7,2e-06,0.244,-6.49,1,0.0478,101.875,4,0.722
4,6glsMWIMIxQ4BedzLqGVi4,5,0.0281,0.725,240027,0.64,0.0,0.099,-7.905,0,0.332,166.028,3,0.915


In [21]:
AllSongs.head(5)

Unnamed: 0,track_id,track_name,track_year,track_spotify_genre,art_name,art_id,track_uri,track_pop
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3yfqSUWxFvZELEM4PmlwIR,88
1,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:0I3q5fE6wg7LIfHGngUTnV,82
2,3UmaczJpikHgJFyBTAJVoz,Stan,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3UmaczJpikHgJFyBTAJVoz,85
3,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75
4,6glsMWIMIxQ4BedzLqGVi4,"So Fresh, So Clean",2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:6glsMWIMIxQ4BedzLqGVi4,72


### Begin merges

In [22]:
# get all audio features for all tracks
df1 = pd.merge(AllSongs, AllSongsAudioFeatures, on='track_id', how='left')

In [23]:
df1.shape

(267210, 21)

In [24]:
df1.head(5)

Unnamed: 0,track_id,track_name,track_year,track_spotify_genre,art_name,art_id,track_uri,track_pop,key,acousticness,...,duration_ms,energy,instrumentalness,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3yfqSUWxFvZELEM4PmlwIR,88,5.0,0.0302,...,284200.0,0.661,0.0,0.0454,-4.244,0.0,0.0572,104.504,4.0,0.76
1,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:0I3q5fE6wg7LIfHGngUTnV,82,4.0,0.143,...,270507.0,0.806,0.0,0.0771,-5.946,0.0,0.269,94.948,4.0,0.613
2,3UmaczJpikHgJFyBTAJVoz,Stan,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3UmaczJpikHgJFyBTAJVoz,85,6.0,0.0371,...,404107.0,0.768,2e-06,0.518,-4.325,0.0,0.238,80.063,4.0,0.507
3,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75,7.0,0.0616,...,291782.0,0.7,2e-06,0.244,-6.49,1.0,0.0478,101.875,4.0,0.722
4,6glsMWIMIxQ4BedzLqGVi4,"So Fresh, So Clean",2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:6glsMWIMIxQ4BedzLqGVi4,72,5.0,0.0281,...,240027.0,0.64,0.0,0.099,-7.905,0.0,0.332,166.028,3.0,0.915


In [25]:
AllCharts.head()

Unnamed: 0,track_uri,track_id,track_name,album_release_date,track_year,popular_year,Top100
0,spotify:track:6naxalmIoLFWR0siv8dnQQ,6naxalmIoLFWR0siv8dnQQ,Oops!...I Did It Again,2000-05-16,2000,2000,Y
1,spotify:track:2m1hi0nfMR9vdGC8UcrnwU,2m1hi0nfMR9vdGC8UcrnwU,All The Small Things,1999-06-01,1999,2000,Y
2,spotify:track:3y4LxiYMgDl4RethdzpmNe,3y4LxiYMgDl4RethdzpmNe,Breathe,1999-11-09,1999,2000,Y
3,spotify:track:0v1XpBHnsbkCn7iJ9Ucr1l,0v1XpBHnsbkCn7iJ9Ucr1l,It's My Life,2000-01-01,2000,2000,Y
4,spotify:track:62bOmKYxYg7dhrC6gH9vFn,62bOmKYxYg7dhrC6gH9vFn,Bye Bye Bye,2000-03-21,2000,2000,Y


In [26]:
#
#df1 = df1.merge(AllCharts[['album_release_date', 'popular_year', 'Top100']], on='track_id', how='left')
df1 = pd.merge(df1, AllCharts[['track_id', 'album_release_date', 'popular_year', 'Top100']], on='track_id', how='left')

In [27]:
df1.head(8)

#df1.isna().sum()

Unnamed: 0,track_id,track_name,track_year,track_spotify_genre,art_name,art_id,track_uri,track_pop,key,acousticness,...,liveness,loudness,mode,speechiness,tempo,time_signature,valence,album_release_date,popular_year,Top100
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3yfqSUWxFvZELEM4PmlwIR,88,5.0,0.0302,...,0.0454,-4.244,0.0,0.0572,104.504,4.0,0.76,2000-05-23,2000.0,Y
1,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:0I3q5fE6wg7LIfHGngUTnV,82,4.0,0.143,...,0.0771,-5.946,0.0,0.269,94.948,4.0,0.613,2000-10-31,2001.0,Y
2,3UmaczJpikHgJFyBTAJVoz,Stan,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3UmaczJpikHgJFyBTAJVoz,85,6.0,0.0371,...,0.518,-4.325,0.0,0.238,80.063,4.0,0.507,2000-05-23,2001.0,Y
3,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75,7.0,0.0616,...,0.244,-6.49,1.0,0.0478,101.875,4.0,0.722,2000-06-27,2001.0,Y
4,6glsMWIMIxQ4BedzLqGVi4,"So Fresh, So Clean",2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:6glsMWIMIxQ4BedzLqGVi4,72,5.0,0.0281,...,0.099,-7.905,0.0,0.332,166.028,3.0,0.915,,,
5,23wfXwnsPZYe5A1xXRHb3J,The Way I Am,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:23wfXwnsPZYe5A1xXRHb3J,75,4.0,0.144,...,0.309,-2.692,0.0,0.265,87.248,4.0,0.315,,,
6,3wMUvT6eIw2L5cZFG1yH9j,Country Grammar (Hot Shit),2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3wMUvT6eIw2L5cZFG1yH9j,69,2.0,0.00689,...,0.142,-6.822,1.0,0.108,162.831,4.0,0.565,2000-06-27,2000.0,Y
7,5eGEc27nnhtmcOh6RC890a,Kill You,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:5eGEc27nnhtmcOh6RC890a,70,11.0,0.155,...,0.296,-8.268,1.0,0.379,106.991,4.0,0.677,,,


In [28]:
artist_detail.head(2)

Unnamed: 0_level_0,artist_mb,artist_lastfm,country_mb,country_lastfm,tags_mb,tags_lastfm,listeners_lastfm,scrobbles_lastfm,ambiguous_artist
mbid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
cc197bad-dc9c-440d-a5b5-d52ba2e14234,Coldplay,Coldplay,United Kingdom,United Kingdom,rock; pop; alternative rock; british; uk; brit...,rock; alternative; britpop; alternative rock; ...,5381567.0,360111850.0,False
a74b1b7f-71a5-4011-9441-d0b5e4122711,Radiohead,Radiohead,United Kingdom,United Kingdom,rock; electronic; alternative rock; british; g...,alternative; alternative rock; rock; indie; el...,4732528.0,499548797.0,False


In [29]:
df1 = pd.merge(df1, SecondArtists[['track_id', 'second_artist']], on='track_id', how='left')


In [30]:
df1 = pd.merge(df1, artist_detail[['country_mb', 'artist_mb']], how = 'left',
                left_on = 'art_name', right_on = 'artist_mb').drop(columns = ['artist_mb'])


In [31]:
df1.head()

Unnamed: 0,track_id,track_name,track_year,track_spotify_genre,art_name,art_id,track_uri,track_pop,key,acousticness,...,mode,speechiness,tempo,time_signature,valence,album_release_date,popular_year,Top100,second_artist,country_mb
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3yfqSUWxFvZELEM4PmlwIR,88,5.0,0.0302,...,0.0,0.0572,104.504,4.0,0.76,2000-05-23,2000.0,Y,,United States
1,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:0I3q5fE6wg7LIfHGngUTnV,82,4.0,0.143,...,0.0,0.269,94.948,4.0,0.613,2000-10-31,2001.0,Y,,
2,3UmaczJpikHgJFyBTAJVoz,Stan,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3UmaczJpikHgJFyBTAJVoz,85,6.0,0.0371,...,0.0,0.238,80.063,4.0,0.507,2000-05-23,2001.0,Y,,United States
3,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75,7.0,0.0616,...,1.0,0.0478,101.875,4.0,0.722,2000-06-27,2001.0,Y,,United States
4,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75,7.0,0.0616,...,1.0,0.0478,101.875,4.0,0.722,2000-06-27,2001.0,Y,,


In [32]:
# there are a number of songs without audio information.  drop all values that don't have acousticness (could have used any variable)
df1 = df1[df1['acousticness'].notna()]
# note this leaves us with 1,863 observations that were Top100

In [33]:
df1.Top100.value_counts()

Y    2751
Name: Top100, dtype: int64

In [34]:
# load data for collaborations
collab_columns = pd.read_csv("../Data/collab_columns.csv")

In [35]:
# merge collaborations data
df1 = df1.merge(collab_columns[['feat', 'Feat2', 'track_id']], how = 'left',
                left_on = 'track_id', right_on = 'track_id')

In [36]:
# load data for music labels
label_detail = pd.read_csv("../Data/album_ratings.csv")

In [37]:
df1.head()

Unnamed: 0,track_id,track_name,track_year,track_spotify_genre,art_name,art_id,track_uri,track_pop,key,acousticness,...,tempo,time_signature,valence,album_release_date,popular_year,Top100,second_artist,country_mb,feat,Feat2
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3yfqSUWxFvZELEM4PmlwIR,88,5.0,0.0302,...,104.504,4.0,0.76,2000-05-23,2000.0,Y,,United States,False,False
1,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:0I3q5fE6wg7LIfHGngUTnV,82,4.0,0.143,...,94.948,4.0,0.613,2000-10-31,2001.0,Y,,,False,False
2,3UmaczJpikHgJFyBTAJVoz,Stan,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3UmaczJpikHgJFyBTAJVoz,85,6.0,0.0371,...,80.063,4.0,0.507,2000-05-23,2001.0,Y,,United States,False,False
3,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75,7.0,0.0616,...,101.875,4.0,0.722,2000-06-27,2001.0,Y,,United States,False,False
4,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,2000,hip-hop,Nelly,2gBjLmx6zQnFGQJCAQpRgw,spotify:track:3Gf5nttwcX9aaSQXRWidEZ,75,7.0,0.0616,...,101.875,4.0,0.722,2000-06-27,2001.0,Y,,,False,False


In [38]:
# merge label data
df1 = df1.merge(label_detail['Label'], how = 'left',
                left_on = 'art_name', right_on = label_detail['Artist'])

In [39]:
df1.drop_duplicates(subset='track_id', keep='first', inplace=True)

## With all of the datasets combined, we need to do some cleaning.  I noticed earlier that there are duplicate track_ids, track_uri, genres per song

In [40]:
# there are a number of songs without critical information.  drop all values that don't have an art_name
df1 = df1[df1['art_name'].notna()]
# note this leaves us with 1,903 observations that were Top100

In [41]:
# there are a number of songs without audio information.  drop all values that don't have acousticness (could have used any variable)
df1 = df1[df1['acousticness'].notna()]
# note this leaves us with 1,863 observations that were Top100

In [42]:
#df3.isna().sum()

In [43]:
df1.head(2)

Unnamed: 0,track_id,track_name,track_year,track_spotify_genre,art_name,art_id,track_uri,track_pop,key,acousticness,...,time_signature,valence,album_release_date,popular_year,Top100,second_artist,country_mb,feat,Feat2,Label
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,2000,hip-hop,Eminem,7dGJo4pcD2V6oG8kP0tJRR,spotify:track:3yfqSUWxFvZELEM4PmlwIR,88,5.0,0.0302,...,4.0,0.76,2000-05-23,2000.0,Y,,United States,False,False,"Aftermath, Interscope, Shady"
12,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,2000,hip-hop,Outkast,1G9G7WwrXka3Z1r7aIDjI7,spotify:track:0I3q5fE6wg7LIfHGngUTnV,82,4.0,0.143,...,4.0,0.613,2000-10-31,2001.0,Y,,,False,False,


In [44]:
#drop some columns that we don't need
df1 = df1.drop(['album_release_date', 'track_uri', 'art_id', 'track_year'], axis=1)

In [45]:
df1.head()

Unnamed: 0,track_id,track_name,track_spotify_genre,art_name,track_pop,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,country_mb,feat,Feat2,Label
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,hip-hop,Eminem,88,5.0,0.0302,0.949,284200.0,0.661,...,104.504,4.0,0.76,2000.0,Y,,United States,False,False,"Aftermath, Interscope, Shady"
12,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,hip-hop,Outkast,82,4.0,0.143,0.843,270507.0,0.806,...,94.948,4.0,0.613,2001.0,Y,,,False,False,
13,3UmaczJpikHgJFyBTAJVoz,Stan,hip-hop,Eminem,85,6.0,0.0371,0.78,404107.0,0.768,...,80.063,4.0,0.507,2001.0,Y,,United States,False,False,"Aftermath, Interscope, Shady"
25,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,hip-hop,Nelly,75,7.0,0.0616,0.85,291782.0,0.7,...,101.875,4.0,0.722,2001.0,Y,,United States,False,False,Universal Motown
43,6glsMWIMIxQ4BedzLqGVi4,"So Fresh, So Clean",hip-hop,Outkast,72,5.0,0.0281,0.725,240027.0,0.64,...,166.028,3.0,0.915,,,,,False,False,


In [46]:
df1 = df1.rename(columns = {'track_pop':'track_popularity', 'track_spotify_genre':'track_genre', 'mode':'modality', 'art_name':'artist_name', 'country_mb':'artist_country'})

In [47]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 255096 entries, 0 to 1063084
Data columns (total 25 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   track_id          255096 non-null  object 
 1   track_name        255096 non-null  object 
 2   track_genre       255096 non-null  object 
 3   artist_name       255096 non-null  object 
 4   track_popularity  255096 non-null  int64  
 5   key               255096 non-null  float64
 6   acousticness      255096 non-null  float64
 7   danceability      255096 non-null  float64
 8   duration_ms       255096 non-null  float64
 9   energy            255096 non-null  float64
 10  instrumentalness  255096 non-null  float64
 11  liveness          255096 non-null  float64
 12  loudness          255096 non-null  float64
 13  modality          255096 non-null  float64
 14  speechiness       255096 non-null  float64
 15  tempo             255096 non-null  float64
 16  time_signature    2

In [48]:
df1.nunique()

track_id            255096
track_name          188539
track_genre             15
artist_name          22772
track_popularity        95
key                     12
acousticness          5032
danceability          1314
duration_ms          84943
energy                2729
instrumentalness      5401
liveness              1753
loudness             30094
modality                 2
speechiness           1545
tempo                89703
time_signature           5
valence               1710
popular_year            22
Top100                   1
second_artist         6883
artist_country         131
feat                     3
Feat2                    3
Label                 2087
dtype: int64

In [49]:
df1.isna().sum()

track_id                 0
track_name               0
track_genre              0
artist_name              0
track_popularity         0
key                      0
acousticness             0
danceability             0
duration_ms              0
energy                   0
instrumentalness         0
liveness                 0
loudness                 0
modality                 0
speechiness              0
tempo                    0
time_signature           0
valence                  0
popular_year        253233
Top100              253233
second_artist       230971
artist_country       71786
feat                     0
Feat2                    0
Label               153863
dtype: int64

In [50]:
display(df1.loc[(df1['artist_country'].notnull()) & (df1['artist_name'] == 'Drake')])

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,feat,Feat2,Label
433183,3QLjDkgLh9AOEHlhQtDuhs,Best I Ever Had,hip-hop,Drake,77,1.0,0.190000,0.395,257690.0,0.922,...,162.361,4.0,0.5580,2009.0,Y,,Canada,FALSE,FALSE,Cash Money
433368,5aooThgj87ID0vgiyvcjPM,I'm Goin In,hip-hop,Drake,63,1.0,0.038900,0.662,225307.0,0.763,...,170.031,4.0,0.5660,,,,Canada,FALSE,FALSE,Cash Money
433525,0JaVdpmiex2EP7bBzyKVTa,November 18th,hip-hop,Drake,60,11.0,0.579000,0.749,188189.0,0.476,...,71.937,4.0,0.5340,,,,Canada,FALSE,FALSE,Cash Money
433549,524wvipGqxPKYWxkjf9y46,Little Bit (feat. Lykke Li),hip-hop,Drake,61,11.0,0.099900,0.758,230484.0,0.888,...,120.392,4.0,0.7710,,,,Canada,Y,FALSE,Cash Money
433577,4i3GraNMzBKze1WsVl38DS,Bria's Interlude (feat. Omarion),hip-hop,Drake,60,2.0,0.209000,0.490,139575.0,0.507,...,147.037,3.0,0.2370,,,,Canada,Y,FALSE,Cash Money
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005511,3mDFLytDotXo2p0rvfGbkA,What's Next,hip-hop,Drake,54,0.0,0.012500,0.777,178154.0,0.580,...,129.918,4.0,0.0636,,,,Canada,FALSE,FALSE,Cash Money
1005568,6ZoZ4KGIDD23DohdVk0Ybw,Wants and Needs (feat. Lil Baby),hip-hop,Drake,54,7.0,0.057400,0.588,194090.0,0.412,...,136.068,4.0,0.1210,,,,Canada,Y,FALSE,Cash Money
1005958,7wcWkzT1X75DguAwOWxlGt,Way 2 Sexy (with Future & Young Thug),hip-hop,Drake,51,11.0,0.000722,0.803,257605.0,0.596,...,136.023,4.0,0.3260,,,,Canada,FALSE,FALSE,Cash Money
1016484,1PDP7mLiAMwhfmgIwzhOm2,Yebba’s Heartbreak,pop,Drake,78,8.0,0.967000,0.476,133763.0,0.161,...,119.614,4.0,0.0908,,,,Canada,FALSE,FALSE,Cash Money


There appear to be numerous artists named "Drake", to my suprise.

In [51]:
df1[df1.artist_country.notnull()]

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,feat,Feat2,Label
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,hip-hop,Eminem,88,5.0,0.03020,0.949,284200.0,0.661,...,104.504,4.0,0.760,2000.0,Y,,United States,FALSE,FALSE,"Aftermath, Interscope, Shady"
13,3UmaczJpikHgJFyBTAJVoz,Stan,hip-hop,Eminem,85,6.0,0.03710,0.780,404107.0,0.768,...,80.063,4.0,0.507,2001.0,Y,,United States,FALSE,FALSE,"Aftermath, Interscope, Shady"
25,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,hip-hop,Nelly,75,7.0,0.06160,0.850,291782.0,0.700,...,101.875,4.0,0.722,2001.0,Y,,United States,FALSE,FALSE,Universal Motown
44,23wfXwnsPZYe5A1xXRHb3J,The Way I Am,hip-hop,Eminem,75,4.0,0.14400,0.785,290427.0,0.871,...,87.248,4.0,0.315,,,,United States,FALSE,FALSE,"Aftermath, Interscope, Shady"
56,3wMUvT6eIw2L5cZFG1yH9j,Country Grammar (Hot Shit),hip-hop,Nelly,69,2.0,0.00689,0.865,287000.0,0.664,...,162.831,4.0,0.565,2000.0,Y,,United States,FALSE,FALSE,Universal Motown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1063067,5kJKOO7eYN9PqHi2An3lRj,So-Called Lover,blues,Samantha Fish,21,7.0,0.00582,0.429,185760.0,0.993,...,170.074,4.0,0.704,,,,United States,FALSE,FALSE,Ruf
1063069,2ByPBH09DXKl9KiWMQ3w7u,Look Alive,blues,Black Pistol Fire,23,9.0,0.02180,0.677,192011.0,0.675,...,94.013,4.0,0.325,,,,United States,FALSE,FALSE,Rifle Bird
1063070,1bAAzNDtN9VMcYrxCbOUsw,Love is Gonna Come at Last,blues,Badfinger,23,2.0,0.04750,0.555,218649.0,0.772,...,111.037,4.0,0.745,,,,United Kingdom,FALSE,FALSE,Apple
1063073,5R61oQZBCkNZGSCnnIj7yo,You Wonder,blues,Dinosaur Jr.,23,4.0,0.07780,0.586,183173.0,0.690,...,114.498,4.0,0.509,,,,United States,FALSE,FALSE,


In [52]:
len(df1)

255096

In [53]:
# there are many duplicate track_ids - let's eliminate any duplicates
df1.drop_duplicates(subset='track_id', keep='first', inplace=True)

In [54]:
len(df1)

255096

In [55]:
display(df1.loc[(df1['artist_country'].notnull()) & (df1['artist_name'] == 'Drake') & (df1['artist_country'] != 'Canada')])

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,feat,Feat2,Label


In [56]:
df1['popular_year'] = df1['popular_year'].fillna(0)
df1['popular_year'] = df1['popular_year'].astype(int)


In [57]:
df1.Top100 = df1.Top100.fillna('N')

In [58]:
df1.head(10)

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,feat,Feat2,Label
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,hip-hop,Eminem,88,5.0,0.0302,0.949,284200.0,0.661,...,104.504,4.0,0.76,2000,Y,,United States,False,FALSE,"Aftermath, Interscope, Shady"
12,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,hip-hop,Outkast,82,4.0,0.143,0.843,270507.0,0.806,...,94.948,4.0,0.613,2001,Y,,,False,FALSE,
13,3UmaczJpikHgJFyBTAJVoz,Stan,hip-hop,Eminem,85,6.0,0.0371,0.78,404107.0,0.768,...,80.063,4.0,0.507,2001,Y,,United States,False,FALSE,"Aftermath, Interscope, Shady"
25,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,hip-hop,Nelly,75,7.0,0.0616,0.85,291782.0,0.7,...,101.875,4.0,0.722,2001,Y,,United States,False,FALSE,Universal Motown
43,6glsMWIMIxQ4BedzLqGVi4,"So Fresh, So Clean",hip-hop,Outkast,72,5.0,0.0281,0.725,240027.0,0.64,...,166.028,3.0,0.915,0,N,,,False,FALSE,
44,23wfXwnsPZYe5A1xXRHb3J,The Way I Am,hip-hop,Eminem,75,4.0,0.144,0.785,290427.0,0.871,...,87.248,4.0,0.315,0,N,,United States,False,FALSE,"Aftermath, Interscope, Shady"
56,3wMUvT6eIw2L5cZFG1yH9j,Country Grammar (Hot Shit),hip-hop,Nelly,69,2.0,0.00689,0.865,287000.0,0.664,...,162.831,4.0,0.565,2000,Y,,United States,False,FALSE,Universal Motown
74,5eGEc27nnhtmcOh6RC890a,Kill You,hip-hop,Eminem,70,11.0,0.155,0.897,264400.0,0.559,...,106.991,4.0,0.677,0,N,,United States,False,FALSE,"Aftermath, Interscope, Shady"
86,7lgvU7WnwcNZ8dbGUz8yhP,Bitch Please II,hip-hop,Eminem,72,1.0,0.0783,0.918,288200.0,0.586,...,95.517,4.0,0.779,0,N,,United States,False,FALSE,"Aftermath, Interscope, Shady"
98,4LwOrnuxJwR7C5Sw4liY4Z,What's Your Fantasy (Featuring Shawna),hip-hop,Ludacris,63,6.0,0.16,0.915,275907.0,0.79,...,140.095,4.0,0.967,0,N,,United States,False,Y,


In [59]:
df1.shape

(255096, 25)

## Collaboration assignment section

In [60]:
df1['collaboration'] = "N"

In [61]:
df1.collaboration.value_counts()

N    255096
Name: collaboration, dtype: int64

In [62]:
df1.loc[df1.second_artist.isna() == False, 'collaboration'] = "Y"

In [63]:
df1.loc[df1["feat"] == "Y", "collaboration"] = "Y"

In [64]:
df1.loc[df1["Feat2"] == "Y", "collaboration"] = "Y"

In [65]:
df1.collaboration.value_counts()

N    220634
Y     34462
Name: collaboration, dtype: int64

In [66]:
#drop collab columns since don't need them anymore
df1 = df1.drop(['feat', 'Feat2'], axis=1)

In [67]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 255096 entries, 0 to 1063084
Data columns (total 24 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   track_id          255096 non-null  object 
 1   track_name        255096 non-null  object 
 2   track_genre       255096 non-null  object 
 3   artist_name       255096 non-null  object 
 4   track_popularity  255096 non-null  int64  
 5   key               255096 non-null  float64
 6   acousticness      255096 non-null  float64
 7   danceability      255096 non-null  float64
 8   duration_ms       255096 non-null  float64
 9   energy            255096 non-null  float64
 10  instrumentalness  255096 non-null  float64
 11  liveness          255096 non-null  float64
 12  loudness          255096 non-null  float64
 13  modality          255096 non-null  float64
 14  speechiness       255096 non-null  float64
 15  tempo             255096 non-null  float64
 16  time_signature    2

# previous top 100 section

This section is intended to decipher if an artist has previously had a 

In [68]:
df1.artist_name.value_counts()

Various Artists          14068
Johann Sebastian Bach      845
Annie Cordy                771
Randy Newman               728
Hillsong Worship           726
                         ...  
Kae Tempest                  1
Dave Mack                    1
Earl Bostic                  1
Kyan                         1
Bob Margolin                 1
Name: artist_name, Length: 22772, dtype: int64

In [69]:
df1['prevTop100'] = 'N'

In [70]:
df1[df1['artist_name'] == 'Eminem']

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,Label,collaboration,prevTop100
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,hip-hop,Eminem,88,5.0,0.0302,0.949,284200.0,0.661,...,104.504,4.0,0.760,2000,Y,,United States,"Aftermath, Interscope, Shady",N,N
13,3UmaczJpikHgJFyBTAJVoz,Stan,hip-hop,Eminem,85,6.0,0.0371,0.780,404107.0,0.768,...,80.063,4.0,0.507,2001,Y,,United States,"Aftermath, Interscope, Shady",N,N
44,23wfXwnsPZYe5A1xXRHb3J,The Way I Am,hip-hop,Eminem,75,4.0,0.1440,0.785,290427.0,0.871,...,87.248,4.0,0.315,0,N,,United States,"Aftermath, Interscope, Shady",N,N
74,5eGEc27nnhtmcOh6RC890a,Kill You,hip-hop,Eminem,70,11.0,0.1550,0.897,264400.0,0.559,...,106.991,4.0,0.677,0,N,,United States,"Aftermath, Interscope, Shady",N,N
86,7lgvU7WnwcNZ8dbGUz8yhP,Bitch Please II,hip-hop,Eminem,72,1.0,0.0783,0.918,288200.0,0.586,...,95.517,4.0,0.779,0,N,,United States,"Aftermath, Interscope, Shady",N,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1002424,3QVHSV8YKYq4L8tI5rnFgj,Stepdad,hip-hop,Eminem,55,1.0,0.0180,0.779,213413.0,0.805,...,145.680,4.0,0.557,0,N,,United States,"Aftermath, Interscope, Shady",N,N
1002492,3CJbxqRQ0JNCqboWDNUUeX,"I Will (feat. KXNG Crooked, Royce Da 5'9"" & Jo...",hip-hop,Eminem,54,8.0,0.0454,0.635,303000.0,0.543,...,98.743,4.0,0.036,0,N,,United States,"Aftermath, Interscope, Shady",Y,N
1002504,086LXfSk4U5XEALardCE7k,These Demons (feat. MAJ),hip-hop,Eminem,54,1.0,0.0541,0.862,207320.0,0.789,...,138.918,4.0,0.472,0,N,,United States,"Aftermath, Interscope, Shady",Y,N
1002820,7ccTcabbJlCJiIqtrSSwBk,Premonition - Intro,hip-hop,Eminem,54,1.0,0.0258,0.847,173893.0,0.637,...,165.979,4.0,0.511,0,N,,United States,"Aftermath, Interscope, Shady",N,N


#### what I need to accomplish:

for each row (track), determine if the artist of that track has previously had a track with a Top100 = "Y".  If this is true, then I want to populate the "prevTop100" column with a "Y". Note that this should only occur for Top100 songs that are not the first Top Hit for the artist (e.g. their first Top100 hit should have a "prevTop100" value of N).

In the Britney Spears example below, the first two tracks, which were in the year 2000, would have prevTop100 values of "N", while all of the tracks in 2001 and forward would have a prevTop100 value of "Y".
    


In [71]:
#df1[df1['artist_name'] == 'Eminem' & df1['Top100'] == "Y"]
display(df1.loc[(df1['artist_name'] == 'Britney Spears') & (df1['Top100'] == 'Y')])

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,Label,collaboration,prevTop100
33729,6naxalmIoLFWR0siv8dnQQ,Oops!...I Did It Again,pop,Britney Spears,78,1.0,0.3,0.751,211160.0,0.834,...,95.053,4.0,0.894,2000,Y,,United States,Jive,N,N
33819,2TTYIwTM2iLC1YOyHuhRMt,Lucky,pop,Britney Spears,67,8.0,0.262,0.765,206227.0,0.791,...,95.026,4.0,0.966,2000,Y,,United States,Jive,N,N
38079,6ldwfK0yWgTAlmIfuQkTYN,I'm a Slave 4 U,pop,Britney Spears,69,5.0,0.415,0.847,203600.0,0.843,...,110.027,4.0,0.963,2001,Y,,United States,Jive,N,N
38717,5p0KCAuu5nrHpTuAIhHdLw,Overprotected - Radio Edit,pop,Britney Spears,63,0.0,0.0381,0.682,198600.0,0.894,...,95.992,4.0,0.845,2002,Y,,United States,Jive,N,N
39198,1xg1yGPy1Y3YUf44s6TB1o,"I'm Not a Girl, Not Yet a Woman",pop,Britney Spears,60,3.0,0.579,0.534,231067.0,0.543,...,78.996,4.0,0.418,2002,Y,,United States,Jive,N,N
146115,6I9VzXrHxO9rA9A5euc8Ak,Toxic,pop,Britney Spears,82,5.0,0.0249,0.774,198800.0,0.838,...,143.04,4.0,0.924,2004,Y,,United States,Jive,N,N
146831,7mS8RbJji2UZAaguRGsOCH,Me Against the Music (feat. Madonna) - LP Vers...,pop,Britney Spears,60,6.0,0.32,0.804,223773.0,0.836,...,120.046,4.0,0.85,2003,Y,,United States,Jive,Y,N
358656,6ic8OlLUNEATToEFU3xmaH,Gimme More,pop,Britney Spears,79,2.0,0.25,0.788,251240.0,0.844,...,113.324,4.0,0.382,2007,Y,,United States,Jive,N,N
359239,2EvwLVrnYbCZEG6Kx5DCRy,Piece of Me,pop,Britney Spears,64,11.0,0.0902,0.769,212107.0,0.638,...,115.007,4.0,0.782,2008,Y,,United States,Jive,N,N
359375,52K4Nl7eVNqUpUeJeWJlwT,Break the Ice,pop,Britney Spears,61,5.0,0.689,0.712,196053.0,0.911,...,117.533,4.0,0.85,2008,Y,,United States,Jive,N,N


In [72]:
df1[df1['Top100'] == 'Y']

Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,Label,collaboration,prevTop100
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,hip-hop,Eminem,88,5.0,0.03020,0.949,284200.0,0.661,...,104.504,4.0,0.760,2000,Y,,United States,"Aftermath, Interscope, Shady",N,N
12,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,hip-hop,Outkast,82,4.0,0.14300,0.843,270507.0,0.806,...,94.948,4.0,0.613,2001,Y,,,,N,N
13,3UmaczJpikHgJFyBTAJVoz,Stan,hip-hop,Eminem,85,6.0,0.03710,0.780,404107.0,0.768,...,80.063,4.0,0.507,2001,Y,,United States,"Aftermath, Interscope, Shady",N,N
25,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,hip-hop,Nelly,75,7.0,0.06160,0.850,291782.0,0.700,...,101.875,4.0,0.722,2001,Y,,United States,Universal Motown,N,N
56,3wMUvT6eIw2L5cZFG1yH9j,Country Grammar (Hot Shit),hip-hop,Nelly,69,2.0,0.00689,0.865,287000.0,0.664,...,162.831,4.0,0.565,2000,Y,,United States,Universal Motown,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1017965,3eZYOQO4UzKrUDYDghtnFw,test drive,pop,Ariana Grande,65,9.0,0.02510,0.698,122174.0,0.708,...,115.036,4.0,0.322,2021,Y,,United States,Republic,N,N
1018611,4w47S36wQGBhGg073q3nt7,TKN (feat. Travis Scott),r&b,ROSALÍA,70,0.0,0.19200,0.776,129371.0,0.509,...,94.634,4.0,0.485,2020,Y,Travis Scott,Spain,,Y,N
1043432,1raaNykBg1bDnWENUiglUA,Break My Heart,dance,Dua Lipa,67,4.0,0.16700,0.730,221820.0,0.729,...,113.012,4.0,0.467,2020,Y,,United Kingdom,Warner,N,N
1045196,0BI0hfbmqybnd3TezrDME3,Met Him Last Night (feat. Ariana Grande),dance,Demi Lovato,60,4.0,0.22000,0.538,204632.0,0.512,...,144.978,4.0,0.120,2021,Y,,United States,Hollywood,Y,N


In [76]:
# create the dataframe to compare against
top100df = df1[df1['Top100'] == 'Y']
top100df.head(4)


Unnamed: 0,track_id,track_name,track_genre,artist_name,track_popularity,key,acousticness,danceability,duration_ms,energy,...,tempo,time_signature,valence,popular_year,Top100,second_artist,artist_country,Label,collaboration,prevTop100
0,3yfqSUWxFvZELEM4PmlwIR,The Real Slim Shady,hip-hop,Eminem,88,5.0,0.0302,0.949,284200.0,0.661,...,104.504,4.0,0.76,2000,Y,,United States,"Aftermath, Interscope, Shady",N,N
12,0I3q5fE6wg7LIfHGngUTnV,Ms. Jackson,hip-hop,Outkast,82,4.0,0.143,0.843,270507.0,0.806,...,94.948,4.0,0.613,2001,Y,,,,N,N
13,3UmaczJpikHgJFyBTAJVoz,Stan,hip-hop,Eminem,85,6.0,0.0371,0.78,404107.0,0.768,...,80.063,4.0,0.507,2001,Y,,United States,"Aftermath, Interscope, Shady",N,N
25,3Gf5nttwcX9aaSQXRWidEZ,Ride Wit Me,hip-hop,Nelly,75,7.0,0.0616,0.85,291782.0,0.7,...,101.875,4.0,0.722,2001,Y,,United States,Universal Motown,N,N


In [84]:
# in this first for loop, I'm trying to create a "temporary" dataframe that has each artist with a Top100 hit, and specifically find the year of their first Top100 hit
for artist in top100df['artist_name']:
    #temp_df = (top100df[top100df['artist_name'] == artist] & top100df[top100df['popular_year']].min())
    temp_df = df1.loc[(df1['artist_name'] == artist) & (df1['popular_year'].min())]
    print(temp_df)

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist_country, Label, collaboration, prevTop100]
Index: []

[0 rows x 25 columns]
Empty DataFrame
Columns: [track_id, track_name, track_genre, artist_name, track_popularity, key, acousticness, danceability, duration_ms, energy, instrumentalness, liveness, loudness, modality, speechiness, tempo, time_signature, valence, popular_year, Top100, second_artist, artist

In [None]:
# once I'm able to get that for loop right, I need to be able to 

In [74]:
top100df.Top100.value_counts()

Y    1863
Name: Top100, dtype: int64

In [75]:
for row in df1['prevTop100']:
    if df1['artist_name'] in top100df['artist_name']:
        print('yes')
    else:
        print('no')

TypeError: unhashable type: 'Series'

In [None]:
# for row in df6['prev_Top100']:
#     if df6['Top100'] = 'N':
#         skip
#     else:    
#         if artist = df6["artist_name"]
    

In [None]:
for row in df1['prev_Top100']:
    artist = df1['artist_name']
    print(artist)
    #comp_df = df6.loc[(df6['artist_name'] == (artist))]
    #comp_df = df6.loc[(df6['artist_name'] == (artist)) & (df6['popular_year'] == 'Y')]
    #print(comp_df)

#### subset dataframe on the fly approach


#### create dictionary approach
almost like if for each artist, you created a dictionary of years that they previously had a top 100, you could look up that list

one approach: 
embedded for loop where you look at previous dataframe

another approach:
join between two dataframes



#### write to CSV
from pathlib import Path  
filepath = Path('/Users/gregwelliver/Desktop/springboard_files/Capstone Two - Music/Data/Merged_Data7.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
df7.to_csv(filepath)

### Reference docs

spotipy documentation
https://spotipy.readthedocs.io/en/latest/#installation

google sheet with links to top song playlists: https://docs.google.com/spreadsheets/d/10gRoHKIB1RFfxSvb83gzYkV33x5eMMI1RZoXSamFwPI/edit#gid=824975682

github for project that I referenced to pull data: https://github.com/ekatnic/SpotifyApiExploration/blob/master/Spotify_Generate_Tracks.ipynb

Proposal: https://docs.google.com/document/d/1rRzJV0hOaNMsBIjji8v1Cv8C3QUuZ6lUvYokZ5dif-A/edit