# Gnod project: making a song recommender

In this notebook I do the following:
 - Build a dataframe that contains song's name and artist of hot100 Billboard songs
 - Create a first version of the recommender
 - Build a big dataframe with different songs and their audio features

In [1]:
# !pip install requests
from bs4 import BeautifulSoup
import requests
import pandas as pd
from random import randint

In [2]:
# find url and store it in a variable
url = "http://www.popvortex.com/music/charts/top-100-songs.php"

In [3]:
# download html with a get request
response = requests.get(url)

In [4]:
response.status_code # 200 status code means OK!

200

In [5]:
# response.content

In [6]:
# parse html (create the 'soup')
soup = BeautifulSoup(response.content, "html.parser")

In [7]:
# check that the html code looks like it should
# soup

In [8]:
# body > div.container > div:nth-child(4) > div.col-xs-12.col-md-8 > div.chart-wrapper

In [9]:
# retrieve/extract the desired info 
# soup.select("body > div.container > div:nth-child(4) > div.col-xs-12.col-md-8 > div.chart-wrapper")

In [10]:
cite = soup.select("div.chart-content.col-xs-12.col-sm-8 > p > cite")

In [11]:
len(cite)

100

In [12]:
title = []  

for i in cite:
    title.append(i.get_text())  
print(title)

["TEXAS HOLD 'EM", 'Lose Control', 'Beautiful Things', "TEXAS HOLD 'EM", 'dont let me go', 'Lovin On Me', 'Flowers', "Don't Let the Old Man In", 'Selfish', 'Turn the Lights Back On', 'I Remember Everything (feat. Kacey Musgraves)', 'Made For Me', 'Where the Wild Things Are', 'Fast Car', 'Beautiful Messes', 'Save Me', '16 CARRIAGES', 'Lil Boo Thang', 'Houdini', 'Live Like You Were Dying', 'Need a Favor', 'Fast Car', 'Save Me (with Lainey Wilson)', 'Powerful Women', 'Spin You Around (1/24)', 'Cruel Summer', 'Spicy Margarita', 'Training Season', 'Yeah! (feat. Lil Jon & Ludacris)', 'Good Day', 'Three Little Birds', 'White Horse', 'Wildflowers and Wild Horses (Single Version)', 'Thinkin’ Bout Me', "Should've Been a Cowboy", 'Pretty Little Poison', "Let's Go", 'Murder On the Dancefloor (Radio Edit)', 'Devil You Know', 'The Door', 'TRUCK BED', "Nothing's Gonna Stop Us Now", 'What Was I Made For? (From The Motion Picture "Barbie")', 'Stick Season', '23', 'The Sound of Silence (CYRIL Remix)', '

In [13]:
len(title)

100

In [14]:
em = soup.select("div.chart-content.col-xs-12.col-sm-8 > p > em")

In [15]:
len(em)

100

In [16]:
artist = []  

for i in em:
    artist.append(i.get_text())  
print(artist)

['Beyoncé', 'Teddy Swims', 'Benson Boone', 'Beyoncé', 'mgk', 'Jack Harlow', 'Miley Cyrus', 'Toby Keith', 'Justin Timberlake', 'Billy Joel', 'Zach Bryan', 'Muni Long', 'Luke Combs', 'Luke Combs', 'Hillary Scott & The Scott Family', 'Jelly Roll', 'Beyoncé', 'Paul Russell', 'Dua Lipa', 'Tim McGraw', 'Jelly Roll', 'Tracy Chapman', 'Jelly Roll', 'Pitbull & Dolly Parton', 'Morgan Wallen', 'Taylor Swift', 'Jason Derulo & Michael Bublé', 'Dua Lipa', 'USHER', 'Forrest Frank', 'Bob Marley & The Wailers', 'Chris Stapleton', 'Lainey Wilson', 'Morgan Wallen', 'Toby Keith', 'Warren Zeiders', 'Key Glock', 'Sophie Ellis-Bextor', 'Tyler Braden', 'Teddy Swims', 'HARDY', 'Starship', 'Billie Eilish', 'Noah Kahan', 'Chayce Beckham', 'Disturbed', 'Tyla', 'Jamey Johnson', 'Dua Lipa', 'Chris Stapleton', 'Jung Kook & USHER', 'Morgan Wallen', 'Stephen Sanchez', 'Jung Kook', 'Jung Kook & USHER', 'YG Marley', 'Lee Brice', 'Hank Williams, Jr.', 'Shinedown', 'Taylor Swift', 'Taylor Swift', 'Dax', 'Kelsey Hart', 'Th

In [17]:
len(artist)

100

In [18]:
# each list becomes a column
hot100 = pd.DataFrame({"title":title,
                           "artist":artist
                          })

In [19]:
hot100

Unnamed: 0,title,artist
0,TEXAS HOLD 'EM,Beyoncé
1,Lose Control,Teddy Swims
2,Beautiful Things,Benson Boone
3,TEXAS HOLD 'EM,Beyoncé
4,dont let me go,mgk
...,...,...
95,greedy,Tate McRae
96,In Case You Didn't Know,Brett Young
97,Is It Over Now? (Taylor's Version) [From The V...,Taylor Swift
98,How Do You Like Me Now?!,Toby Keith


In [20]:
hot100.to_csv('hot100.csv', sep=',', index=False)

## Creating a first version of the song recommender

In [21]:
# define a fuction to make all values lowercase
def lowervalue(x):
    return x.lower()

In [22]:
hot100['titlelower'] = hot100['title'].apply(lowervalue)
hot100['artistlower'] = hot100['artist'].apply(lowervalue)

In [23]:
hot100lower = hot100[['titlelower', 'artistlower']]

In [24]:
hot100lower

Unnamed: 0,titlelower,artistlower
0,texas hold 'em,beyoncé
1,lose control,teddy swims
2,beautiful things,benson boone
3,texas hold 'em,beyoncé
4,dont let me go,mgk
...,...,...
95,greedy,tate mcrae
96,in case you didn't know,brett young
97,is it over now? (taylor's version) [from the v...,taylor swift
98,how do you like me now?!,toby keith


In [25]:
hot100lower.to_csv('hot100lower.csv', sep=',', index=False)

### First version of the song recomender:

It recommends a song that is in the hot100 songs if the user inputs a song that is also in the hot100 songs, and it says "I have no recommendation at this time" if the user inputs a song that is not in the hot100 songs.

In [26]:
user_input = input("Enter a song's name: ")
user_input_lower_stripped = user_input.lower().strip()

if user_input_lower_stripped in list(hot100lower['titlelower']):
    print(hot100['title'][randint(0,99)])
else:
    print("I have no recommendation at this time")

Enter a song's name: flowers
Standing Next to You


## Creating a large dataset of songs

In [27]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [28]:
secrets_file = open("secrets.txt","r")

In [29]:
string = secrets_file.read()

In [30]:
# string

In [31]:
# string.split('\n')

In [32]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
#         print(line.split(':'))
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

In [33]:
# secrets_dict

### Authentication with secrets

In [34]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

### Getting all tracks from each playlist

In [35]:
from random import randint
from time import sleep

# define a function that get out all tracks of a playlist
def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3000)/1000) # respectful nap
    return tracks

In [36]:
# apply the function
pablo_playlist = get_playlist_tracks("5S8SJdl1BDc0ugpkEvFsIL")

In [37]:
len(pablo_playlist)

10000

In [38]:
from pandas import json_normalize

# normalize
pablo_tracks = json_normalize(pablo_playlist)
pablo_tracks.head()

Unnamed: 0,added_at,is_local,primary_color,added_by.external_urls.spotify,added_by.href,added_by.id,added_by.type,added_by.uri,track.album.album_type,track.album.artists,...,track.id,track.is_local,track.name,track.popularity,track.preview_url,track.track,track.track_number,track.type,track.uri,video_thumbnail.url
0,2017-11-20T02:52:18Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,single,[{'external_urls': {'spotify': 'https://open.s...,...,7zgqtptZvhf8GEmdsM2vp2,False,...Ready For It?,0,,True,1,track,spotify:track:7zgqtptZvhf8GEmdsM2vp2,
1,2017-11-20T02:42:15Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,album,[{'external_urls': {'spotify': 'https://open.s...,...,4Vxu50qVrQcycjRyJQaZLC,False,Life Changes,62,,True,8,track,spotify:track:4Vxu50qVrQcycjRyJQaZLC,
2,2017-11-20T02:36:40Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,album,[{'external_urls': {'spotify': 'https://open.s...,...,6b8Be6ljOzmkOmFslEb23P,False,24K Magic,83,https://p.scdn.co/mp3-preview/3a76820d510fa5f8...,True,1,track,spotify:track:6b8Be6ljOzmkOmFslEb23P,
3,2017-12-06T01:26:36Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,album,[{'external_urls': {'spotify': 'https://open.s...,...,0afhq8XCExXpqazXczTSve,False,Galway Girl,82,https://p.scdn.co/mp3-preview/9b7635464dc2caea...,True,6,track,spotify:track:0afhq8XCExXpqazXczTSve,
4,2017-11-20T02:36:14Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,album,[{'external_urls': {'spotify': 'https://open.s...,...,1HNkqx9Ahdgi1Ixy2xkKkL,False,Photograph,87,https://p.scdn.co/mp3-preview/d90f4e5f15d8ed41...,True,6,track,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,


In [39]:
# flatten the dataframes by artist 

artists_pablo = pd.DataFrame(columns=['href', 'id', 'name', 'type', 'uri', 'external_urls.spotify','song_id', 'song_name', 'popularity' ])
for i in pablo_tracks.index:
    artists_for_song = json_normalize(pablo_tracks.iloc[i]['track.artists'])
    artists_for_song['song_id']    = pablo_tracks.iloc[i]['track.id']         # we want to keep song_id, it is the sae for all artists
    artists_for_song['song_name']  = pablo_tracks.iloc[i]['track.name']       # we want to keep song_name, it is the sae for all artists
    artists_for_song['popularity'] = pablo_tracks.iloc[i]['track.popularity'] # same for popularity   
    artists_pablo = pd.concat([artists_pablo, artists_for_song], axis=0)

In [40]:
len(artists_pablo)

12178

In [41]:
# reset index
artists_pablo = artists_pablo.reset_index(drop=True)

In [42]:
artists_pablo.head()

Unnamed: 0,href,id,name,type,uri,external_urls.spotify,song_id,song_name,popularity
0,https://api.spotify.com/v1/artists/06HL4z0CvFA...,06HL4z0CvFAxyc27GXpf02,Taylor Swift,artist,spotify:artist:06HL4z0CvFAxyc27GXpf02,https://open.spotify.com/artist/06HL4z0CvFAxyc...,7zgqtptZvhf8GEmdsM2vp2,...Ready For It?,0
1,https://api.spotify.com/v1/artists/6x2LnllRG5u...,6x2LnllRG5uGarZMsD4iO8,Thomas Rhett,artist,spotify:artist:6x2LnllRG5uGarZMsD4iO8,https://open.spotify.com/artist/6x2LnllRG5uGar...,4Vxu50qVrQcycjRyJQaZLC,Life Changes,62
2,https://api.spotify.com/v1/artists/0du5cEVh5yT...,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,artist,spotify:artist:0du5cEVh5yTK9QJze8zA0C,https://open.spotify.com/artist/0du5cEVh5yTK9Q...,6b8Be6ljOzmkOmFslEb23P,24K Magic,83
3,https://api.spotify.com/v1/artists/6eUKZXaKkcv...,6eUKZXaKkcviH0Ku9w2n3V,Ed Sheeran,artist,spotify:artist:6eUKZXaKkcviH0Ku9w2n3V,https://open.spotify.com/artist/6eUKZXaKkcviH0...,0afhq8XCExXpqazXczTSve,Galway Girl,82
4,https://api.spotify.com/v1/artists/6eUKZXaKkcv...,6eUKZXaKkcviH0Ku9w2n3V,Ed Sheeran,artist,spotify:artist:6eUKZXaKkcviH0Ku9w2n3V,https://open.spotify.com/artist/6eUKZXaKkcviH0...,1HNkqx9Ahdgi1Ixy2xkKkL,Photograph,87


In [43]:
# column selection
artists_pablo = artists_pablo[['song_name', 'name', 'song_id', 'popularity']]

In [44]:
# check for null values
artists_pablo.isnull().sum()

song_name      0
name           0
song_id       20
popularity     0
dtype: int64

In [45]:
# remove rows that have a null value
artists_pablo = artists_pablo[artists_pablo['song_id'].isna()==False] 

In [46]:
artists_pablo.isnull().sum().sum()

0

In [47]:
# get the audio features from all the songs in artists
chunks = [(i, i+100) for i in range(0, len(artists_pablo), 100)]
chunks
audio_features_list = []
for chunk in chunks:
    id_list100 = artists_pablo['song_id'][chunk[0]:chunk[1]]
    audio_features_list = audio_features_list + sp.audio_features(id_list100)
    sleep(randint(1,3000)/1000)
len(audio_features_list)

12158

In [48]:
audio_features_df = json_normalize(audio_features_list)
audio_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.615,0.779,2.0,-6.454,1.0,0.1350,0.06650,0.000000,0.1550,0.453,160.000,audio_features,7zgqtptZvhf8GEmdsM2vp2,spotify:track:7zgqtptZvhf8GEmdsM2vp2,https://api.spotify.com/v1/tracks/7zgqtptZvhf8...,https://api.spotify.com/v1/audio-analysis/7zgq...,208198.0,4.0
1,0.687,0.845,7.0,-4.370,1.0,0.0576,0.10000,0.000000,0.0452,0.809,87.972,audio_features,4Vxu50qVrQcycjRyJQaZLC,spotify:track:4Vxu50qVrQcycjRyJQaZLC,https://api.spotify.com/v1/tracks/4Vxu50qVrQcy...,https://api.spotify.com/v1/audio-analysis/4Vxu...,190227.0,4.0
2,0.818,0.803,1.0,-4.282,1.0,0.0797,0.03400,0.000000,0.1530,0.632,106.970,audio_features,6b8Be6ljOzmkOmFslEb23P,spotify:track:6b8Be6ljOzmkOmFslEb23P,https://api.spotify.com/v1/tracks/6b8Be6ljOzmk...,https://api.spotify.com/v1/audio-analysis/6b8B...,225983.0,4.0
3,0.624,0.876,9.0,-3.374,1.0,0.1000,0.07350,0.000000,0.3270,0.781,99.943,audio_features,0afhq8XCExXpqazXczTSve,spotify:track:0afhq8XCExXpqazXczTSve,https://api.spotify.com/v1/tracks/0afhq8XCExXp...,https://api.spotify.com/v1/audio-analysis/0afh...,170827.0,4.0
4,0.614,0.379,4.0,-10.480,1.0,0.0476,0.60700,0.000464,0.0986,0.201,107.989,audio_features,1HNkqx9Ahdgi1Ixy2xkKkL,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,https://api.spotify.com/v1/tracks/1HNkqx9Ahdgi...,https://api.spotify.com/v1/audio-analysis/1HNk...,258987.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12153,0.406,0.306,3.0,-10.482,1.0,0.0372,0.31300,0.007900,0.0727,0.073,135.272,audio_features,5pSSEkT0963muzzIjsVkrs,spotify:track:5pSSEkT0963muzzIjsVkrs,https://api.spotify.com/v1/tracks/5pSSEkT0963m...,https://api.spotify.com/v1/audio-analysis/5pSS...,652560.0,4.0
12154,0.362,0.507,1.0,-11.229,1.0,0.0394,0.01740,0.216000,0.1130,0.456,146.641,audio_features,7gC6Rbllqf1yXNC02e5jz2,spotify:track:7gC6Rbllqf1yXNC02e5jz2,https://api.spotify.com/v1/tracks/7gC6Rbllqf1y...,https://api.spotify.com/v1/audio-analysis/7gC6...,634440.0,3.0
12155,0.331,0.733,2.0,-8.671,1.0,0.0468,0.08870,0.001740,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0
12156,0.331,0.733,2.0,-8.671,1.0,0.0468,0.08870,0.001740,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0


In [49]:
# drop duplicates because some songs have more artists
audio_features_df.drop_duplicates(inplace=True) 

In [50]:
# perform a inner merge to join both dataframes on 'song_id' and 'id'
pablo_w_audio_ft = pd.merge(left=artists_pablo,
                        right=audio_features_df,
                        how='inner',
                        left_on='song_id',
                        right_on='id')
pablo_w_audio_ft

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,...Ready For It?,Taylor Swift,7zgqtptZvhf8GEmdsM2vp2,0,0.615,0.779,2.0,-6.454,1.0,0.1350,...,0.1550,0.453,160.000,audio_features,7zgqtptZvhf8GEmdsM2vp2,spotify:track:7zgqtptZvhf8GEmdsM2vp2,https://api.spotify.com/v1/tracks/7zgqtptZvhf8...,https://api.spotify.com/v1/audio-analysis/7zgq...,208198.0,4.0
1,Life Changes,Thomas Rhett,4Vxu50qVrQcycjRyJQaZLC,62,0.687,0.845,7.0,-4.370,1.0,0.0576,...,0.0452,0.809,87.972,audio_features,4Vxu50qVrQcycjRyJQaZLC,spotify:track:4Vxu50qVrQcycjRyJQaZLC,https://api.spotify.com/v1/tracks/4Vxu50qVrQcy...,https://api.spotify.com/v1/audio-analysis/4Vxu...,190227.0,4.0
2,24K Magic,Bruno Mars,6b8Be6ljOzmkOmFslEb23P,83,0.818,0.803,1.0,-4.282,1.0,0.0797,...,0.1530,0.632,106.970,audio_features,6b8Be6ljOzmkOmFslEb23P,spotify:track:6b8Be6ljOzmkOmFslEb23P,https://api.spotify.com/v1/tracks/6b8Be6ljOzmk...,https://api.spotify.com/v1/audio-analysis/6b8B...,225983.0,4.0
3,Galway Girl,Ed Sheeran,0afhq8XCExXpqazXczTSve,82,0.624,0.876,9.0,-3.374,1.0,0.1000,...,0.3270,0.781,99.943,audio_features,0afhq8XCExXpqazXczTSve,spotify:track:0afhq8XCExXpqazXczTSve,https://api.spotify.com/v1/tracks/0afhq8XCExXp...,https://api.spotify.com/v1/audio-analysis/0afh...,170827.0,4.0
4,Photograph,Ed Sheeran,1HNkqx9Ahdgi1Ixy2xkKkL,87,0.614,0.379,4.0,-10.480,1.0,0.0476,...,0.0986,0.201,107.989,audio_features,1HNkqx9Ahdgi1Ixy2xkKkL,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,https://api.spotify.com/v1/tracks/1HNkqx9Ahdgi...,https://api.spotify.com/v1/audio-analysis/1HNk...,258987.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12132,Fool's Overture,Supertramp,5pSSEkT0963muzzIjsVkrs,55,0.406,0.306,3.0,-10.482,1.0,0.0372,...,0.0727,0.073,135.272,audio_features,5pSSEkT0963muzzIjsVkrs,spotify:track:5pSSEkT0963muzzIjsVkrs,https://api.spotify.com/v1/tracks/5pSSEkT0963m...,https://api.spotify.com/v1/audio-analysis/5pSS...,652560.0,4.0
12133,Heart of the Sunrise - 2003 Remaster,Yes,7gC6Rbllqf1yXNC02e5jz2,46,0.362,0.507,1.0,-11.229,1.0,0.0394,...,0.1130,0.456,146.641,audio_features,7gC6Rbllqf1yXNC02e5jz2,spotify:track:7gC6Rbllqf1yXNC02e5jz2,https://api.spotify.com/v1/tracks/7gC6Rbllqf1y...,https://api.spotify.com/v1/audio-analysis/7gC6...,634440.0,3.0
12134,Venus And Mars / Rock Show / Jet - Live / Rema...,Paul McCartney,6Ff77WXC58MkhLE5A1qgY1,0,0.331,0.733,2.0,-8.671,1.0,0.0468,...,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0
12135,Venus And Mars / Rock Show / Jet - Live / Rema...,Wings,6Ff77WXC58MkhLE5A1qgY1,0,0.331,0.733,2.0,-8.671,1.0,0.0468,...,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0


In [51]:
# save dataframe to csv
pablo_w_audio_ft.to_csv('pablo_w_audio_ft.csv', sep=',', index=False)

As I am doing this project individually but cooperating with other 2 people, we decided to save some time creating a huge dataframe with more than 20.000 songs with processing just 1 playlist each.

I save the dataframe I processed in order to send to them. I also read their csv files and read them.

In [52]:
# read the dataframe result of Katarzyna's playlist
katarzyna_w_audio_ft = pd.read_csv('playlist_katarzyna.csv').drop(['Unnamed: 0'], axis=1)
katarzyna_w_audio_ft

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,2K,Nosaj Thing,33xMbeHzmWd6Od0BmLZEUs,0,0.310,0.445,7,-13.355,0,0.0863,...,0.1130,0.122,95.360,audio_features,33xMbeHzmWd6Od0BmLZEUs,spotify:track:33xMbeHzmWd6Od0BmLZEUs,https://api.spotify.com/v1/tracks/33xMbeHzmWd6...,https://api.spotify.com/v1/audio-analysis/33xM...,152560,3
1,4 Billion Souls,The Doors,3UnyplmZaq547hwsfOR5yy,26,0.419,0.565,5,-11.565,1,0.0347,...,0.1280,0.648,151.277,audio_features,3UnyplmZaq547hwsfOR5yy,spotify:track:3UnyplmZaq547hwsfOR5yy,https://api.spotify.com/v1/tracks/3UnyplmZaq54...,https://api.spotify.com/v1/audio-analysis/3Uny...,197707,4
2,4 Minute Warning,Radiohead,1w8QCSDH4QobcQeT4uMKLm,0,0.354,0.302,9,-13.078,1,0.0326,...,0.1110,0.223,123.753,audio_features,1w8QCSDH4QobcQeT4uMKLm,spotify:track:1w8QCSDH4QobcQeT4uMKLm,https://api.spotify.com/v1/tracks/1w8QCSDH4Qob...,https://api.spotify.com/v1/audio-analysis/1w8Q...,244285,4
3,7 Element,Vitas,7J9mBHG4J2eIfDAv5BehKA,0,0.727,0.785,5,-6.707,0,0.0603,...,0.3100,0.960,129.649,audio_features,7J9mBHG4J2eIfDAv5BehKA,spotify:track:7J9mBHG4J2eIfDAv5BehKA,https://api.spotify.com/v1/tracks/7J9mBHG4J2eI...,https://api.spotify.com/v1/audio-analysis/7J9m...,249940,4
4,#9 Dream,R.E.M.,1VZedwJj1gyi88WFRhfThb,6,0.571,0.724,0,-5.967,1,0.0260,...,0.0919,0.385,116.755,audio_features,1VZedwJj1gyi88WFRhfThb,spotify:track:1VZedwJj1gyi88WFRhfThb,https://api.spotify.com/v1/tracks/1VZedwJj1gyi...,https://api.spotify.com/v1/audio-analysis/1VZe...,278320,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11766,London Calling - Remastered,The Clash,5jzma6gCzYtKB1DbEwFZKH,75,0.651,0.801,0,-7.340,1,0.0514,...,0.0825,0.776,133.769,audio_features,5jzma6gCzYtKB1DbEwFZKH,spotify:track:5jzma6gCzYtKB1DbEwFZKH,https://api.spotify.com/v1/tracks/5jzma6gCzYtK...,https://api.spotify.com/v1/audio-analysis/5jzm...,200480,4
11767,Low Rider,War,2fmMPJb5EzZCx8BcNJvVk4,0,0.811,0.647,0,-10.989,1,0.0498,...,0.0572,0.990,139.787,audio_features,2fmMPJb5EzZCx8BcNJvVk4,spotify:track:2fmMPJb5EzZCx8BcNJvVk4,https://api.spotify.com/v1/tracks/2fmMPJb5EzZC...,https://api.spotify.com/v1/audio-analysis/2fmM...,191560,4
11768,Flower,Moby,60rIdEPDrzyLiLC0icp3xz,0,0.686,0.610,7,-5.902,1,0.0262,...,0.0710,0.766,80.567,audio_features,60rIdEPDrzyLiLC0icp3xz,spotify:track:60rIdEPDrzyLiLC0icp3xz,https://api.spotify.com/v1/tracks/60rIdEPDrzyL...,https://api.spotify.com/v1/audio-analysis/60rI...,206293,4
11769,Brighter Than Gold,The Cat Empire,0sEm1ld0V8YTCPcjPVfIsc,47,0.711,0.718,6,-5.739,1,0.0380,...,0.0732,0.688,117.071,audio_features,0sEm1ld0V8YTCPcjPVfIsc,spotify:track:0sEm1ld0V8YTCPcjPVfIsc,https://api.spotify.com/v1/tracks/0sEm1ld0V8YT...,https://api.spotify.com/v1/audio-analysis/0sEm...,200293,4


In [53]:
# read the dataframe result of Tejal's playlist
tejal_w_audio_ft = pd.read_csv('tejal_Audio_track.csv').drop(['Unnamed: 0'], axis=1)
tejal_w_audio_ft

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Pretty Pimpin,Kurt Vile,3l9eg9RtisizG12a1D6nZl,0,0.566,0.664,8,-9.081,1,0.0281,...,0.0903,0.764,91.117,audio_features,3l9eg9RtisizG12a1D6nZl,spotify:track:3l9eg9RtisizG12a1D6nZl,https://api.spotify.com/v1/tracks/3l9eg9Rtisiz...,https://api.spotify.com/v1/audio-analysis/3l9e...,298760,4
1,I Need My Girl,The National,50M7nY1oQuNHecs0ahWAtI,68,0.520,0.436,0,-11.508,0,0.0355,...,0.1230,0.114,126.053,audio_features,50M7nY1oQuNHecs0ahWAtI,spotify:track:50M7nY1oQuNHecs0ahWAtI,https://api.spotify.com/v1/tracks/50M7nY1oQuNH...,https://api.spotify.com/v1/audio-analysis/50M7...,245240,4
2,Strange,Galaxie 500,2TTAPkrGQQOZkaUTUlt21Q,48,0.312,0.844,7,-7.085,1,0.0511,...,0.0793,0.626,97.039,audio_features,2TTAPkrGQQOZkaUTUlt21Q,spotify:track:2TTAPkrGQQOZkaUTUlt21Q,https://api.spotify.com/v1/tracks/2TTAPkrGQQOZ...,https://api.spotify.com/v1/audio-analysis/2TTA...,199467,4
3,Todeswalzer,Windir,7BvcpEdO7PUDGGSER1S9LA,36,0.236,0.870,2,-4.747,0,0.0579,...,0.3050,0.186,167.845,audio_features,7BvcpEdO7PUDGGSER1S9LA,spotify:track:7BvcpEdO7PUDGGSER1S9LA,https://api.spotify.com/v1/tracks/7BvcpEdO7PUD...,https://api.spotify.com/v1/audio-analysis/7Bvc...,295131,4
4,Blinding Lights,The Weeknd,0VjIjW4GlUZAMYd2vXMi3b,93,0.514,0.730,1,-5.934,1,0.0598,...,0.0897,0.334,171.005,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4948,Apologize,OneRepublic,6ucR4KfvsBFWCMVFDvyKKl,75,0.653,0.604,8,-6.017,1,0.0278,...,0.0970,0.101,118.016,audio_features,6ucR4KfvsBFWCMVFDvyKKl,spotify:track:6ucR4KfvsBFWCMVFDvyKKl,https://api.spotify.com/v1/tracks/6ucR4KfvsBFW...,https://api.spotify.com/v1/audio-analysis/6ucR...,184400,4
4949,Goddamn Lonely Love - Remastered,Drive-By Truckers,241LK9hqfXJyWpQ7oClQSN,48,0.427,0.541,7,-7.042,1,0.0294,...,0.2230,0.153,119.094,audio_features,241LK9hqfXJyWpQ7oClQSN,spotify:track:241LK9hqfXJyWpQ7oClQSN,https://api.spotify.com/v1/tracks/241LK9hqfXJy...,https://api.spotify.com/v1/audio-analysis/241L...,301720,4
4950,Graveless yet Dead,Convocation,6hF9etLMoADWLsoui5ejGW,19,0.262,0.689,2,-8.342,1,0.0705,...,0.1170,0.178,130.110,audio_features,6hF9etLMoADWLsoui5ejGW,spotify:track:6hF9etLMoADWLsoui5ejGW,https://api.spotify.com/v1/tracks/6hF9etLMoADW...,https://api.spotify.com/v1/audio-analysis/6hF9...,572149,4
4951,Gimme! Gimme! Gimme! (A Man After Midnight),ABBA,3vkQ5DAB1qQMYO4Mr9zJN6,84,0.749,0.491,10,-9.655,1,0.0403,...,0.1590,0.536,119.528,audio_features,3vkQ5DAB1qQMYO4Mr9zJN6,spotify:track:3vkQ5DAB1qQMYO4Mr9zJN6,https://api.spotify.com/v1/tracks/3vkQ5DAB1qQM...,https://api.spotify.com/v1/audio-analysis/3vkQ...,292613,4


In [54]:
# concatenate all the processed dataframes
all_w_audio_ft = pd.concat([pablo_w_audio_ft, tejal_w_audio_ft, katarzyna_w_audio_ft], axis=0)
display(all_w_audio_ft.shape)

# drop duplicates
all_w_audio_ft = all_w_audio_ft.drop_duplicates()
display(all_w_audio_ft.shape)

(28861, 22)

(26574, 22)

In [55]:
# save dataframe to csv
all_w_audio_ft.to_csv('all_w_audio_ft.csv', sep=',', index=False)