In [1]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.manifold import TSNE

In [2]:
conn = sqlite3.connect('spotify.sqlite')
cursor = conn.cursor()
query = "SELECT name FROM sqlite_master WHERE type='table';"
cursor.execute(query)
tables = cursor.fetchall()
print("List of tables in the database:")
for table in tables:
    print(table[0])

List of tables in the database:
albums
artists
audio_features
genres
r_albums_artists
r_albums_tracks
r_artist_genre
r_track_artist
tracks
artists_indexed
albums_indexed
tracks_indexed
audio_features_indexed


In [3]:
query = "SELECT * FROM tracks"
encoding = "latin1"
conn.text_factory = lambda x: str(x, encoding)
track_df = pd.read_sql_query(query, conn)
print(track_df)

                             id  disc_number  duration  explicit  \
0        1dizvxctg9dHEyaYTFufVi            1    275893         1   
1        2g8HN35AnVGIk7B8yMucww            1    252746         1   
2        49pnyECzcMGCKAqxfTB4JZ            3    315080         0   
3        4E5IFAXCob6QqZaJMTw5YN            1    240800         1   
4        1gSt2UlC7mtRtJIc5zqKWn            2    203666         0   
...                         ...          ...       ...       ...   
8741667  12S1TA13KXl13sCQZgtI5y            1    198000         0   
8741668  134MLEMX0AWp5kbMMf90f8            1    191360         0   
8741669  13WVJaMn0ztUCU8UFCKQFO            1    272572         0   
8741670  11SfAvTgnEEYy4Ef0fyFpE            1    237200         1   
8741671  13EW51DKru044OJDJ9Bm8o            1    153147         1   

               audio_feature_id  \
0        1dizvxctg9dHEyaYTFufVi   
1        2g8HN35AnVGIk7B8yMucww   
2        49pnyECzcMGCKAqxfTB4JZ   
3        4E5IFAXCob6QqZaJMTw5YN   
4       

In [4]:
track_sorted_df = track_df.sort_values(by='popularity', ascending=False)
track_sorted_df

Unnamed: 0,id,disc_number,duration,explicit,audio_feature_id,name,preview_url,track_number,popularity,is_playable
802846,4iJyoBOLtHqaGxP12qzhQI,1,198081,1,4iJyoBOLtHqaGxP12qzhQI,Peaches (feat. Daniel Caesar & Giveon),,12,100,
109193,7lPN2DXiMsVn7XUKtOW1CS,1,242013,1,7lPN2DXiMsVn7XUKtOW1CS,drivers license,,1,100,
6202114,3VqeTFIvhxu3DIe4eZVzGq,1,164441,0,3VqeTFIvhxu3DIe4eZVzGq,Butter,,1,100,
4025933,67BtfxlNbhBmCDR2L2l8qd,1,137875,1,67BtfxlNbhBmCDR2L2l8qd,MONTERO (Call Me By Your Name),https://p.scdn.co/mp3-preview/fee76abaab413004...,1,99,
613929,3BZEcbdtXQSo7OrvKRJ6mb,1,137875,1,3BZEcbdtXQSo7OrvKRJ6mb,MONTERO (Call Me By Your Name),,1,99,
...,...,...,...,...,...,...,...,...,...,...
4668250,0QLCx9hQIILb8OmrqeF75q,1,327610,1,0QLCx9hQIILb8OmrqeF75q,Dame Micro,,9,0,
4668247,1xSwsAa29i5iHQlIUDNVY7,1,261154,1,1xSwsAa29i5iHQlIUDNVY7,No Tiene Huesos,,10,0,
4668245,1fyCuwobAvxr8q0TU8jdNs,1,288740,1,1fyCuwobAvxr8q0TU8jdNs,Gracias,,3,0,
323081,18qyGR3v4026xRQQnDru8o,1,206336,0,18qyGR3v4026xRQQnDru8o,Eenie Meenie (George Gershwin Arrangement),https://p.scdn.co/mp3-preview/bc2087efdba03530...,7,0,


In [7]:
top_df = track_sorted_df.head(250)
top_df

Unnamed: 0,id,disc_number,duration,explicit,audio_feature_id,name,preview_url,track_number,popularity,is_playable
802846,4iJyoBOLtHqaGxP12qzhQI,1,198081,1,4iJyoBOLtHqaGxP12qzhQI,Peaches (feat. Daniel Caesar & Giveon),,12,100,
109193,7lPN2DXiMsVn7XUKtOW1CS,1,242013,1,7lPN2DXiMsVn7XUKtOW1CS,drivers license,,1,100,
6202114,3VqeTFIvhxu3DIe4eZVzGq,1,164441,0,3VqeTFIvhxu3DIe4eZVzGq,Butter,,1,100,
4025933,67BtfxlNbhBmCDR2L2l8qd,1,137875,1,67BtfxlNbhBmCDR2L2l8qd,MONTERO (Call Me By Your Name),https://p.scdn.co/mp3-preview/fee76abaab413004...,1,99,
613929,3BZEcbdtXQSo7OrvKRJ6mb,1,137875,1,3BZEcbdtXQSo7OrvKRJ6mb,MONTERO (Call Me By Your Name),,1,99,
...,...,...,...,...,...,...,...,...,...,...
107648,3e9HZxeyfWwjeyPAMmWSSQ,1,207320,1,3e9HZxeyfWwjeyPAMmWSSQ,"thank u, next",,11,85,
681603,1HNkqx9Ahdgi1Ixy2xkKkL,1,258986,0,1HNkqx9Ahdgi1Ixy2xkKkL,Photograph,https://p.scdn.co/mp3-preview/097c7b735ceb4109...,6,85,
44680,40riOy7x9W7GXjyGp4pjAv,1,391376,0,40riOy7x9W7GXjyGp4pjAv,Hotel California - 2013 Remaster,https://p.scdn.co/mp3-preview/50e82c99c20ffa42...,1,85,
104867,7GX5flRQZVHRAGd6B4TmDO,1,182706,1,7GX5flRQZVHRAGd6B4TmDO,XO Tour Llif3,https://p.scdn.co/mp3-preview/7add10ee576e9017...,16,85,


In [8]:
query = "SELECT * FROM audio_features"
encoding = "latin1"
conn.text_factory = lambda x: str(x, encoding)
features_df = pd.read_sql_query(query, conn)
print(features_df)

                             id  acousticness  \
0        2jKoVlU7VAmExKJ1Jh3w9P        0.1800   
1        4JYUDRtPZuVNi7FAnbHyux        0.2720   
2        6YjKAkDYmlasMqYw73iB0w        0.0783   
3        2YlvHjDb4Tyxl4A1IcDhAe        0.5840   
4        3UOuBNEin5peSRqdzvlnWM        0.1700   
...                         ...           ...   
8740038  7xq5qTqdCvTQJomW9I5pSZ        0.1430   
8740039  6ClotRUApTN25L3OnkzQjb        0.0417   
8740040  7mZL8mtlavFzAzIlIfOMv2        0.2570   
8740041  08ITn8qaxbnEyk3N08IC47        0.0361   
8740042  5Vqgrz2M1m49PRh2eR9et7        0.1510   

                                              analysis_url  danceability  \
0        https://api.spotify.com/v1/audio-analysis/2jKo...         0.893   
1        https://api.spotify.com/v1/audio-analysis/4JYU...         0.520   
2        https://api.spotify.com/v1/audio-analysis/6YjK...         0.918   
3        https://api.spotify.com/v1/audio-analysis/2Ylv...         0.877   
4        https://api.spotify.co

In [9]:
track_and_features_df = pd.merge(top_df, features_df, on='id', how='inner')

In [13]:
track_and_features_df

Unnamed: 0,id,disc_number,duration_x,explicit,name,preview_url,track_number,popularity,is_playable,acousticness,...,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,4iJyoBOLtHqaGxP12qzhQI,1,198081,1,Peaches (feat. Daniel Caesar & Giveon),,12,100,,0.32100,...,0.696,0.000000,0,0.4200,-6.181,1,0.1190,90.029999,4,0.464
1,7lPN2DXiMsVn7XUKtOW1CS,1,242013,1,drivers license,,1,100,,0.72100,...,0.436,0.000013,10,0.1050,-8.761,1,0.0601,143.873993,4,0.132
2,3VqeTFIvhxu3DIe4eZVzGq,1,164441,0,Butter,,1,100,,0.00323,...,0.459,0.000000,8,0.0906,-5.187,1,0.0948,109.997002,4,0.695
3,67BtfxlNbhBmCDR2L2l8qd,1,137875,1,MONTERO (Call Me By Your Name),https://p.scdn.co/mp3-preview/fee76abaab413004...,1,99,,0.29700,...,0.508,0.000000,8,0.3840,-6.682,0,0.1520,178.817993,4,0.758
4,3BZEcbdtXQSo7OrvKRJ6mb,1,137875,1,MONTERO (Call Me By Your Name),,1,99,,0.29700,...,0.508,0.000000,8,0.3840,-6.682,0,0.1520,178.817993,4,0.758
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,3e9HZxeyfWwjeyPAMmWSSQ,1,207320,1,"thank u, next",,11,85,,0.22900,...,0.653,0.000000,1,0.1010,-5.634,1,0.0658,106.966003,4,0.412
246,1HNkqx9Ahdgi1Ixy2xkKkL,1,258986,0,Photograph,https://p.scdn.co/mp3-preview/097c7b735ceb4109...,6,85,,0.60700,...,0.379,0.000464,4,0.0986,-10.480,1,0.0476,107.988998,4,0.201
247,40riOy7x9W7GXjyGp4pjAv,1,391376,0,Hotel California - 2013 Remaster,https://p.scdn.co/mp3-preview/50e82c99c20ffa42...,1,85,,0.00574,...,0.508,0.000494,2,0.0575,-9.484,1,0.0270,147.125000,4,0.609
248,7GX5flRQZVHRAGd6B4TmDO,1,182706,1,XO Tour Llif3,https://p.scdn.co/mp3-preview/7add10ee576e9017...,16,85,,0.00264,...,0.750,0.000000,11,0.1090,-6.366,0,0.2310,155.095993,4,0.401


In [15]:
print(track_and_features_df.columns)

Index(['id', 'disc_number', 'duration_x', 'explicit', 'name', 'preview_url',
       'track_number', 'popularity', 'is_playable', 'acousticness',
       'analysis_url', 'danceability', 'duration_y', 'energy',
       'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
       'speechiness', 'tempo', 'time_signature', 'valence'],
      dtype='object')


In [39]:
query = "SELECT * FROM r_track_artist"
encoding = "latin1"
conn.text_factory = lambda x: str(x, encoding)
r_track_artist_df = pd.read_sql_query(query, conn)
print(r_track_artist_df)

                        track_id               artist_id
0         2jKoVlU7VAmExKJ1Jh3w9P  4tujQJicOnuZRLiBFdp3Ou
1         2jKoVlU7VAmExKJ1Jh3w9P  2VX0o9LDIVmKIgpnwdJpOJ
2         2jKoVlU7VAmExKJ1Jh3w9P  3iBOsmwGzRKyR0vs2I61xP
3         2jKoVlU7VAmExKJ1Jh3w9P  22qf8cJRzBjIWb2Jc4JeOr
4         4JYUDRtPZuVNi7FAnbHyux  4akj4uteQQrrGxhX9Rjuyf
...                          ...                     ...
11840397  6ClotRUApTN25L3OnkzQjb  3y9hQYlwNKqhdybIJTFzTr
11840398  7mZL8mtlavFzAzIlIfOMv2  5e8vdggiGg9glSpuBgcaiv
11840399  7mZL8mtlavFzAzIlIfOMv2  6NgwMiz1Dwsktnn8p8yujE
11840400  08ITn8qaxbnEyk3N08IC47  4NEA8zkFgdVHYWZXANEHA7
11840401  5Vqgrz2M1m49PRh2eR9et7  4LOzEceyTOhRzbpebCciHR

[11840402 rows x 2 columns]


In [34]:
query = "SELECT * FROM r_artist_genre"
encoding = "latin1"
conn.text_factory = lambda x: str(x, encoding)
artist_genre_df = pd.read_sql_query(query, conn)
print(artist_genre_df)

                    genre_id               artist_id
0            detroit hip hop  4tujQJicOnuZRLiBFdp3Ou
1                     g funk  4tujQJicOnuZRLiBFdp3Ou
2               gangster rap  4tujQJicOnuZRLiBFdp3Ou
3           hardcore hip hop  4tujQJicOnuZRLiBFdp3Ou
4                    hip hop  4tujQJicOnuZRLiBFdp3Ou
...                      ...                     ...
487381                   edm  05KDKIpxshxrB9BMewaCBW
487382                   edm  6nS5roXSAGhTGr34W6n7Et
487383          samurai trap  5rcAoYM8NZrlnPZES9S8KV
487384           focus beats  0XBtKvTff8E8BIhtlxjVHv
487385  hip-hop experimental  6NgwMiz1Dwsktnn8p8yujE

[487386 rows x 2 columns]


In [40]:
r_track_artist_df = pd.merge(r_track_artist_df, artist_genre_df, on='artist_id', how='inner')
r_track_artist_df

Unnamed: 0,track_id,artist_id,genre_id
0,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,detroit hip hop
1,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,g funk
2,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,gangster rap
3,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,hardcore hip hop
4,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,hip hop
...,...,...,...
21678565,6Q42I6v0BiF8pzg9ELOX2m,5yUWxOhPOP18jZ2TwQxEtw,south african deep house
21678566,33tYADyL2aZctrvR59K1bQ,2YEnrpAWWaNRFumgde1lLH,disco house
21678567,73xL8Khrwd3OWJlBjKSpGk,2YEnrpAWWaNRFumgde1lLH,disco house
21678568,6oq0MdzLaFShOXXqs2yavp,2YEnrpAWWaNRFumgde1lLH,disco house


In [41]:
artists_df.rename(columns={'id': 'artist_id'}, inplace=True)
r_track_artist_merged_df = pd.merge(r_track_artist_df, artists_df[["name", "artist_id"]], on='artist_id', how='inner')

In [42]:
r_track_artist_merged_df

Unnamed: 0,track_id,artist_id,genre_id,name
0,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,detroit hip hop,Xzibit
1,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,g funk,Xzibit
2,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,gangster rap,Xzibit
3,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,hardcore hip hop,Xzibit
4,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,hip hop,Xzibit
...,...,...,...,...
21678565,6Q42I6v0BiF8pzg9ELOX2m,5yUWxOhPOP18jZ2TwQxEtw,south african deep house,SONIDO
21678566,33tYADyL2aZctrvR59K1bQ,2YEnrpAWWaNRFumgde1lLH,disco house,Oden & Fatzo
21678567,73xL8Khrwd3OWJlBjKSpGk,2YEnrpAWWaNRFumgde1lLH,disco house,Oden & Fatzo
21678568,6oq0MdzLaFShOXXqs2yavp,2YEnrpAWWaNRFumgde1lLH,disco house,Oden & Fatzo


In [28]:
query = "SELECT * FROM artists"
encoding = "latin1"
conn.text_factory = lambda x: str(x, encoding)
artists_df = pd.read_sql_query(query, conn)
print(artists_df)

Unnamed: 0,track_id,artist_id,name
0,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,Xzibit
1,6YjKAkDYmlasMqYw73iB0w,4tujQJicOnuZRLiBFdp3Ou,Xzibit
2,1ktyqk3vVMB3zg17NjttnX,4tujQJicOnuZRLiBFdp3Ou,Xzibit
3,0GQQzmMNA7JE4tOdcSFh1C,4tujQJicOnuZRLiBFdp3Ou,Xzibit
4,1SWVDBtw6h3tm9OehOkDhv,4tujQJicOnuZRLiBFdp3Ou,Xzibit
...,...,...,...
11840397,0fkDIeYWwY3s7G2yNqXPoF,7KcttLT1Ou7ICdL3TUlc5H,Justice Hall
11840398,0REqqxN5eYfUOZ7Rw6CnM9,68bLTKJw7zEdzEsJZS1As0,Miss Malibu
11840399,5vpFxh9WrSLjQO6V6Jvci5,0VzXnkUTwKDYznbgEvn3Ca,Jam'addict
11840400,5vpFxh9WrSLjQO6V6Jvci5,78hHMThaOCqyAGfytBUwGs,Sohaib


In [43]:
r_track_artist_merged_df

Unnamed: 0,track_id,artist_id,genre_id,name
0,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,detroit hip hop,Xzibit
1,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,g funk,Xzibit
2,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,gangster rap,Xzibit
3,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,hardcore hip hop,Xzibit
4,2jKoVlU7VAmExKJ1Jh3w9P,4tujQJicOnuZRLiBFdp3Ou,hip hop,Xzibit
...,...,...,...,...
21678565,6Q42I6v0BiF8pzg9ELOX2m,5yUWxOhPOP18jZ2TwQxEtw,south african deep house,SONIDO
21678566,33tYADyL2aZctrvR59K1bQ,2YEnrpAWWaNRFumgde1lLH,disco house,Oden & Fatzo
21678567,73xL8Khrwd3OWJlBjKSpGk,2YEnrpAWWaNRFumgde1lLH,disco house,Oden & Fatzo
21678568,6oq0MdzLaFShOXXqs2yavp,2YEnrpAWWaNRFumgde1lLH,disco house,Oden & Fatzo


In [22]:
track_and_features_df.rename(columns={'id': 'track_id'}, inplace=True)

In [47]:
grouped_artists = r_track_artist_merged_df.groupby('track_id')['name'].agg(list).reset_index()

In [48]:
grouped_artists

Unnamed: 0,track_id,name
0,0000QBRGPosiFRXKmMYnsO,[Sexteto La Playa]
1,0000gBWfr2zIFzE5tDzxca,[RattenfÃ¤nger]
2,0000uJA4xCdxThagdLkkLR,"[Cherryholmes, Cherryholmes, Cherryholmes]"
3,0001Lyv0YTjkZSqzT4WkLy,"[Half Japanese, Half Japanese, Half Japanese, ..."
4,0001QZSdENvrMx6cZXZJdo,"[Archers Of Loaf, Archers Of Loaf, Archers Of ..."
...,...,...
6558498,7zzxqMfB1N7apnN3kuos5v,"[Jonathan Butler, Jonathan Butler, Jonathan Bu..."
6558499,7zzy0bwJGb8gZ4e6qxPASG,"[Lead Belly, Lead Belly, Lead Belly, Lead Bell..."
6558500,7zzyILZOJRvpWhp7NtJJ51,"[03 Greedo, 03 Greedo, 03 Greedo, 03 Greedo, 0..."
6558501,7zzyNsSXglPKfmyawxn5KP,"[John O'Callaghan, John O'Callaghan, John O'Ca..."


In [51]:
grouped_genres = r_track_artist_merged_df.groupby('track_id')['genre_id'].agg(list).reset_index()
grouped_genres

Unnamed: 0,track_id,genre_id
0,0000QBRGPosiFRXKmMYnsO,[cha-cha-cha]
1,0000gBWfr2zIFzE5tDzxca,[ukrainian metal]
2,0000uJA4xCdxThagdLkkLR,"[bluegrass, neo-traditional bluegrass, progres..."
3,0001Lyv0YTjkZSqzT4WkLy,"[dunedin sound, experimental, experimental ind..."
4,0001QZSdENvrMx6cZXZJdo,"[alternative pop, alternative rock, emo, gbvfi..."
...,...,...
6558498,7zzxqMfB1N7apnN3kuos5v,"[smooth jazz, smooth saxophone, south african ..."
6558499,7zzy0bwJGb8gZ4e6qxPASG,"[acoustic blues, appalachian folk, blues, coun..."
6558500,7zzyILZOJRvpWhp7NtJJ51,"[cali rap, melodic rap, trap, underground hip ..."
6558501,7zzyNsSXglPKfmyawxn5KP,"[edm, pop dance, progressive house, progressiv..."


In [52]:
merged_artists_df = pd.merge(grouped_artists, grouped_genres, on='track_id', how='inner')
merged_artists_df

Unnamed: 0,track_id,name,genre_id
0,0000QBRGPosiFRXKmMYnsO,[Sexteto La Playa],[cha-cha-cha]
1,0000gBWfr2zIFzE5tDzxca,[RattenfÃ¤nger],[ukrainian metal]
2,0000uJA4xCdxThagdLkkLR,"[Cherryholmes, Cherryholmes, Cherryholmes]","[bluegrass, neo-traditional bluegrass, progres..."
3,0001Lyv0YTjkZSqzT4WkLy,"[Half Japanese, Half Japanese, Half Japanese, ...","[dunedin sound, experimental, experimental ind..."
4,0001QZSdENvrMx6cZXZJdo,"[Archers Of Loaf, Archers Of Loaf, Archers Of ...","[alternative pop, alternative rock, emo, gbvfi..."
...,...,...,...
6558498,7zzxqMfB1N7apnN3kuos5v,"[Jonathan Butler, Jonathan Butler, Jonathan Bu...","[smooth jazz, smooth saxophone, south african ..."
6558499,7zzy0bwJGb8gZ4e6qxPASG,"[Lead Belly, Lead Belly, Lead Belly, Lead Bell...","[acoustic blues, appalachian folk, blues, coun..."
6558500,7zzyILZOJRvpWhp7NtJJ51,"[03 Greedo, 03 Greedo, 03 Greedo, 03 Greedo, 0...","[cali rap, melodic rap, trap, underground hip ..."
6558501,7zzyNsSXglPKfmyawxn5KP,"[John O'Callaghan, John O'Callaghan, John O'Ca...","[edm, pop dance, progressive house, progressiv..."


In [62]:
all_df = pd.merge(track_and_features_df, merged_artists_df, on='track_id', how='inner')
all_df

Unnamed: 0,track_id,disc_number,duration_x,explicit,name_x,preview_url,track_number,popularity,is_playable,acousticness,...,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,name_y,genre_id
0,4iJyoBOLtHqaGxP12qzhQI,1,198081,1,Peaches (feat. Daniel Caesar & Giveon),,12,100,,0.32100,...,0,0.4200,-6.181,1,0.1190,90.029999,4,0.464,"[Justin Bieber, Justin Bieber, Justin Bieber, ...","[canadian pop, pop, post-teen pop, canadian co..."
1,7lPN2DXiMsVn7XUKtOW1CS,1,242013,1,drivers license,,1,100,,0.72100,...,10,0.1050,-8.761,1,0.0601,143.873993,4,0.132,"[Olivia Rodrigo, Olivia Rodrigo, Olivia Rodrigo]","[alt z, pop, post-teen pop]"
2,3VqeTFIvhxu3DIe4eZVzGq,1,164441,0,Butter,,1,100,,0.00323,...,8,0.0906,-5.187,1,0.0948,109.997002,4,0.695,"[BTS, BTS]","[k-pop, k-pop boy group]"
3,67BtfxlNbhBmCDR2L2l8qd,1,137875,1,MONTERO (Call Me By Your Name),https://p.scdn.co/mp3-preview/fee76abaab413004...,1,99,,0.29700,...,8,0.3840,-6.682,0,0.1520,178.817993,4,0.758,"[Lil Nas X, Lil Nas X, Lil Nas X, Lil Nas X]","[country rap, lgbtq+ hip hop, pop rap, queer c..."
4,3BZEcbdtXQSo7OrvKRJ6mb,1,137875,1,MONTERO (Call Me By Your Name),,1,99,,0.29700,...,8,0.3840,-6.682,0,0.1520,178.817993,4,0.758,"[Lil Nas X, Lil Nas X, Lil Nas X, Lil Nas X]","[country rap, lgbtq+ hip hop, pop rap, queer c..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,3e9HZxeyfWwjeyPAMmWSSQ,1,207320,1,"thank u, next",,11,85,,0.22900,...,1,0.1010,-5.634,1,0.0658,106.966003,4,0.412,"[Ariana Grande, Ariana Grande, Ariana Grande]","[pop, post-teen pop, dance pop]"
244,1HNkqx9Ahdgi1Ixy2xkKkL,1,258986,0,Photograph,https://p.scdn.co/mp3-preview/097c7b735ceb4109...,6,85,,0.60700,...,4,0.0986,-10.480,1,0.0476,107.988998,4,0.201,"[Ed Sheeran, Ed Sheeran]","[pop, uk pop]"
245,40riOy7x9W7GXjyGp4pjAv,1,391376,0,Hotel California - 2013 Remaster,https://p.scdn.co/mp3-preview/50e82c99c20ffa42...,1,85,,0.00574,...,2,0.0575,-9.484,1,0.0270,147.125000,4,0.609,"[Eagles, Eagles, Eagles, Eagles, Eagles, Eagle...","[album rock, classic rock, country rock, folk ..."
246,7GX5flRQZVHRAGd6B4TmDO,1,182706,1,XO Tour Llif3,https://p.scdn.co/mp3-preview/7add10ee576e9017...,16,85,,0.00264,...,11,0.1090,-6.366,0,0.2310,155.095993,4,0.401,"[Lil Uzi Vert, Lil Uzi Vert, Lil Uzi Vert, Lil...","[melodic rap, philly rap, rap, trap]"


In [63]:
all_df['name_y'] = all_df['name_y'].apply(lambda x: list(dict.fromkeys(x)))
all_df['genre_id'] = all_df['genre_id'].apply(lambda x: list(dict.fromkeys(x)))
all_df.drop(columns=['duration_y'], inplace=True)
all_df.rename(columns={'name_x': 'name', 'name_y': 'artist', 'duration_x': 'duration'}, inplace=True)
all_df

Unnamed: 0,track_id,disc_number,duration,explicit,name,preview_url,track_number,popularity,is_playable,acousticness,...,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,artist,genre_id
0,4iJyoBOLtHqaGxP12qzhQI,1,198081,1,Peaches (feat. Daniel Caesar & Giveon),,12,100,,0.32100,...,0,0.4200,-6.181,1,0.1190,90.029999,4,0.464,"[Justin Bieber, Daniel Caesar, Giveon]","[canadian pop, pop, post-teen pop, canadian co..."
1,7lPN2DXiMsVn7XUKtOW1CS,1,242013,1,drivers license,,1,100,,0.72100,...,10,0.1050,-8.761,1,0.0601,143.873993,4,0.132,[Olivia Rodrigo],"[alt z, pop, post-teen pop]"
2,3VqeTFIvhxu3DIe4eZVzGq,1,164441,0,Butter,,1,100,,0.00323,...,8,0.0906,-5.187,1,0.0948,109.997002,4,0.695,[BTS],"[k-pop, k-pop boy group]"
3,67BtfxlNbhBmCDR2L2l8qd,1,137875,1,MONTERO (Call Me By Your Name),https://p.scdn.co/mp3-preview/fee76abaab413004...,1,99,,0.29700,...,8,0.3840,-6.682,0,0.1520,178.817993,4,0.758,[Lil Nas X],"[country rap, lgbtq+ hip hop, pop rap, queer c..."
4,3BZEcbdtXQSo7OrvKRJ6mb,1,137875,1,MONTERO (Call Me By Your Name),,1,99,,0.29700,...,8,0.3840,-6.682,0,0.1520,178.817993,4,0.758,[Lil Nas X],"[country rap, lgbtq+ hip hop, pop rap, queer c..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,3e9HZxeyfWwjeyPAMmWSSQ,1,207320,1,"thank u, next",,11,85,,0.22900,...,1,0.1010,-5.634,1,0.0658,106.966003,4,0.412,[Ariana Grande],"[pop, post-teen pop, dance pop]"
244,1HNkqx9Ahdgi1Ixy2xkKkL,1,258986,0,Photograph,https://p.scdn.co/mp3-preview/097c7b735ceb4109...,6,85,,0.60700,...,4,0.0986,-10.480,1,0.0476,107.988998,4,0.201,[Ed Sheeran],"[pop, uk pop]"
245,40riOy7x9W7GXjyGp4pjAv,1,391376,0,Hotel California - 2013 Remaster,https://p.scdn.co/mp3-preview/50e82c99c20ffa42...,1,85,,0.00574,...,2,0.0575,-9.484,1,0.0270,147.125000,4,0.609,[Eagles],"[album rock, classic rock, country rock, folk ..."
246,7GX5flRQZVHRAGd6B4TmDO,1,182706,1,XO Tour Llif3,https://p.scdn.co/mp3-preview/7add10ee576e9017...,16,85,,0.00264,...,11,0.1090,-6.366,0,0.2310,155.095993,4,0.401,[Lil Uzi Vert],"[melodic rap, philly rap, rap, trap]"


In [64]:
all_df.to_csv('top_songs.csv', index=False)

In [60]:
cursor.close()
conn.close()