In [1]:
import numpy as np
import pandas as pd
import psycopg2 
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, desc
from config import username, password

In [2]:
# create engine
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@aws-gt-dataviz-finalpg-001.cloqvwuqbywl.us-east-1.rds.amazonaws.com:5432/spotify_db')


In [3]:
# reflect an existing database into a new model
Base = automap_base()

# reflect the tables
Base.prepare(engine, reflect=True)

In [4]:
# We can view all of the classes that automap found
Base.classes.keys()

['artistgenre', 'kmodel', 'artists', 'genres', 'tracks', 'years']

In [5]:
# Save references to each table
Artistgenre = Base.classes.artistgenre
Artists = Base.classes.artists
Genres = Base.classes.genres
Tracks = Base.classes.tracks
Years = Base.classes.years

In [6]:
# look at columns in tables
inspector = inspect(engine)
columns = inspector.get_columns('tracks')
for column in columns:
    print(column["name"], column["type"])

acousticness DOUBLE PRECISION
artists TEXT
danceability DOUBLE PRECISION
duration_ms BIGINT
energy DOUBLE PRECISION
explicit BIGINT
id TEXT
instrumentalness DOUBLE PRECISION
key BIGINT
liveness DOUBLE PRECISION
loudness DOUBLE PRECISION
mode BIGINT
name TEXT
popularity BIGINT
release_date TEXT
speechiness DOUBLE PRECISION
tempo DOUBLE PRECISION
valence DOUBLE PRECISION
year BIGINT


In [7]:
# Create our session (link) from Python to the DB
session = Session(engine)

In [8]:
tracks_q = session.query(Tracks.id, Tracks.name, Tracks.artists, Tracks.acousticness,
                         Tracks.danceability, Tracks.energy, Tracks.instrumentalness,
                         Tracks.valence, Tracks.popularity, Tracks.year,
                         Tracks.key, Tracks.liveness, Tracks.loudness, Tracks.tempo).\
                        filter(Tracks.popularity > 20).all()

# store results in dataframe
tracks_kDF = pd.DataFrame(tracks_q, columns=['id', 'name', 'artists', 'acousticness',
                                             'danceability' ,'energy' ,'instrumentalness',
                                             'valence', 'popularity', 'year',
                                            'key', 'liveness', 'loudness', 'tempo'])

#  Key: 0 is C natural, 1 is C♯, 2 is D♮ and so on up to 11, which is B♮
#  Loudness: Values typical range between -60 and 0 db.

In [9]:
tracks_kDF['popularity'] = tracks_kDF['popularity']/1000
tracks_kDF['year'] = tracks_kDF['year']/2021/10
tracks_kDF['key'] = tracks_kDF['key']/11
tracks_kDF['loudness'] = tracks_kDF['loudness']/60
tracks_kDF['tempo'] = tracks_kDF['tempo']/244

In [10]:
tracks_kDF

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
0,2wAfHM7Whz67VFbdanhZlk,Nobody Knows You When You're Down and Out,['Bessie Smith'],0.99600,0.614,0.0423,0.002930,0.211,0.041,0.095151,0.363636,0.1830,-0.200550,0.368123
1,3eMrYc092k7SIJfWJ7oasR,Weather Bird,"['Louis Armstrong', 'Earl Hines']",0.98400,0.831,0.2620,0.912000,0.901,0.037,0.095151,0.727273,0.2040,-0.206433,0.428713
2,2AZgaYZSwUosJD71J2N2Zo,'Tain't Nobody's Bizness If I Do,['Bessie Smith'],0.99600,0.537,0.0443,0.000265,0.137,0.029,0.095151,0.272727,0.1520,-0.274567,0.329787
3,0V1iYWPXCBTaB6dhbiprGF,Send Me to the 'Lectric Chair,['Bessie Smith'],0.98600,0.771,0.0905,0.000141,0.601,0.025,0.095151,0.272727,0.1520,-0.129800,0.358898
4,6qRvnXftofjYJm1Mg98UWL,Need a Little Sugar in My Bowl,['Bessie Smith'],0.99200,0.693,0.0270,0.000000,0.402,0.026,0.095151,0.000000,0.1340,-0.225100,0.310447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98463,3NUmUIyzNLBp8bCFMH8Mif,Waiting On A War,['Foo Fighters'],0.00984,0.530,0.7590,0.000000,0.502,0.069,0.100000,0.636364,0.3190,-0.117783,0.540980
98464,0fJ1caLzidzTlIL3pPX1eU,Precious' Tale,['Jazmine Sullivan'],0.71500,0.734,0.3460,0.000000,0.930,0.059,0.100000,0.181818,0.3940,-0.195367,0.364135
98465,3HSUqAErTyFQWLfLdnFVnB,Connexion,['ZAYN'],0.49800,0.597,0.3680,0.000000,0.590,0.052,0.100000,0.181818,0.1090,-0.169183,0.704836
98466,660rulYF3eLCuW6rQpiMdL,Little Boy,['Ashnikko'],0.10500,0.781,0.4870,0.000000,0.327,0.061,0.100000,0.090909,0.0802,-0.121683,0.532545


In [11]:
tracks_kDF.describe()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
count,98468.0,98468.0,98468.0,98468.0,98468.0,98468.0,98468.0,98468.0,98468.0,98468.0,98468.0
mean,0.37253,0.548568,0.555895,0.114267,0.544981,0.042022,0.09831,0.473339,0.201544,-0.173715,0.489701
std,0.334264,0.173603,0.253578,0.262269,0.261449,0.014184,0.000839,0.320456,0.178854,0.08454,0.122758
min,0.0,0.0,0.0,0.0,0.0,0.021,0.095002,0.0,0.0,-1.0,0.0
25%,0.0498,0.431,0.365,0.0,0.336,0.031,0.097674,0.181818,0.0935,-0.218033,0.394701
50%,0.281,0.557,0.568,9e-05,0.555,0.04,0.098268,0.454545,0.13,-0.160283,0.480412
75%,0.679,0.674,0.763,0.0185,0.766,0.052,0.098911,0.727273,0.254,-0.112296,0.564842
max,0.996,0.988,1.0,1.0,1.0,0.1,0.1,1.0,1.0,0.0624,0.99798


In [12]:
session.close()

In [13]:
len(tracks_kDF)

98468

In [14]:
tracks_kDF['artists'].dtypes

dtype('O')

In [15]:
tracks_kDF['artists'] = tracks_kDF['artists'].str.strip("['']").astype(str)
tracks_kDF

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
0,2wAfHM7Whz67VFbdanhZlk,Nobody Knows You When You're Down and Out,Bessie Smith,0.99600,0.614,0.0423,0.002930,0.211,0.041,0.095151,0.363636,0.1830,-0.200550,0.368123
1,3eMrYc092k7SIJfWJ7oasR,Weather Bird,"Louis Armstrong', 'Earl Hines",0.98400,0.831,0.2620,0.912000,0.901,0.037,0.095151,0.727273,0.2040,-0.206433,0.428713
2,2AZgaYZSwUosJD71J2N2Zo,'Tain't Nobody's Bizness If I Do,Bessie Smith,0.99600,0.537,0.0443,0.000265,0.137,0.029,0.095151,0.272727,0.1520,-0.274567,0.329787
3,0V1iYWPXCBTaB6dhbiprGF,Send Me to the 'Lectric Chair,Bessie Smith,0.98600,0.771,0.0905,0.000141,0.601,0.025,0.095151,0.272727,0.1520,-0.129800,0.358898
4,6qRvnXftofjYJm1Mg98UWL,Need a Little Sugar in My Bowl,Bessie Smith,0.99200,0.693,0.0270,0.000000,0.402,0.026,0.095151,0.000000,0.1340,-0.225100,0.310447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98463,3NUmUIyzNLBp8bCFMH8Mif,Waiting On A War,Foo Fighters,0.00984,0.530,0.7590,0.000000,0.502,0.069,0.100000,0.636364,0.3190,-0.117783,0.540980
98464,0fJ1caLzidzTlIL3pPX1eU,Precious' Tale,Jazmine Sullivan,0.71500,0.734,0.3460,0.000000,0.930,0.059,0.100000,0.181818,0.3940,-0.195367,0.364135
98465,3HSUqAErTyFQWLfLdnFVnB,Connexion,ZAYN,0.49800,0.597,0.3680,0.000000,0.590,0.052,0.100000,0.181818,0.1090,-0.169183,0.704836
98466,660rulYF3eLCuW6rQpiMdL,Little Boy,Ashnikko,0.10500,0.781,0.4870,0.000000,0.327,0.061,0.100000,0.090909,0.0802,-0.121683,0.532545


In [16]:
tracks_kDF['artists'] = tracks_kDF['artists'].apply(lambda x: x.replace("', '",", ")).astype(str) 
tracks_kDF

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
0,2wAfHM7Whz67VFbdanhZlk,Nobody Knows You When You're Down and Out,Bessie Smith,0.99600,0.614,0.0423,0.002930,0.211,0.041,0.095151,0.363636,0.1830,-0.200550,0.368123
1,3eMrYc092k7SIJfWJ7oasR,Weather Bird,"Louis Armstrong, Earl Hines",0.98400,0.831,0.2620,0.912000,0.901,0.037,0.095151,0.727273,0.2040,-0.206433,0.428713
2,2AZgaYZSwUosJD71J2N2Zo,'Tain't Nobody's Bizness If I Do,Bessie Smith,0.99600,0.537,0.0443,0.000265,0.137,0.029,0.095151,0.272727,0.1520,-0.274567,0.329787
3,0V1iYWPXCBTaB6dhbiprGF,Send Me to the 'Lectric Chair,Bessie Smith,0.98600,0.771,0.0905,0.000141,0.601,0.025,0.095151,0.272727,0.1520,-0.129800,0.358898
4,6qRvnXftofjYJm1Mg98UWL,Need a Little Sugar in My Bowl,Bessie Smith,0.99200,0.693,0.0270,0.000000,0.402,0.026,0.095151,0.000000,0.1340,-0.225100,0.310447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98463,3NUmUIyzNLBp8bCFMH8Mif,Waiting On A War,Foo Fighters,0.00984,0.530,0.7590,0.000000,0.502,0.069,0.100000,0.636364,0.3190,-0.117783,0.540980
98464,0fJ1caLzidzTlIL3pPX1eU,Precious' Tale,Jazmine Sullivan,0.71500,0.734,0.3460,0.000000,0.930,0.059,0.100000,0.181818,0.3940,-0.195367,0.364135
98465,3HSUqAErTyFQWLfLdnFVnB,Connexion,ZAYN,0.49800,0.597,0.3680,0.000000,0.590,0.052,0.100000,0.181818,0.1090,-0.169183,0.704836
98466,660rulYF3eLCuW6rQpiMdL,Little Boy,Ashnikko,0.10500,0.781,0.4870,0.000000,0.327,0.061,0.100000,0.090909,0.0802,-0.121683,0.532545


In [17]:
tracks_kDF['artists'] = tracks_kDF['artists'].apply(lambda x: x.replace('"[""','').replace('""]"','').replace('"','')).astype(str) 


In [18]:
tracks_kDF

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
0,2wAfHM7Whz67VFbdanhZlk,Nobody Knows You When You're Down and Out,Bessie Smith,0.99600,0.614,0.0423,0.002930,0.211,0.041,0.095151,0.363636,0.1830,-0.200550,0.368123
1,3eMrYc092k7SIJfWJ7oasR,Weather Bird,"Louis Armstrong, Earl Hines",0.98400,0.831,0.2620,0.912000,0.901,0.037,0.095151,0.727273,0.2040,-0.206433,0.428713
2,2AZgaYZSwUosJD71J2N2Zo,'Tain't Nobody's Bizness If I Do,Bessie Smith,0.99600,0.537,0.0443,0.000265,0.137,0.029,0.095151,0.272727,0.1520,-0.274567,0.329787
3,0V1iYWPXCBTaB6dhbiprGF,Send Me to the 'Lectric Chair,Bessie Smith,0.98600,0.771,0.0905,0.000141,0.601,0.025,0.095151,0.272727,0.1520,-0.129800,0.358898
4,6qRvnXftofjYJm1Mg98UWL,Need a Little Sugar in My Bowl,Bessie Smith,0.99200,0.693,0.0270,0.000000,0.402,0.026,0.095151,0.000000,0.1340,-0.225100,0.310447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98463,3NUmUIyzNLBp8bCFMH8Mif,Waiting On A War,Foo Fighters,0.00984,0.530,0.7590,0.000000,0.502,0.069,0.100000,0.636364,0.3190,-0.117783,0.540980
98464,0fJ1caLzidzTlIL3pPX1eU,Precious' Tale,Jazmine Sullivan,0.71500,0.734,0.3460,0.000000,0.930,0.059,0.100000,0.181818,0.3940,-0.195367,0.364135
98465,3HSUqAErTyFQWLfLdnFVnB,Connexion,ZAYN,0.49800,0.597,0.3680,0.000000,0.590,0.052,0.100000,0.181818,0.1090,-0.169183,0.704836
98466,660rulYF3eLCuW6rQpiMdL,Little Boy,Ashnikko,0.10500,0.781,0.4870,0.000000,0.327,0.061,0.100000,0.090909,0.0802,-0.121683,0.532545


In [19]:
tracks_kDF[tracks_kDF['artists'].str.contains("B-52")]

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
5881,2Q5wSOwq6BDSu7sSVMNrtT,Rock Lobster,The B-52's,0.00884,0.592,0.882,0.0155,0.442,0.058,0.097922,0.727273,0.0506,-0.138583,0.377693
6119,1y5V5qja332UyMeUurFhDS,Private Idaho,The B-52's,0.00724,0.551,0.882,1.1e-05,0.888,0.055,0.097971,0.181818,0.322,-0.113467,0.68198
7817,4W4wYHtsrgDiivRASVOINL,Love Shack,The B-52's,0.0325,0.715,0.817,2e-06,0.871,0.07,0.098417,0.454545,0.851,-0.103783,0.548598
7900,5fqcIHU6DhQtFKVO5XSdQs,Roam,The B-52's,0.0447,0.631,0.938,8e-06,0.682,0.056,0.098417,0.818182,0.3,-0.092883,0.552934
7938,64B4UbGRLtGRBtiN2m8OvF,Love Shack - Edit,The B-52's,0.142,0.701,0.792,1e-06,0.9,0.055,0.098417,0.636364,0.764,-0.144067,0.54693
17436,207qLd1ovrqfYVBBDrNMUM,Dance This Mess Around,The B-52's,0.0168,0.608,0.712,0.000369,0.542,0.047,0.097922,0.272727,0.129,-0.132617,0.660131
17476,5R0v2GLujsGk37QCkGdvMQ,Rock Lobster,The B-52's,0.0111,0.554,0.815,0.0486,0.6,0.045,0.097922,0.090909,0.0511,-0.208017,0.372496
17529,5aC0QmgjYGG8QYgAg06nTk,Planet Claire,The B-52's,0.0529,0.715,0.549,0.554,0.734,0.045,0.097922,0.363636,0.399,-0.1578,0.581127
17610,0hV8cbzJubHKLSFIsBnYUo,52 Girls,The B-52's,0.000855,0.635,0.752,0.00858,0.848,0.04,0.097922,0.636364,0.0326,-0.110217,0.414262
17683,4vKLfj7YuS2yRKAJ9Nn0Id,Give Me Back My Man,The B-52's,0.0137,0.571,0.96,0.00817,0.802,0.043,0.097971,0.636364,0.0591,-0.091633,0.665152


In [21]:
tracks_kDF['artists'] = tracks_kDF['artists'].str.strip("['']").astype(str)
tracks_kDF

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
0,2wAfHM7Whz67VFbdanhZlk,Nobody Knows You When You're Down and Out,Bessie Smith,0.99600,0.614,0.0423,0.002930,0.211,0.041,0.095151,0.363636,0.1830,-0.200550,0.368123
1,3eMrYc092k7SIJfWJ7oasR,Weather Bird,"Louis Armstrong, Earl Hines",0.98400,0.831,0.2620,0.912000,0.901,0.037,0.095151,0.727273,0.2040,-0.206433,0.428713
2,2AZgaYZSwUosJD71J2N2Zo,'Tain't Nobody's Bizness If I Do,Bessie Smith,0.99600,0.537,0.0443,0.000265,0.137,0.029,0.095151,0.272727,0.1520,-0.274567,0.329787
3,0V1iYWPXCBTaB6dhbiprGF,Send Me to the 'Lectric Chair,Bessie Smith,0.98600,0.771,0.0905,0.000141,0.601,0.025,0.095151,0.272727,0.1520,-0.129800,0.358898
4,6qRvnXftofjYJm1Mg98UWL,Need a Little Sugar in My Bowl,Bessie Smith,0.99200,0.693,0.0270,0.000000,0.402,0.026,0.095151,0.000000,0.1340,-0.225100,0.310447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98463,3NUmUIyzNLBp8bCFMH8Mif,Waiting On A War,Foo Fighters,0.00984,0.530,0.7590,0.000000,0.502,0.069,0.100000,0.636364,0.3190,-0.117783,0.540980
98464,0fJ1caLzidzTlIL3pPX1eU,Precious' Tale,Jazmine Sullivan,0.71500,0.734,0.3460,0.000000,0.930,0.059,0.100000,0.181818,0.3940,-0.195367,0.364135
98465,3HSUqAErTyFQWLfLdnFVnB,Connexion,ZAYN,0.49800,0.597,0.3680,0.000000,0.590,0.052,0.100000,0.181818,0.1090,-0.169183,0.704836
98466,660rulYF3eLCuW6rQpiMdL,Little Boy,Ashnikko,0.10500,0.781,0.4870,0.000000,0.327,0.061,0.100000,0.090909,0.0802,-0.121683,0.532545


In [22]:
tracks_kDF[tracks_kDF['artists'].str.contains("Pickett")]


Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
2495,0xxZY5C9xxij3D1HkzbnfC,Monster Mash,"Bobby Boris Pickett, The Crypt-Kickers",0.375,0.541,0.504,0.0,0.707,0.053,0.097081,1.0,0.421,-0.236517,0.280664
2544,6GmL39a9OazWtyMkAbJz7v,Monster Mash,Bobby Boris Pickett,0.113,0.66,0.542,0.0,0.592,0.05,0.097081,1.0,0.346,-0.225183,0.576316
3128,4NRQwaks9r58tTDvr4iEyv,In the Midnight Hour,Wilson Pickett,0.12,0.75,0.444,4e-06,0.849,0.061,0.097229,0.363636,0.118,-0.143833,0.458684
3353,76ICmoJ4PcoMWoooaTxnQs,Land of 1000 Dances,Wilson Pickett,0.0128,0.618,0.588,0.0266,0.768,0.061,0.097279,0.181818,0.351,-0.193733,0.35616
3915,1MMp1H2Kib2BCDtdL5nL63,Hey Jude,Wilson Pickett,0.146,0.561,0.385,0.000496,0.664,0.053,0.097427,0.545455,0.171,-0.2555,0.334422
4186,79krwDoFzJ6dbNRwceAwgH,Don't Let the Green Grass Fool You,Wilson Pickett,0.442,0.573,0.707,0.000241,0.961,0.051,0.097476,0.545455,0.123,-0.138167,0.624865
14233,3W3FDMXmY4mzCg7IMus1ZW,Transylvania Twist,"Bobby Boris Pickett, The Crypt-Kickers",0.12,0.615,0.663,0.673,0.971,0.027,0.097081,0.636364,0.06,-0.201283,0.386455
14264,45WXeYmMCNqnR1ZqLlFpWB,Skully Gully,"Bobby Boris Pickett, The Crypt-Kickers",0.205,0.744,0.413,0.0515,0.743,0.026,0.097081,0.0,0.363,-0.1949,0.502193
14327,3fLAkdIr3hLjb9Ft5nvOiH,Monster's Holiday,"Bobby Boris Pickett, The Crypt-Kickers",0.47,0.715,0.453,0.0,0.68,0.023,0.097081,0.0,0.193,-0.228533,0.546414
20073,7mRak6wBx9OGKXr3zStoHW,Mustang Sally,Wilson Pickett,0.0906,0.76,0.522,3e-06,0.551,0.052,0.098565,0.0,0.174,-0.1348,0.449758


In [23]:
tracks_kDF[tracks_kDF['artists'].str.contains("Yankovic")]

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo
493,55sdccuwTv6aPlwz39UVso,Too Fat Polka,Frankie Yankovic,0.655,0.791,0.507,0.0,0.964,0.021,0.096338,0.636364,0.0943,-0.178767,0.514307
9382,5r96TaQquRrlo3Ym3ZlSL2,"""Amish Paradise (Parody of """"Gangsta's Paradis...",Weird Al Yankovic,0.103,0.728,0.448,0.0,0.483,0.054,0.098763,0.727273,0.267,-0.175667,0.331566
19267,4is3oF4FlWmedh3TK6Ke7z,Fat,Weird Al Yankovic,0.166,0.87,0.551,3.7e-05,0.409,0.046,0.098367,0.545455,0.0642,-0.17605,0.479164
21531,74sUbOF9Zm8LdGUJjxleTl,"""The Saga Begins (Lyrical Adaption of """"Americ...",Weird Al Yankovic,0.332,0.487,0.429,0.0,0.508,0.051,0.098911,0.363636,0.207,-0.145267,0.588451
25711,7fGW74qgJrknzuhQ4A5foT,In Heaven There Is No Beer,Frankie Yankovic & His Yanks,0.348,0.586,0.585,0.00028,0.961,0.026,0.097229,0.454545,0.0611,-0.1689,0.510586
29061,77exFA9gOKLvj6yhyX07HD,My Bologna,Weird Al Yankovic,0.162,0.689,0.874,1e-06,0.648,0.04,0.09812,0.818182,0.0565,-0.138617,0.392119
29200,6tBzYurAiGkaGopgYPdNo7,I Love Rocky Road,Weird Al Yankovic,0.429,0.829,0.727,0.0,0.907,0.036,0.09812,0.636364,0.0709,-0.143433,0.416574
29330,7uwJC9ngTvHYBtk1DH0aBr,Eat It,Weird Al Yankovic,0.16,0.751,0.768,0.0,0.881,0.04,0.098169,0.636364,0.0409,-0.163883,0.605156
29461,2QuYig9VyECgbJIHHxYirK,Dare to Be Stupid,Weird Al Yankovic,0.113,0.638,0.961,9e-06,0.775,0.039,0.098219,0.363636,0.352,-0.110667,0.367795
29552,3gH52R54Atk3CF41PJMhFB,Yoda,Weird Al Yankovic,0.0499,0.561,0.841,0.0,0.75,0.037,0.098219,0.818182,0.232,-0.150783,0.656426


In [24]:
# Kmeans

X = tracks_kDF.select_dtypes(np.number)
number_cols = list(X.columns)
tracks_fitted = KMeans(n_clusters=2200, init='k-means++', verbose=1).fit(X)


Initialization complete
Iteration 0, inertia 3871.940327419935
Iteration 1, inertia 3438.800566783255
Iteration 2, inertia 3336.0923259148744
Iteration 3, inertia 3286.302140714603
Iteration 4, inertia 3256.0639024691927
Iteration 5, inertia 3234.9374437694473
Iteration 6, inertia 3220.349579987637
Iteration 7, inertia 3209.5831735125635
Iteration 8, inertia 3201.310751979905
Iteration 9, inertia 3194.4322452361193
Iteration 10, inertia 3188.8947805778466
Iteration 11, inertia 3184.3850106388713
Iteration 12, inertia 3180.8236875449425
Iteration 13, inertia 3177.931600764323
Iteration 14, inertia 3175.393545398011
Iteration 15, inertia 3173.2271299233325
Iteration 16, inertia 3171.647244557804
Iteration 17, inertia 3170.467857089073
Iteration 18, inertia 3169.2965890439796
Iteration 19, inertia 3168.3358667633643
Iteration 20, inertia 3167.4768635660675
Iteration 21, inertia 3166.7585853912324
Iteration 22, inertia 3166.1152781303645
Iteration 23, inertia 3165.595875988755
Iteration 24

Iteration 33, inertia 3157.1954508572526
Iteration 34, inertia 3157.0785394461614
Iteration 35, inertia 3156.9214187707576
Iteration 36, inertia 3156.8152404484504
Iteration 37, inertia 3156.7338954822076
Iteration 38, inertia 3156.672755707679
Iteration 39, inertia 3156.5971953380517
Iteration 40, inertia 3156.50030635353
Iteration 41, inertia 3156.436553253656
Iteration 42, inertia 3156.37701287536
Iteration 43, inertia 3156.328607754744
Iteration 44, inertia 3156.298993823218
Iteration 45, inertia 3156.2740782591973
Iteration 46, inertia 3156.2557579233835
Iteration 47, inertia 3156.237923439992
Iteration 48, inertia 3156.2049928914766
Iteration 49, inertia 3156.1723841039316
Iteration 50, inertia 3156.1432324665616
Iteration 51, inertia 3156.114192610116
Iteration 52, inertia 3156.0902976756483
Iteration 53, inertia 3156.076206843485
Iteration 54, inertia 3156.0671551786336
Iteration 55, inertia 3156.0603726355225
Iteration 56, inertia 3156.0533453778985
Iteration 57, inertia 3156.

Iteration 10, inertia 3187.1183081177223
Iteration 11, inertia 3183.103993399801
Iteration 12, inertia 3179.8005753854236
Iteration 13, inertia 3177.0846110815683
Iteration 14, inertia 3174.9069329903446
Iteration 15, inertia 3172.842098847076
Iteration 16, inertia 3171.2498445690303
Iteration 17, inertia 3169.9607588161593
Iteration 18, inertia 3168.884553989844
Iteration 19, inertia 3167.955822335324
Iteration 20, inertia 3167.2030738348817
Iteration 21, inertia 3166.5224692571755
Iteration 22, inertia 3165.8078742648977
Iteration 23, inertia 3165.2710073202084
Iteration 24, inertia 3164.7770861411404
Iteration 25, inertia 3164.3589004525925
Iteration 26, inertia 3164.051934479136
Iteration 27, inertia 3163.7475590175827
Iteration 28, inertia 3163.4474049249807
Iteration 29, inertia 3163.1263962559856
Iteration 30, inertia 3162.8885931866457
Iteration 31, inertia 3162.671954716784
Iteration 32, inertia 3162.471226374482
Iteration 33, inertia 3162.2606228736413
Iteration 34, inertia 3

Iteration 14, inertia 3172.7125389746184
Iteration 15, inertia 3170.502458581446
Iteration 16, inertia 3168.49006053945
Iteration 17, inertia 3166.6646683214694
Iteration 18, inertia 3165.025497786384
Iteration 19, inertia 3163.6730365080316
Iteration 20, inertia 3162.5509685117268
Iteration 21, inertia 3161.4994063731997
Iteration 22, inertia 3160.585177354021
Iteration 23, inertia 3159.8443763657333
Iteration 24, inertia 3159.164250134119
Iteration 25, inertia 3158.616569824246
Iteration 26, inertia 3158.217971170485
Iteration 27, inertia 3157.8833132758477
Iteration 28, inertia 3157.582268825727
Iteration 29, inertia 3157.312668723519
Iteration 30, inertia 3157.1029248399873
Iteration 31, inertia 3156.931118984224
Iteration 32, inertia 3156.780463892532
Iteration 33, inertia 3156.6032465536287
Iteration 34, inertia 3156.428303296837
Iteration 35, inertia 3156.2570009725137
Iteration 36, inertia 3156.1054148005283
Iteration 37, inertia 3156.005010375546
Iteration 38, inertia 3155.896

In [25]:
song_cluster_labels = tracks_fitted.predict(X)


In [26]:
tracks_kDF['cluster_label'] = song_cluster_labels

In [27]:
# save model to csv
model_k = tracks_kDF[['id', 'name', 'artists', 'acousticness',
                                             'danceability' ,'energy' ,'instrumentalness',
                                             'valence', 'popularity', 'year',
                                            'key', 'liveness', 'loudness', 'tempo', 'cluster_label']]
model_k.to_csv('chose2200_Kmodel.csv', index=False)

# Explore Model

In [28]:
# tracks_kDF.loc[tracks_kDF['name']== "Hotel California"]
tracks_kDF[tracks_kDF['name'].str.contains("Hotel California")]

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo,cluster_label
5211,40riOy7x9W7GXjyGp4pjAv,Hotel California - 2013 Remaster,Eagles,0.00574,0.579,0.508,0.000494,0.609,0.083,0.097773,0.181818,0.0575,-0.158067,0.602971,831
5348,0cNwyA4Qiyr29I90ezhr0X,"Hotel California - Live at The Forum, Los Ange...",Eagles,0.0145,0.427,0.665,0.00242,0.442,0.059,0.097773,0.181818,0.699,-0.141917,0.584844,822
10539,4Vn7TykT27PIygBiZjTR2s,Hotel California - 2013 Remaster,Eagles,0.00574,0.579,0.508,0.000494,0.609,0.061,0.099109,0.181818,0.0575,-0.158067,0.602971,831
17722,5JU7FgGJqgGPUaJA2nhZvm,Hotel California - Live; 1999 Remaster,Eagles,0.0136,0.411,0.855,0.0374,0.547,0.05,0.097971,0.181818,0.988,-0.110967,0.618578,1159
18711,2vIRpCpsekN5TiWq3Ua2Ht,Hotel California - 2013 Remaster,Eagles,0.00574,0.579,0.508,0.000494,0.609,0.044,0.098219,0.181818,0.0575,-0.158067,0.602971,831
20535,2GpBrAoCwt48fxjgjlzMd4,"Hotel California - Live On MTV, 1994",Eagles,0.368,0.55,0.49,0.0001,0.609,0.061,0.098664,0.181818,0.917,-0.178867,0.305967,2194
37893,21DMNxn3lLWYqaTm3MeGAn,Hotel California - 2013 Remaster,Eagles,0.00574,0.579,0.508,0.000494,0.609,0.041,0.097773,0.181818,0.0575,-0.158067,0.602971,831
42764,4Rvhe8O90hFIExTJkdrRPM,Hotel California (Spanish Mix),Gipsy Kings,0.533,0.456,0.822,3e-05,0.531,0.046,0.098961,1.0,0.103,-0.15115,0.41857,844


In [None]:
tracks_kDF['search']= tracks_kDF["artists"].str.find('Extreme')

# All Time Low, Taylor Swift, Fleetwood Mac, Luke Bryan, Dan + Shay

In [None]:
tracks_kDF.loc[tracks_kDF['search'] != -1].sort_values(by = 'popularity', ascending=False)

In [None]:
tracks_kDF.loc[tracks_kDF['search'] != -1]["cluster_label"].unique()

In [29]:
recommended = tracks_kDF.loc[tracks_kDF['cluster_label']== 2194].sort_values(by = 'popularity', ascending=False)
recommended

Unnamed: 0,id,name,artists,acousticness,danceability,energy,instrumentalness,valence,popularity,year,key,liveness,loudness,tempo,cluster_label
8462,3BEZCNZSmVv30vsMNSOCri,"Layla - Acoustic; Live at MTV Unplugged, Bray ...",Eric Clapton,0.321,0.558,0.524,0.000952,0.636,0.07,0.098565,0.181818,0.94,-0.193767,0.383828,2194
10384,3Jq9rsqmRtqcVKtzP9dnxZ,The Man Comes Around,Johnny Cash,0.52,0.635,0.584,0.0951,0.753,0.063,0.09906,0.0,0.962,-0.131867,0.397816,2194
20535,2GpBrAoCwt48fxjgjlzMd4,"Hotel California - Live On MTV, 1994",Eagles,0.368,0.55,0.49,0.0001,0.609,0.061,0.098664,0.181818,0.917,-0.178867,0.305967,2194
30859,0Se82Sof9IOUY4VdR9un2A,Nobody Knows You When You're Down and Out - Ac...,Eric Clapton,0.271,0.667,0.587,5e-06,0.691,0.054,0.098565,0.181818,0.896,-0.174167,0.365533,2194
16624,13oxThgkN4ylWbI1UukQ55,Reasons - Live,"Earth, Wind & Fire",0.453,0.49,0.549,7.9e-05,0.766,0.049,0.097724,0.272727,0.934,-0.157933,0.529664,2194
31612,4WEvmSK0jr7WnwsPbevF3h,PopurrÃ­ De Juan Gabriel,Paco BarrÃ³n y sus NorteÃ±os Clan,0.439,0.574,0.724,0.0,0.827,0.047,0.098714,0.181818,0.888,-0.103233,0.341783,2194
95270,7KQJyqHkmOk6LTf9OtooYc,El Rey - En Vivo,Luis Miguel,0.435,0.473,0.619,0.0,0.698,0.045,0.098714,0.181818,0.963,-0.137367,0.400098,2194
96618,6ef95vgK7yUu6Bg61m2BsJ,El Bato Gacho,Las Jilguerillas,0.43,0.572,0.468,1.9e-05,0.886,0.044,0.099109,0.181818,0.863,-0.150283,0.51707,2194
51981,3v8J7zbQIE6979t9Bk4Q8W,Popurri De Bronco,Paco BarrÃ³n y sus NorteÃ±os Clan,0.386,0.66,0.619,0.0,0.845,0.043,0.098812,0.181818,0.927,-0.144533,0.322418,2194
94694,4abPGwvqR26EVyXJI72VRP,"Layla - Acoustic; Live at MTV Unplugged, Bray ...",Eric Clapton,0.321,0.558,0.524,0.000952,0.636,0.038,0.098565,0.181818,0.94,-0.193767,0.383828,2194


In [None]:
tracks_kDF.describe()

In [None]:
len(recommended)

In [None]:
for i, row in recommended.iterrows():
    print(f"{row['name']} - {row['artists']}")

In [None]:
Sum_of_squared_distances = []
K = range(100,2500,100)
for k in K:
    km = KMeans(n_clusters=k)
    km = km.fit(X)
    Sum_of_squared_distances.append(km.inertia_)
    print(k)

In [None]:
# sample elbow after added columns
plt.plot(K, Sum_of_squared_distances, 'bx-')
plt.xlabel('k')
plt.ylabel('Sum_of_squared_distances')
plt.title('Elbow Method For Optimal k')

plt.savefig('ElbowMethod-addedCol-max2500.png')
plt.show()

In [None]:
for i in range(1,10):
    print(f'{K[i]} Clusters - {Sum_of_squared_distances[i]} inertia')

In [None]:
#don't mess with this one
import matplotlib.pyplot as plt

plt.plot(K, Sum_of_squared_distances, 'bx-')
plt.xlabel('k')
plt.ylabel('Sum_of_squared_distances')
plt.title('Elbow Method For Optimal k')

plt.savefig('ElbowMethodK.png')
plt.show()

In [None]:
for i in range(1,10):
    print(f'{K[i]} Clusters - {Sum_of_squared_distances[i]} inertia')

In [None]:
# Visualizing the Clusters with PCA
import plotly.express as px 
from sklearn.decomposition import PCA


song_embedding = PCA(n_components=2).fit_transform(X)
projection = pd.DataFrame(columns=['x', 'y'], data=song_embedding)
projection['title'] = tracks_kDF['name']
projection['cluster'] = tracks_kDF['cluster_label']

fig = px.scatter(projection, x='x', y='y', color='cluster', hover_data=['x', 'y', 'title'])
fig.show()