In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from matplotlib.lines import Line2D
import random
from bs4 import BeautifulSoup as bs4
import requests
from sklearn.metrics import silhouette_score
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from time import sleep
from random import randint
import time

### making clusters

In [2]:
# Load the song catalogue and discard every row that has a missing value.
data = pd.read_csv("songlists.csv").dropna()
# Only the numeric columns are kept as clustering features.
X = data.select_dtypes(include="number")

In [3]:
# Standardise the features. The fitted scaler is kept in `scaler` so that
# songs looked up later can be transformed with the same parameters.
scaler = StandardScaler()
X_prep = scaler.fit_transform(X)

### find proper "k"

Choose k by comparing the elbow (inertia) plot and the silhouette-score plot below.

In [None]:
# Elbow method: fit one k-means model per candidate k and record its inertia.
K = range(2, 20)
inertia = []

for k in K:
    # Fixed seed so the curve is reproducible between runs.
    kmeans = cluster.KMeans(n_clusters=k, random_state=1234).fit(X_prep)
    inertia.append(kmeans.inertia_)

fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(K, inertia, 'bx-')
ax.set_xlabel('k')
ax.set_ylabel('inertia')
# One tick per candidate k.
ax.set_xticks(np.arange(K[0], K[-1] + 1, 1.0))
ax.set_title('Elbow Method showing the optimal k')

In [None]:
# Silhouette analysis: fit one k-means model per candidate k and record the
# silhouette score of the resulting labelling of X_prep.
K = range(2, 20)
silhouette = []

for k in K:
    # Fixed seed so the curve is reproducible between runs.
    kmeans = cluster.KMeans(n_clusters=k,
                    random_state=1234)
    kmeans.fit(X_prep)
    silhouette.append(silhouette_score(X_prep, kmeans.predict(X_prep)))

plt.figure(figsize=(16,8))
plt.plot(K, silhouette, 'bx-')
plt.xlabel('k')
plt.ylabel('silhouette score')
# One tick per candidate k.
plt.xticks(np.arange(min(K), max(K)+1, 1.0))
# Title typo fixed ("Slhouette" -> "Silhouette").
plt.title('Silhouette score showing the optimal k')

### fit the final k-means model with the chosen k

In [4]:
# Final model with 9 clusters. The seed matches the one used in the
# k-selection cells so that cluster labels are reproducible across runs
# (Restart & Run All); without it every run can assign different labels.
kmeans = cluster.KMeans(n_clusters=9, random_state=1234)
kmeans.fit(X_prep)
# Cluster label of every training song, as a plain Python list.
pred = kmeans.predict(X_prep).tolist()

### checking actual top 100 songs

In [6]:
# Download the chart page that is scraped for the current top 100 songs.
url = "https://www.popvortex.com/music/charts/top-100-songs.php"
# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of silently parsing an error page.
response.raise_for_status()
contents = response.content
soup = bs4(contents, "html.parser")

In [7]:
# Extract title and artist for chart positions 1..100 from the parsed page.
result = soup.find("div",{"class":"chart-wrapper"})
if result is None:
    # Without the wrapper nothing can be parsed; say so explicitly instead of
    # failing later with an AttributeError on None.
    raise RuntimeError("chart wrapper not found on the page; the page layout may have changed")

titles = []
artists = []
for i in range(1,101,1):
    i_id = "chart-position-" + str(i)
    song = result.find("div",{"id":i_id})
    entry = song.find("p",{"class":"title-artist"}) if song is not None else None
    title_tag = entry.find("cite") if entry is not None else None
    artist_tag = entry.find("em") if entry is not None else None
    if title_tag is None or artist_tag is None:
        # Position missing or markup incomplete: skip it rather than crash,
        # so the remaining chart entries are still collected.
        continue
    titles.append(title_tag.text)
    artists.append(artist_tag.text)
top100 = pd.DataFrame({"title":titles,"artist":artists})
top100.head()

Unnamed: 0,title,artist
0,"Shakira: Bzrp Music Sessions, Vol. 53",Bizarrap & Shakira
1,Anti-Hero,Taylor Swift
2,Unholy,Sam Smith & Kim Petras
3,People Get Ready,Jeff Beck & Rod Stewart
4,Son Of A Sinner,Jelly Roll


### getting input from user

In [9]:
def getrandom(df):
    """Pick one row of ``df`` uniformly at random.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first two columns are read positionally
        (callers in this notebook pass title in column 0, artist in column 1).

    Returns
    -------
    tuple
        ``(df.iloc[row, 0], df.iloc[row, 1])`` for the chosen row.

    Raises
    ------
    IndexError
        If ``df`` has no rows.
    """
    n_rows = df.shape[0]
    if n_rows == 0:
        raise IndexError("cannot pick a random row from an empty DataFrame")
    # random.randint(0, n_rows) includes n_rows itself, which is one past the
    # last valid position; randrange(n_rows) yields 0 .. n_rows - 1.
    x = random.randrange(n_rows)
    rec_title = df.iloc[x,0]
    rec_artist = df.iloc[x,1]
    return rec_title, rec_artist

In [10]:
# Ask for a favourite song and its artist. The input is lower-cased and
# stripped so that stray leading/trailing whitespace does not break the
# later chart match or the Spotify search.
t = input("type in one of your favorite song: ").strip().lower()
a = input("type in the name of the artist: ").strip().lower()
# Canned reactions; one is printed when the input matches the chart.
reactions = ["Nice one!","Great choice!","Not bad!","Interesting!","Cool!"]

### checking if input is in the top 100

In [11]:
# If the user's song or artist is on the chart, suggest a random chart song;
# otherwise set `recommend` so a cluster-based recommendation can follow.
title, artist = getrandom(top100)
recommend = False
# The input was lower-cased, while chart entries keep their original casing,
# so the match must be case-insensitive. regex=False treats the input as
# literal text (characters such as "(" or "?" would otherwise be parsed as a
# regular expression). An empty field is ignored because an empty pattern
# matches every row.
title_hit = bool(t) and top100["title"].str.contains(t, case=False, regex=False, na=False).any()
artist_hit = bool(a) and top100["artist"].str.contains(a, case=False, regex=False, na=False).any()
if title_hit or artist_hit:
    comment = random.choice(reactions)
    print(comment, "Maybe you'll like this song:\n\n" + title.title() + ", by", artist.title())
else:
    recommend = True
    print("Oh I think I know that one! That's a good choice for music.")

Oh I think I know that one! That's a good choice for music.


### getting songinfo from spotify

In [13]:
# Read the Spotify credentials from secrets.txt, one "key: value" pair per
# line, and build the API client from them.
# The context manager closes the file even if reading fails.
with open("secrets.txt","r") as secrets_file:
    string = secrets_file.read()

secrets_dict={}
for line in string.split('\n'):
    # Skip blank lines and lines without a separator instead of crashing.
    if len(line) > 0 and ':' in line:
        # Split on the first ':' only, so a value that itself contains ':'
        # is kept whole.
        key, sep, value = line.partition(':')
        secrets_dict[key.strip()]=value.strip()

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

In [38]:
def getsonginfo(t,a):
    """Look up a song on Spotify and return its metadata plus audio features.

    Parameters
    ----------
    t : str
        Song title to search for.
    a : str
        Artist name to search for.

    Returns
    -------
    pandas.DataFrame
        One row: the columns ``title``, ``artist`` (all artist names joined
        with ", ") and ``uri``, followed by the columns built from
        ``sp.audio_features``.

    Raises
    ------
    IndexError
        If the search returns no track.
    ValueError
        If no audio features are returned for the track.
    """
    track = sp.search(q=t+" "+a, limit=1)
    items = track["tracks"]["items"]
    if not items:
        # Same exception type as indexing the empty list would raise, but
        # with a message that says what went wrong.
        raise IndexError("no Spotify track found for title %r and artist %r" % (t, a))
    item = items[0]
    title = item["name"]
    artist = ", ".join(i["name"] for i in item["artists"])
    uri = item["uri"]
    songinfo = pd.DataFrame({"title":title, "artist":artist, "uri":uri}, index = [0])
    songuri = sp.audio_features(uri)
    # NOTE(review): the API presumably yields a None entry for a track without
    # features - confirm against the spotipy documentation.
    if not songuri or songuri[0] is None:
        raise ValueError("no audio features available for track %r" % (uri,))
    songuri = pd.DataFrame(songuri)
    df = pd.concat([songinfo, songuri], axis = 1)
    return df

def getfromcluster(t,a, data):
    """Recommend a random song from the cluster the user's song falls into.

    Parameters
    ----------
    t : str
        Song title entered by the user.
    a : str
        Artist name entered by the user.
    data : pandas.DataFrame
        Song catalogue with a ``cluster`` column; its first two columns are
        returned as title and artist.

    Returns
    -------
    tuple
        ``(title, artist)`` of a randomly chosen song in the predicted cluster.
    """
    df = getsonginfo(t,a)
    x = df.select_dtypes(include=np.number)
    # When the scaler remembers the column names it was fitted on, pass
    # exactly those columns in the same order, so extra or reordered numeric
    # columns do not break the transform.
    fitted_cols = getattr(scaler, "feature_names_in_", None)
    if fitted_cols is not None and all(c in x.columns for c in fitted_cols):
        x = x[list(fitted_cols)]
    x_prep = scaler.transform(x)
    pred = kmeans.predict(x_prep)
    selection = data[data.cluster == pred[0]]
    if selection.empty:
        # No catalogue song carries this label: fall back to the whole catalogue.
        selection = data
    # Draw from the matching cluster; the previous code drew from all of
    # `data`, which ignored the prediction.
    title, artist = getrandom(selection)
    return title, artist

### getting recommendation(s)

In [59]:
# Attach the cluster label of every catalogue song, then ask for a
# recommendation based on the user's song and print it.
data["cluster"] = pred
rec_title, rec_artist = getfromcluster(t, a, data)
headline = "You might like this song:\n\n" + rec_title + ", by"
print(headline, rec_artist)
print("\n...Not to your liking? Get more song recommendations! (run this cell)")

You might like this song:

Liquid Love, by David Douglas

...Not to your liking? Get more song recommendations! (run this cell)
