In [378]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from matplotlib.lines import Line2D

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import random
from sklearn.cluster import KMeans

In [379]:
itunes = pd.read_csv("itunes.csv").drop('Unnamed: 0', axis =1)

In [380]:
itunes.columns = ['song', 'artist']

In [381]:
itunes.head()

Unnamed: 0,song,artist
0,About Damn Time,Lizzo
1,This Love,Taylor Swift
2,Hold My Hand,Lady Gaga
3,As It Was,Harry Styles
4,First Class,Jack Harlow


In [382]:
rstone = pd.read_csv("rstone.csv").drop('Unnamed: 0', axis =1)

In [383]:
rstone.columns = ['song', 'artist']

In [384]:
rstone.head()

Unnamed: 0,song,artist
0,Like a Rolling Stone,Bob Dylan
1,(I Can’t Get No) Satisfaction,The Rolling Stones
2,Imagine,John Lennon
3,What’s Going On,Marvin Gaye
4,Respect,Aretha Franklin


In [385]:
hot = pd.concat([itunes, rstone], axis =0).drop_duplicates().reset_index(drop=True)

In [386]:
spot = pd.read_csv("df_song_features.csv").drop('Unnamed: 0', axis =1) #Try my own list before the Kaggle joint

In [387]:
spot.select_dtypes(np.number)

Unnamed: 0,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,9,0.390,0.06920,0,-23.312,1,0.0905,0.4350,0.504000,0.0822,0.0433,76.465,740413,4
1,34,0.283,0.00966,0,-28.107,1,0.0327,0.9930,0.807000,0.0884,0.1080,103.468,259042,5
2,31,0.694,0.76100,5,-4.087,0,0.0372,0.4460,0.000000,0.2890,0.5500,114.947,208467,4
3,34,0.487,0.75000,0,-8.348,1,0.0516,0.0582,0.000008,0.2830,0.8230,127.769,138688,4
4,21,0.646,0.27900,4,-3.575,1,0.0545,0.1490,0.915000,0.0993,0.7150,120.063,147587,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5882,31,0.571,0.75000,7,-5.987,0,0.0307,0.4190,0.000000,0.1040,0.5630,87.982,242867,4
5883,26,0.602,0.79500,2,-4.828,1,0.1110,0.0458,0.000000,0.1660,0.8580,127.993,149381,5
5884,2,0.866,0.69500,8,-7.347,1,0.0357,0.3350,0.884000,0.1260,0.8990,117.031,211160,4
5885,52,0.523,0.39100,4,-11.769,0,0.0300,0.4370,0.492000,0.2050,0.0378,109.952,169038,4


In [388]:
#get songs from Kaggle
#kaggle = pd.read_csv("SpotifyAudioFeaturesApril2019.csv")

In [389]:
#Initialize SpotiPy with user credentials
secrets_file = open("C:\\Users\\franc\\OneDrive\\Área de Trabalho\Data Analytics\\Week 2\\6.05 API Wrappers, Spotipy\\secrets.txt","r")
string = secrets_file.read()
string.split('\n')
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        secrets_dict[line.split(':')[0]]=line.split(':')[1]
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                       client_secret=secrets_dict['csecret']))

### GNOD 1.0

In [390]:
def gnod_1_0(hot):
    import random
    
    #turn dataframes values into lower case:
    hot_lower = hot.copy()
    for column in hot_lower:
        hot_lower[column] = hot_lower[column].str.lower()
    
    #create list of songs and artists in lower case:
    song_list = list(hot_lower['title'])
    singer_list = list(hot_lower['singer'])
    play = True
    
    #program loop for using Gnod:
    while play:
        #Getting an artist or song input in either case format:
        value = input("Please, enter a song or an artist.").lower()
        if value in song_list or value in singer_list:
            print("Listen to", random.choice(hot['title']),", you'll love it!")
            buttom = input("Press Y for more recommendations, press N to quit Gnod.").upper()
            if buttom == 'N':
                play = False
        else:
            print("Sorry, we have no recommendations for you.")
            buttom = input("Press Y to try again or N to quit Gnod.").upper()
            if  buttom == 'N':
                play = False

### GNOD 2.0

In [391]:
#Function that clusters a playlist dataframe 
def cluster(playlist):
    play_num = playlist.select_dtypes(np.number)
    kmeans = KMeans(n_clusters=9, random_state=0)
    kmeans.fit(play_num)
    clusters = kmeans.predict(play_num)
    play_ref = playlist.copy()
    play_ref['clusters'] = clusters
    return kmeans, play_ref;

kmeans, play_ref = cluster(spot)

play_ref

In [392]:
#Function that searches for song and returns features as a dataframe
def get_features(song):
    #Search for song and features in the API
    results = sp.search(q=song, limit=1)
    song_uri = results["tracks"]["items"][0]["uri"]
    popularity = results['tracks']['items'][0]['popularity']
    features = sp.audio_features(song_uri)
    features = pd.DataFrame(features)
    features.insert(0, 'popularity', popularity)
    return features

features = get_features('vampiro')

features

In [393]:
#Function that clusters and recommends a song according to the playlist's clusters
def song_finder(features, playlist):
    features_num = features.select_dtypes(np.number)
    song_cluster = kmeans.predict(features_num)
    find_song = play_ref[play_ref['clusters'].isin([int(song_cluster)])]
    rand_song = random.choice(find_song['song'])
    print("Listen to", rand_song,", you'll love it!")
    del rand_song

In [394]:
def lower(hot):
    hot_lower = hot.copy()
    for column in hot_lower:
        hot_lower[column] = hot_lower[column].str.lower()
    return hot_lower

In [395]:
#Recommend song from hot songs:
def recommend_hot(hot):
    random_song = random.choice(hot['song'])
    artist = hot.loc[hot['song'] == random_song, 'artist'].iloc[0]
    print("Listen to \""+ random_song + "\" by " + artist + ", you'll love it!")

In [396]:
def gnod_2_0(playlist, hot):
    
    hot_lower = lower(hot)
    hot_list = list(hot_lower['song'])
    hot_artist = list(hot_lower['artist'])
    play = True
    
    #program loop for using Gnod:
    while play:
        #Getting an artist or song input in either case format:
        song = input("Please, enter a song or an artist.").lower()
        
        #Search for hot songs:
        if song in hot_list or song in hot_artist:
            recommend_hot(hot)
            buttom = input("Press Y for more recommendations, press N to quit Gnod.").upper()
            if buttom == 'N':
                play = False
        else:
            kmeans, play_ref = cluster(playlist)
            features = get_features(song)
            song_finder(features, play_ref)
            
            buttom = input("Press Y for more recommendations, press N to quit Gnod.").upper()
            if buttom == 'N':
                play = False
            

In [398]:
gnod_2_0(spot, hot)

Please, enter a song or an artist.zombie
Listen to "Whiskey On You" by Nate Smith, you'll love it!
Press Y for more recommendations, press N to quit Gnod.the beatles
Please, enter a song or an artist.garotos
Listen to Río Traicionero/ Pregonero de Campeche , you'll love it!
Press Y for more recommendations, press N to quit Gnod.vampiro
Please, enter a song or an artist.vampiro


KeyError: 485