In [1]:
import numpy as np
import pandas as pd
import spotipy
import os
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
import spotify_api as cred
from scipy.spatial.distance import cdist
from sklearn.metrics import euclidean_distances
import difflib
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

# Names of the numeric columns that make up a song's feature vector.
number_cols = [
    'valence',
    'year',
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
]

# Spotify Web API client; the credentials are read from the local `spotify_api` module.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=cred.CLIENT_ID,
        client_secret=cred.CLIENT_SECRET,
    )
)

# Song dataset used for clustering and for the recommendations below.
df_spotify = pd.read_csv('./data/data_o.csv')

In [9]:
# Standardise the numeric columns, then assign every song to one of 20 k-means clusters.
# random_state is pinned so that re-running the notebook reproduces the same labels.
cluster_pipeline_song = Pipeline([
    ('scaler', StandardScaler()),
    ('kmeans', KMeans(n_clusters=20, random_state=42)),
])

# Drop a 'cluster' column left behind by an earlier run of this cell: it is numeric,
# so select_dtypes would otherwise feed the old labels back in as a feature.
X = df_spotify.drop(columns=['cluster'], errors='ignore').select_dtypes(np.number)
df_spotify['cluster'] = cluster_pipeline_song.fit_predict(X)

In [12]:
# Save the dataset to disk. pandas' defaults are used, so the DataFrame index is
# written as the first (unnamed) column of the CSV.
df_spotify.to_csv(path_or_buf='spotify_data.csv')

In [10]:
# https://www.kaggle.com/artempozdniakov/spotify-data-eda-and-music-recommendation
def search_song(name, year):
    """Look a song up through the Spotify client and return it as a one-row DataFrame.

    Parameters
    ----------
    name : str
        Track title to search for.
    year : int
        Release year to search for.

    Returns
    -------
    pandas.DataFrame or None
        One row holding ``name``, ``year``, ``explicit``, ``duration_ms``,
        ``popularity`` and every key of the track's audio-features record.
        ``None`` when the search has no hit or no audio features are available.
    """
    # NOTE(review): Spotify's documented field-filter syntax has no space after the
    # colon ("track:Doxy"); the query is kept unchanged here - confirm it filters as intended.
    results = sp.search(q='track: {} year: {}'.format(name, year), limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]

    # The features call returns a list; guard against a missing or null entry so the
    # caller gets the same "not found" signal (None) instead of an AttributeError.
    features_list = sp.audio_features(track['id'])
    if not features_list or features_list[0] is None:
        return None
    audio_features = features_list[0]

    song_data = {
        'name': [name],
        'year': [year],
        'explicit': [int(track['explicit'])],
        'duration_ms': [track['duration_ms']],
        'popularity': [track['popularity']],
    }
    # Audio-feature values are scalars; pandas broadcasts them against the
    # length-1 lists above. Keys present in both (if any) take the audio-feature value.
    song_data.update(audio_features)

    return pd.DataFrame(song_data)

def get_mean_vector(song_list, spotify_data):
    """Return the mean feature vector of the given songs.

    Each song is looked up in ``spotify_data`` by case-insensitive name and exact
    year; songs missing from the dataset are fetched with ``search_song``. Songs
    that cannot be found either way are reported with a printed message and skipped.

    Parameters
    ----------
    song_list : list of dict
        Each dict must provide ``'name'`` and ``'year'``.
    spotify_data : pandas.DataFrame
        Dataset containing ``'name'``, ``'year'`` and every column in ``number_cols``.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per column in ``number_cols``.

    Raises
    ------
    ValueError
        If none of the songs could be found, since no mean can be computed.
    """
    song_vectors = []
    for song in song_list:
        matches = spotify_data[(spotify_data['name'].str.lower() == song['name'].lower()) &
                               (spotify_data['year'] == song['year'])]
        if not matches.empty:
            song_data = matches.iloc[0]
        else:
            song_data = search_song(song['name'], song['year'])
        if song_data is None:
            print('Song not found')
            continue
        # A dataset hit is a Series (1-D values) while a search_song hit is a one-row
        # DataFrame (2-D values); flatten both so every vector has the same shape.
        song_vectors.append(np.asarray(song_data[number_cols].values, dtype=float).ravel())

    if not song_vectors:
        raise ValueError('None of the requested songs could be found')

    return np.mean(np.vstack(song_vectors), axis=0)

def combined_data_dict(dict_list):
    """Merge a list of dicts into one dict that maps each key to a list of its values.

    Values are collected in the order the dicts appear in ``dict_list``; a key that
    is absent from some dicts simply gets a shorter list.
    """
    merged = {}
    for record in dict_list:
        for field, item in record.items():
            # setdefault creates the list the first time a key is seen.
            merged.setdefault(field, []).append(item)
    return merged

def recommend_songs(song_list, spotify_data, n_songs=10):
    """Recommend the songs whose features are closest to the mean of the seed songs.

    The seed songs' mean feature vector (see ``get_mean_vector``) and every row of
    ``spotify_data`` are standardised over ``number_cols`` and compared by cosine
    distance.

    Parameters
    ----------
    song_list : list of dict
        Seed songs; each dict must provide ``'name'`` and ``'year'``.
    spotify_data : pandas.DataFrame
        Dataset containing ``'name'``, ``'year'``, ``'artists'`` and every column
        in ``number_cols``.
    n_songs : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list of dict
        Up to ``n_songs`` records with keys ``'name'``, ``'year'`` and ``'artists'``,
        nearest first. Rows whose name matches a seed song are never included.
    """
    metadata_cols = ['name', 'year', 'artists']
    # Lower-cased so the exclusion matches the case-insensitive lookup done by
    # get_mean_vector (a seed typed as "lithium" must still filter out "Lithium").
    seed_names = {str(song['name']).lower() for song in song_list}

    song_center = get_mean_vector(song_list, spotify_data)
    scaler = StandardScaler().fit(spotify_data[number_cols])
    scaled_data = scaler.transform(spotify_data[number_cols])
    # Wrap the centre in a DataFrame with the same column names the scaler was fitted on.
    center_frame = pd.DataFrame(np.asarray(song_center, dtype=float).reshape(1, -1),
                                columns=number_cols)
    scaled_song_center = scaler.transform(center_frame)

    distances = cdist(scaled_song_center, scaled_data, 'cosine')[0]
    order = np.argsort(distances)

    # Remove the seed songs BEFORE truncating, so that dropping them cannot leave
    # fewer than n_songs results when enough other rows exist.
    is_seed = spotify_data['name'].str.lower().isin(seed_names).to_numpy()
    index = order[~is_seed[order]][:n_songs]

    rec_songs = spotify_data.iloc[index]
    return rec_songs[metadata_cols].to_dict(orient='records')


In [11]:
# Demo: request recommendations seeded with five songs (title + year).
recommend_songs(
    song_list=[
        {'name': 'Come As You Are', 'year': 1991},
        {'name': 'Smells Like Teen Spirit', 'year': 1991},
        {'name': 'Lithium', 'year': 1992},
        {'name': 'All Apologies', 'year': 1993},
        {'name': 'Stay Away', 'year': 1993},
    ],
    spotify_data=df_spotify,
)

[{'name': 'Life is a Highway - From "Cars"',
  'year': 2009,
  'artists': "['Rascal Flatts']"},
 {'name': 'Of Wolf And Man', 'year': 1991, 'artists': "['Metallica']"},
 {'name': 'Somebody Like You', 'year': 2002, 'artists': "['Keith Urban']"},
 {'name': 'Kayleigh', 'year': 1992, 'artists': "['Marillion']"},
 {'name': 'Corazón Mágico', 'year': 1995, 'artists': "['Los Fugitivos']"},
 {'name': 'Little Secrets', 'year': 2009, 'artists': "['Passion Pit']"},
 {'name': 'No Excuses', 'year': 1994, 'artists': "['Alice In Chains']"},
 {'name': 'If Today Was Your Last Day',
  'year': 2008,
  'artists': "['Nickelback']"},
 {'name': "Let's Get Rocked", 'year': 1992, 'artists': "['Def Leppard']"},
 {'name': "Things I'll Never Say",
  'year': 2002,
  'artists': "['Avril Lavigne']"}]

In [13]:
import flask

<module 'flask' from 'C:\\Users\\munis\\anaconda3\\lib\\site-packages\\flask\\__init__.py'>

In [None]:
import streamlit as st 
import streamlit.components.v1 as components

def main():
    """Render the Streamlit recommendation page.

    Shows a banner and logo, asks for a song name and year, and - when the button
    is pressed - displays the songs returned by ``recommend_songs`` for that input.
    """
    st.title('Recommendation')

    html_temp2 = """
    <div style ="background-color:royalblue;padding:10px;border-radius:10px">
    <h2 style="color:white;text-align:center;">Spotify songsr </h2>
        <h1 style="color:white;text-align:center;">Recommendation</h1>
    </div>
    """
    components.html(html_temp2)

    components.html("""
                <img src="https://www.tech-recipes.com/wp-content/uploads/2016/02/Spotify.png" width="700" height="150">
                
                """)
    name = st.text_input("Name of the song", "Type Here")
    year = st.text_input("Year", "Type Here")

    if st.button("Recommed"):
        # The year box is free text (and defaults to "Type Here"), so validate it
        # instead of letting int() raise inside the app.
        try:
            year_value = int(year)
        except ValueError:
            st.error('Year must be a whole number, e.g. 1991')
            return

        input_data = [{'name': name, 'year': year_value}]
        # Keep the return value: it was previously discarded, which left the
        # success message permanently empty.
        recommendations = recommend_songs(input_data, df_spotify)
        if not recommendations:
            st.warning('No recommendations found')
            return

        # Headline the closest match, then list every recommendation below it.
        result = recommendations[0]['name']
        st.success('The recommedation song is {}'.format(result))
        st.table(pd.DataFrame(recommendations))
    

