In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, I/O
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud, STOPWORDS
# import nltk
import os
from PIL import Image
import math
import requests
import redis
import sys
import panel as pn
from panel.interact import interact
pn.extension('tabulator', sizing_mode="stretch_width")
import hvplot.pandas
import holoviews as hv
hv.extension('bokeh')

In [None]:
from bokeh.resources import INLINE
import bokeh.io

# Send Bokeh output to the notebook, using the INLINE resources imported above
# (BokehJS is embedded in the page instead of being fetched from a CDN).
bokeh.io.output_notebook(INLINE)

In [None]:
# Directory holding the CSV inputs; later cells build other file names from it.
path = './data'
# Song table that the plots, word cloud and recommenders below all read from.
df = pd.read_csv(f"{path}/songs_df_final.csv")

## Changes in music features over time

In [None]:
# Year selector: 1970-2020 in steps of 5, starting at 2000.
year_slider = pn.widgets.IntSlider(
    name='Year',
    start=1970,
    end=2020,
    step=5,
    value=2000,
)
# Interactive wrapper around `df`, so widgets can appear inside pandas expressions.
idf = df.interactive()

In [None]:
# Despite its name, this widget selects the *feature* (a numeric column of
# `df`) whose yearly mean is plotted per genre.
genre_options = pn.widgets.RadioButtonGroup(
    name='genre options',
    options=['danceability','energy','loudness','speechiness','acousticness','instrumentalness', 'liveness', 'tempo', 'duration_s','valence', 'Happy','Sad','Fear','Surprise','Angry'],
    button_type='success'
)

# Values of the `genres` column to keep. Every entry is the string form of a
# one-element list, e.g. "['pop']". The rock entry previously read "[rock']"
# (missing opening quote), which cannot match that format, so rock songs
# were silently excluded from the plot.
genre = ["['pop']", "['rock']", "['dance/electronic']", "['rap']", "['hip hop']", "['soul']", "['reggae']", "['others']"]

# Reactive pipeline: keep songs up to the slider year in the listed genres,
# average the selected feature per (genre, year), and flatten back to columns
# so hvplot can address `year` and `genres` by name.
feature_pipeline = (
    idf[
        (idf.year <= year_slider) &
        (idf.genres.isin(genre))
    ]
    .groupby(['genres','year'])[genre_options].mean()
    .to_frame()
    .sort_values(by='year')
    .reset_index()
)

# One line per genre; the y column follows the selected feature.
feature_plot = feature_pipeline.hvplot(x='year',by='genres',y=genre_options,line_width=2, title='Feature Characteristics by Genre')

In [None]:
def get_average_over_year(feature, year):
    """Plot the yearly mean of ``feature`` over all songs up to ``year``.

    Parameters
    ----------
    feature : str
        Name of a numeric column of the module-level ``df``.
    year : int
        Last year (inclusive) to include in the chart.

    Returns
    -------
    An hvplot line chart with ``year`` on the x axis and the mean of
    ``feature`` on the y axis.
    """
    feature = str(feature)
    yearly_mean = (
        df[df['year'] <= year]
        .groupby(['year'])
        .agg({feature: 'mean'})
    )
    title = 'Average %s from 1970 to %d' % (feature, year)

    # After the groupby, `year` is the index; hvplot accepts an index name as x.
    # The title string is not a column, so it is passed as `title`, not `x`.
    return yearly_mean.hvplot.line(x='year', y=feature, title=title)

## Correlation between any 2 features

In [None]:
def visualize_correlation(x,y):
    """Scatter-plot column ``y`` of ``df`` against column ``x``.

    Points are coloured by ``y`` and an OLS trendline is requested from
    plotly express. Returns the plotly figure, or ``None`` if an
    ``EOFError`` is raised while building it.
    """
    try:
        heading = ''.join(('<b> ', x, ' Vs ', y))
        return px.scatter(
            df,
            x=x,
            y=y,
            color=y,
            template='simple_white',
            title=heading,
            trendline="ols",
        )
    except EOFError:
        return None
    
# Two identical feature pickers (x and y of the correlation scatter). Each
# widget gets its own freshly built options list.
first_feature_options, second_feature_options = (
    pn.widgets.RadioButtonGroup(
        name=widget_name,
        options=[
            'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
            'instrumentalness', 'liveness', 'tempo', 'duration_s', 'valence',
            'Happy', 'Sad', 'Fear', 'Surprise', 'Angry',
        ],
        button_type='success',
    )
    for widget_name in ('first feature', 'second feature')
)

## Wordcloud for artists

In [None]:
from matplotlib.figure import Figure
# Put the figure size back to matplotlib's shipped default.
plt.rcParams["figure.figsize"] = plt.rcParamsDefault["figure.figsize"]

# Pixel array of the mask image handed to WordCloud.
mask = np.array(Image.open('./images/mask_guitar.png'))

# Tokens excluded from the word cloud in addition to WordCloud's STOPWORDS:
# punctuation, contraction fragments, interjections and a few pronoun forms.
forbidden = [
    '(', ')', "'", ',',
    'oh', "'s", 'yo', "'ll", 'el', "'re", "'m", "oh-oh", "'d", "n't", "``",
    "ooh", "uah", "'em", "'ve", "eh", "pa", "brr", "yeah",
    'Im', 'Ill', 'Id', 'Ive', 'your', 'youre', "you're", 'youll', 'u',
]
stop_words_all = set(STOPWORDS).union(forbidden)

def artist_wordcloud(artist):
    """Render a word cloud of all lyrics in ``df`` whose primary artist is ``artist``.

    The lyrics are concatenated into one string, passed to ``WordCloud``
    (module-level ``stop_words_all`` and ``mask``, at most 200 words, white
    background) and drawn on a standalone matplotlib ``Figure``.

    Returns a ``pn.pane.Matplotlib`` wrapping that figure, or ``None`` if an
    ``EOFError`` is raised along the way.
    """
    try:
        by_artist = df['primary_artist'] == artist
        corpus = " ".join(str(text) for text in df.loc[by_artist, 'lyrics'].to_list())

        cloud = WordCloud(
            stopwords=stop_words_all,
            background_color='white',
            max_words=200,
            mask=mask,
        )
        cloud = cloud.generate(corpus)

        # A bare Figure (not pyplot) so nothing is registered with the global
        # pyplot state.
        figure = Figure(facecolor="#ffffff")
        axes = figure.add_subplot(111)
        axes.imshow(cloud, interpolation='bilinear')
        axes.axis("off")

        return pn.pane.Matplotlib(
            figure,
            height=450,
            width=800,
            tight=True,
            dpi=72,
            interactive=False,
        )
    except EOFError:
        return None

# One dropdown entry per artist. Without de-duplication an artist appears once
# for every song they have in `df`, which bloats the widget.
artist_select = pn.widgets.Select(
    options=df['primary_artist'].drop_duplicates().to_list(),
    value='John Mayer',
)

## Genre Classification

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from urllib.parse import urlparse
from tqdm import tqdm_notebook

# Spotify client credentials are read from the environment instead of being
# committed with the notebook. The variable names are the ones spotipy itself
# looks up. Any id/secret pair that was previously stored in this file should
# be treated as leaked and rotated in the Spotify developer dashboard.
cid = os.environ.get("SPOTIPY_CLIENT_ID")
secret = os.environ.get("SPOTIPY_CLIENT_SECRET")
if not cid or not secret:
    raise RuntimeError(
        "Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment "
        "variables before running this notebook."
    )

client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [None]:
# Train the genre classifier used by the "Genre Classifier" panel.
classification_df = pd.read_csv(path+'/classification_df_2.csv')
# Drop the `nltk` column and the five emotion scores; what remains is consumed
# by position below.
classification_df_reduced = classification_df.drop(
    columns=['nltk', 'Happy', 'Angry', 'Surprise', 'Sad', 'Fear'])

# Plain 2-D array of the remaining columns, in CSV column order.
array = classification_df_reduced.values

# Columns 0-8 are the model inputs and column 9 is the target.
# NOTE(review): presumably nine audio features followed by the genre label —
# confirm against the CSV, since the selection is purely positional.
x = array[:,0:9]
y = array[:,9]
# 80/20 split with a fixed seed. X_validation / Y_validation are not used in
# the cells shown here.
X_train, X_validation, Y_train, Y_validation = train_test_split(x, y, test_size=0.20, random_state=1)
# One-vs-rest logistic regression with the liblinear solver.
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
lr.fit(X_train, Y_train)

In [None]:
# Free-text inputs for the genre classifier panel.
artist_widget = pn.widgets.TextInput(name='Input the artist name', value='')
track_title_widget = pn.widgets.TextInput(name='Input the track title', value='')

# Keys removed from the audio-features payload before it is fed to the
# classifier ('mode' used to be listed twice; membership is all that matters).
# NOTE(review): presumably what remains are the nine features the model was
# trained on, in the same order — confirm against the training columns.
rem_list = ['key','mode','type','id','uri','track_href','analysis_url','duration_ms', 'time_signature']

# Create a Redis client
# RC = redis.Redis(host='localhost', port=3107, db=0, charset='utf-8', decode_responses=True)
# RC = redis.from_url(os.environ.get("REDIS_URL"))

def fetch_track_features(artist, track):
    """Look a track up on Spotify and predict its genre from its audio features.

    Parameters
    ----------
    artist : str
        Artist name used in the Spotify search query.
    track : str
        Track title used in the Spotify search query.

    Returns
    -------
    tuple or str
        ``(prediction, "Artist - Title")`` on success, where ``prediction`` is
        whatever ``lr.predict`` returns for the single track and the label uses
        the names reported by Spotify. If nothing usable is found, or the
        lookup fails, the "No songs found" message string is returned instead.
    """
    not_found = "No songs found in Spotify. Please check your query params."

    # Both inputs blank (e.g. the widgets' initial state): nothing to search
    # for, so skip the API round trip.
    if not str(artist).strip() and not str(track).strip():
        return not_found

    query = "artist:%s track:%s" % (artist, track)

    try:
        response = sp.search(q=query, type="track", market="US", limit=1)
        hits = response['tracks']['items']
        if not hits:
            return not_found
        best_hit = hits[0]

        audio = sp.audio_features(best_hit['id'])
        # The original code indexed [0] and called .items() unguarded; an empty
        # or None payload is treated as "not found" instead of raising.
        if not audio or audio[0] is None:
            return not_found

        # Feature order is the key order of the payload minus `rem_list`.
        # NOTE(review): this must match the column order used for training.
        feature_values = [val for key, val in audio[0].items() if key not in rem_list]
        prediction = lr.predict([np.array(feature_values)])

        label = best_hit['artists'][0]['name'] + ' - ' + best_hit['name']
        return prediction, label

    except Exception:
        # Best effort for the dashboard: any lookup or prediction failure is
        # shown as "not found". Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return not_found

## Content-based Recommendation

In [None]:
### Songs
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Dropdown of distinct track titles.
title_select = pn.widgets.Select(name='Track Name', options=(df['title'].drop_duplicates().to_list()))

# tfidf = TfidfVectorizer(stop_words=stop_words_all)
tfidf = TfidfVectorizer()

# Songs without lyrics cannot be vectorised. The remaining rows are renumbered
# 0..n-1 so a position in `lyrics` is also a row/column of `cosine_sim`.
lyrics = df['lyrics'].dropna().reset_index(drop=True)

tfidf_matrix = tfidf.fit_transform(lyrics)

# Pairwise similarity of every lyric against every other lyric.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Map title -> position in `lyrics` / `cosine_sim`.
# The titles must come from exactly the rows kept in `lyrics` (non-null
# lyrics). `df.dropna()` drops rows with a NaN in *any* column, which can
# shift or shorten the mapping.
indices = pd.Series(lyrics.index, index=df.loc[df['lyrics'].notna(), 'title'])
# Keep the first position for a repeated title so `indices[title]` is always a
# scalar. `Series.drop_duplicates()` would compare the (already unique)
# positions rather than the titles.
indices = indices[~indices.index.duplicated(keep='first')]

def get_song_recommendations(title, cosine_sim=cosine_sim):
    """Return the 10 songs whose lyrics are most similar to those of ``title``.

    Parameters
    ----------
    title : str
        Track title; looked up in the module-level ``indices`` mapping.
    cosine_sim : array-like, optional
        Square similarity matrix over the songs that have lyrics. Defaults to
        the module-level ``cosine_sim``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``title`` with a single ``lyrics`` column, most similar
        first. Empty if ``title`` is not in ``indices``.
    """
    # Rebuild the row set the similarity matrix was computed on (rows with
    # lyrics, renumbered from 0) so a matrix position maps to the right song.
    # Indexing the unfiltered `df` by position is off whenever some lyrics are
    # missing.
    songs = df.dropna(subset=['lyrics']).reset_index(drop=True)

    if title not in indices.index:
        return songs.iloc[0:0].set_index('title')[['lyrics']]

    idx = indices[title]
    # A title that occurs more than once yields a Series; use its first position.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
    # Exclude the query song explicitly: with ties (e.g. duplicated lyrics) it
    # is not guaranteed to be the first entry.
    top_positions = [pos for pos, _ in ranked if pos != idx][:10]

    return songs.iloc[top_positions].set_index('title')[['lyrics']]

In [None]:
### Keywords
# Free-text box whose content is matched against all lyrics.
keywords_widget = pn.widgets.TextInput(
    name="Input some key words",
    value="",
)

def get_lyrics_recommendations(keywords):
    """Return up to 10 songs whose lyrics best match the given keywords.

    Parameters
    ----------
    keywords : str
        Free text; compared to every lyric in ``df`` by TF-IDF similarity.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``title`` with a single ``lyrics`` column, best match
        first. Songs with zero similarity are left out, so the frame is empty
        when the keywords are blank or match nothing.
    """
    songs = df.dropna(subset=['lyrics']).reset_index(drop=True)
    query = str(keywords).strip()
    if not query or songs.empty:
        return songs.iloc[0:0].set_index('title')[['lyrics']]

    # As before, the query is appended to the corpus and the shared vectoriser
    # is refitted, so the term weights are unchanged.
    corpus = songs['lyrics'].to_list() + [query]
    matrix = tfidf.fit_transform(corpus)

    # Only the query row is needed; building the full N x N matrix for a
    # single row was wasted time and memory.
    scores = linear_kernel(matrix[-1], matrix[:-1]).ravel()

    # Rank by score and select by position, which keeps the ranking and caps
    # the result at 10. Matching `df` rows by lyric text did neither.
    best_first = np.argsort(-scores, kind='stable')[:10]
    top_positions = [pos for pos in best_first if scores[pos] > 0]

    return songs.iloc[top_positions].set_index('title')[['lyrics']]

# **Creating Dashboard**

In [None]:
# Accent colour declared on the FastListTemplate class; not referenced again in
# the code shown here (the template below passes its own hex colour).
ACCENT_COLOR = pn.template.FastListTemplate.accent_base_color

# Assemble the dashboard: a sidebar with the year slider and a description of
# each section, and a main area with one row per feature of the app. The
# trailing `.servable()` marks the template to be served; the final `;`
# suppresses the cell output.
template = pn.template.FastListTemplate(
    title="Top Spotify tracks of 1970-2020 Exploration",
    sidebar=[pn.pane.Markdown("## Settings"), 
             year_slider,
             # pn.pane.PNG('https://www.freepnglogos.com/uploads/spotify-logo-png/file-spotify-logo-png-4.png', sizing_mode='scale_both'),
             pn.pane.Markdown("## Content description"),
             pn.pane.Markdown("### 1. Visualize the changes of each feature (on average) over time, grouped by music genres. You can also slide to the right to see its curve regardless of genre."),
             pn.pane.Markdown("### 2. Visualize the correlation between any 2 features with scatter plot."),
             pn.pane.Markdown("### 3. Generate a (kind of blurry) wordcloud of your favorite artist. Words are picked based on their frequency of appearance in the artist's songs."),
             pn.pane.Markdown("### 4. Attempt to predict the genre of a track by its music features, fetched from cache or Spotify API. At the moment, the functionality is limited to classifying into rock, pop, rap and 'others'."),
             pn.pane.Markdown("### 5. Get 10 recommendations, accepting inputs as varied as an existing track or some random keywords."),
             pn.pane.Markdown("### *Since this is a demo, a lot of things do not look and function that great and the layout is not (yet) responsive, so ... sorry.")
            ],
    # Row 1: feature selector above the per-genre plot and the overall yearly
    # average, both driven by `genre_options` and `year_slider`.
    main=[pn.Row(pn.Column(genre_options,
                           pn.Row( 
                               feature_plot.panel(width=900),
                               pn.bind(get_average_over_year, feature=genre_options, year=year_slider),
                           ),
                          ),
                ), 
          # Row 2: two feature selectors and their correlation scatter plot.
          pn.Row(pn.Column(
                 first_feature_options,
                 second_feature_options,
                 pn.bind(visualize_correlation, x=first_feature_options, y=second_feature_options)
          )),
          # Row 3: artist dropdown and the word cloud of that artist's lyrics.
          pn.Row(pn.Column('## Wordcloud', artist_select,
                   pn.bind(artist_wordcloud, artist=artist_select))
                ),
            # Row 4: artist/title text inputs and the genre prediction.
            pn.Row(pn.Column('## Genre Classifier', 
                             artist_widget,
                             track_title_widget,
                             pn.bind(fetch_track_features,
                                     artist=artist_widget,
                                    track=track_title_widget
                                    ),
                )),
          # Row 5: recommendations by existing track (left) and by keywords (right).
          pn.Row(pn.Column('## Song Recommendations', title_select,
                           pn.bind(get_song_recommendations, title=title_select)
                           ),
                  pn.Column('## Lyrics Recommendations', keywords_widget,
                           pn.bind(get_lyrics_recommendations, keywords=keywords_widget)
                  ),
                 ),
        ],
    # The same green is used for the accent and the header background.
    accent_base_color="#1DB954",
    header_background="#1DB954"
).servable();