In [1]:
# import libraries
import pandas as pd
import numpy as np

In [2]:
# Load the track table from a CSV file located next to the notebook.
df = pd.read_csv("SpotifyAudioFeatures2019.csv")

In [3]:
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.layouts import gridplot

In [4]:
from bokeh.transform import linear_cmap
from bokeh.util.hex import hexbin

# Popularity divided by its column maximum; passed as the y coordinate to every make2dplot call.
y = df['popularity'].div(df['popularity'].max())

def make2dplot(title, x, y, size=0.06):
    """Build a hexbin density figure of ``y`` against ``x``.

    Parameters
    ----------
    title : str
        Title shown above the figure.
    x, y : array-like
        Point coordinates handed to ``hexbin``.
    size : float, optional
        Hexagon size used for BOTH binning and drawing. The default (0.06)
        is the bin size this function has always passed to ``hexbin``.

    Returns
    -------
    The Bokeh figure with one ``hex_tile`` renderer coloured by bin count.
    """
    bins = hexbin(x, y, size)
    p = figure(title=title, tools="wheel_zoom,pan,reset",
           match_aspect=True, background_fill_color='#440154')
    p.grid.visible = False
    # The q/r tile indices returned by hexbin only map back to the right data
    # coordinates when hex_tile is given the same size that was used for
    # binning (previously 0.06 vs 0.1, which stretched the rendered tiling).
    p.hex_tile(q="q", r="r", size=size, line_color=None, source=bins,
           fill_color=linear_cmap('counts', 'Viridis256', 0, max(bins.counts)))
    return p

# (panel title, DataFrame column) pairs, in the order the panels appear in the grid.
_feature_panels = [
    ('Danceability', 'danceability'),
    ('Acousticness', 'acousticness'),
    ('Duration', 'duration_ms'),
    ('Energy', 'energy'),
    ('Instrumentalness', 'instrumentalness'),
    ('Key', 'key'),
    ('Valence', 'valence'),
    ('Liveness', 'liveness'),
    ('Loudness', 'loudness'),
    ('Speechiness', 'speechiness'),
    ('Tempo', 'tempo'),
]

# Every feature is divided by its own column maximum and plotted against `y`.
# NOTE(review): dividing by the max only lands in 0..1 for non-negative columns;
# 'loudness' presumably holds negative dB values, so its panel may span a much
# wider x range than the others — confirm whether min-max scaling was intended.
p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11 = [
    make2dplot(title, df[column]/df[column].max(), y)
    for title, column in _feature_panels
]


output_notebook()

# `color` is not a gridplot option (it raises TypeError on Bokeh releases with an
# explicit gridplot signature), so the stray color='red' keyword has been dropped.
show(gridplot([p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11], ncols=4, plot_width=200, plot_height=200, toolbar_location=None))

In [5]:
def make_plot(title, hist, edges, x):
    """Draw a pre-computed histogram as a Bokeh figure.

    Parameters
    ----------
    title : str
        Title shown above the figure.
    hist : array-like
        Bar heights, one per bin (e.g. the first value returned by ``np.histogram``).
    edges : array-like
        Bin edges; ``edges[:-1]`` / ``edges[1:]`` give each bar's left / right side.
    x : any
        Unused. Kept so existing four-argument calls continue to work.

    Returns
    -------
    The Bokeh figure containing a single ``quad`` renderer.
    """
    p = figure(title=title, tools='', background_fill_color="#fafafa")
    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
           fill_color="navy", line_color="white", alpha=0.5)
    p.y_range.start = 0
    # No glyph here is given a legend label, so the figure has no legend; the
    # former p.legend.* assignments had no effect (and warn on current Bokeh),
    # so they were removed.
    p.xaxis.axis_label = 'x'
    p.yaxis.axis_label = ''
    p.grid.grid_line_color="white"
    return p

In [6]:
import numpy as np

# Low-popularity subset: rows whose popularity is strictly below 40.
df1 = df[df['popularity'] < 40]

# (panel title, column, number of bins) in grid order.
# 'key' uses 12 bins; every other feature uses 20.
_low_pop_panels = [
    ("Danceability Distribution", 'danceability', 20),
    ("Valence Distribution", 'valence', 20),
    ("Acousticness Distribution", 'acousticness', 20),
    ("Duration Distribution", 'duration_ms', 20),
    ("Energy Distribution", 'energy', 20),
    ("Instrumental Distribution", 'instrumentalness', 20),
    ("Key Distribution", 'key', 12),
    ("Liveness Distribution", 'liveness', 20),
    ("Loudness Distribution", 'loudness', 20),
    ("Speechiness Distribution", 'speechiness', 20),
    ("Tempo Distribution", 'tempo', 20),
]

# One (counts, edges) pair per feature; the numbered names are kept bound as before
# (there is deliberately no hist8/edges8 — the original numbering skips it).
_low_pop_histograms = [
    np.histogram(df1[column], bins=n_bins)
    for _, column, n_bins in _low_pop_panels
]
(
    (hist1, edges1), (hist2, edges2), (hist3, edges3), (hist4, edges4),
    (hist5, edges5), (hist6, edges6), (hist7, edges7), (hist9, edges9),
    (hist10, edges10), (hist11, edges11), (hist12, edges12),
) = _low_pop_histograms


x = df.index

# Build one figure per feature, in the same order as the histograms.
_low_pop_plots = [
    make_plot(panel[0], counts, bin_edges, x)
    for panel, (counts, bin_edges) in zip(_low_pop_panels, _low_pop_histograms)
]
p1, p2, p3, p4, p5, p6, p7, p9, p10, p11, p12 = _low_pop_plots


output_notebook()

show(gridplot(_low_pop_plots, ncols=4, plot_width=200, plot_height=200, toolbar_location=None))

In [7]:
# High-popularity subset: rows whose popularity is at least 80.
df2 = df[df['popularity'] >= 80]

# (panel title, column, number of bins) in grid order.
# 'key' uses 12 bins; every other feature uses 20.
_high_pop_panels = [
    ("Danceability Distribution", 'danceability', 20),
    ("Valence Distribution", 'valence', 20),
    ("Acousticness Distribution", 'acousticness', 20),
    ("Duration Distribution", 'duration_ms', 20),
    ("Energy Distribution", 'energy', 20),
    ("Instrumental Distribution", 'instrumentalness', 20),
    ("Key Distribution", 'key', 12),
    ("Liveness Distribution", 'liveness', 20),
    ("Loudness Distribution", 'loudness', 20),
    ("Speechiness Distribution", 'speechiness', 20),
    ("Tempo Distribution", 'tempo', 20),
]

# One (counts, edges) pair per feature. The numbered names stay bound so any
# code that still refers to hist1/edges1/... keeps working (there is no
# hist8/edges8 — the original numbering skips it).
_high_pop_histograms = [
    np.histogram(df2[column], bins=n_bins)
    for _, column, n_bins in _high_pop_panels
]
(
    (hist1, edges1), (hist2, edges2), (hist3, edges3), (hist4, edges4),
    (hist5, edges5), (hist6, edges6), (hist7, edges7), (hist9, edges9),
    (hist10, edges10), (hist11, edges11), (hist12, edges12),
) = _high_pop_histograms


x = df.index

# Build one figure per feature, in the same order as the histograms.
_high_pop_plots = [
    make_plot(panel[0], counts, bin_edges, x)
    for panel, (counts, bin_edges) in zip(_high_pop_panels, _high_pop_histograms)
]
p1, p2, p3, p4, p5, p6, p7, p9, p10, p11, p12 = _high_pop_plots


output_notebook()

# `color` is not a gridplot option (it raises TypeError on Bokeh releases with an
# explicit gridplot signature), so the stray color='red' keyword has been dropped.
show(gridplot(_high_pop_plots, ncols=4, plot_width=200, plot_height=200, toolbar_location=None))