In [None]:
# Import modules 

import pandas as pd
from ast import literal_eval
import os
from functools import reduce
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
from dash_table import DataTable

In [2]:
# Print the list of file names found in each directory under ../data
for _dirpath, _subdirs, file_names in os.walk("../data"):
    print(file_names)

['data_w_genres.csv', 'super_genres.json', 'data_by_artist.csv', 'data_by_genres.csv', 'data_by_year.csv', 'data.csv']


In [3]:
# Load the song-level dataset, parsing 'year' as a date column

data = pd.read_csv("../data/data.csv", parse_dates=['year'])

# Each 'artists' cell is evaluated as a Python literal
# (presumably the text of a list of artist names - confirm against the CSV).
data['artists'] = data['artists'].apply(literal_eval)

# Drop the columns that are not used by the analysis below

columns = ['release_date', 'Unnamed: 0', 'id', 'key', 'mode']
data = data.drop(columns=columns)

In [4]:
# How do the audio features change over time?

cols = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'speechiness',
    'valence',
    'year',
]

# Yearly mean of every selected feature.
# NOTE: the 'A' (year-end) alias is deprecated from pandas 2.2 in favour of 'YE';
# it is kept here so the notebook still runs on older pandas versions.
trends = data[cols].resample('A', on="year").mean()

# Long format: one row per (year, feature) pair so each feature gets its own line
trends_long = trends.reset_index().melt(id_vars=["year"])

fig_trends = px.line(
    trends_long,
    x="year",
    y="value",
    color="variable",
    title="How trends change over time?",
)

# Number of explicit songs released per year

cols = ['explicit', "year"]

# Summing the 'explicit' column per year gives the yearly total of that flag
explicit_trends = data[cols].resample('A', on="year").sum()

# Long format expected by px.line (columns: year, variable, value)
explicit_long = explicit_trends.reset_index().melt(id_vars=["year"])

fig_explicit = px.line(
    explicit_long,
    x="year",
    y="value",
    color="variable",
    title="Number of explicit songs per year",
)

# Popularity of explicit music over time

explicit_music = data[data.explicit == 1]

# Average popularity of the explicit songs released in each year.
# (Taking `.first()` here would plot the popularity of whichever single song
# happens to come first in each year, which says nothing about the year as a
# whole.) The column is selected before aggregating so that the mean is only
# computed on 'popularity' and not on the frame's non-numeric columns.
explicit_music_popularity = (
    explicit_music.resample('A', on="year")["popularity"].mean()
)

fig_explicit_popularity = px.line(
    pd.melt(
        explicit_music_popularity.reset_index(),
        id_vars=["year"]
    ),
    x="year",
    y="value",
    color="variable",
    title="Popularity of explicit songs over time"
)

# Years with no explicit songs have no mean (NaN); draw the line across them
fig_explicit_popularity.update_traces(connectgaps=True)

# Top 5 popular songs of Eminem

def check_artist(x, artist='Eminem'):
    """Tell whether ``artist`` is one of the items of ``x``.

    Parameters:
        x: iterable of items to scan (here, the artists credited on a song).
        artist: value each item is compared against with ``==``.

    Returns:
        True if at least one item equals ``artist``, otherwise False
        (including when ``x`` is empty).
    """
    return any(member == artist for member in x)


# Boolean mask of the songs whose artist list contains the default artist
# of check_artist ('Eminem'); computed once and reused for both frames.
is_eminem_song = data.artists.apply(check_artist)

eminem = data[is_eminem_song]
eminem_top_5 = eminem.nlargest(5, 'popularity')[["year", "name", "popularity"]]

In [None]:
# Load the per-artist dataset (with genres) and drop the unused columns

columns = ['Unnamed: 0', 'key', 'mode']
data_w_genres = pd.read_csv("../data/data_w_genres.csv").drop(columns=columns)

# Top 5 positive music artists (No vocal)

# "No vocal" is taken here to mean instrumentalness above 0.5
is_instrumental = data_w_genres.instrumentalness > 0.5
no_vocal = data_w_genres[is_instrumental]

# The five rows with the highest valence, reduced to the columns shown in the table
shown_columns = ["artists", "valence", "instrumentalness", "popularity", "genres"]
top_5_positive_no_vocal = no_vocal.nlargest(5, 'valence')[shown_columns]


# Most popular rapper

def check_for_genre(x, genre='rap'):
    """Tell whether ``genre`` occurs in at least one item of ``x``.

    The check is ``genre in item``; when the items are strings this is a
    substring match, so with the default ``'rap'`` an item such as ``'trap'``
    also counts as a hit.

    Parameters:
        x: iterable of items to scan (here, an artist's genre labels).
        genre: value looked for inside each item.

    Returns:
        True if any item contains ``genre``, otherwise False
        (including when ``x`` is empty).
    """
    return any(genre in label for label in x)

# Each 'genres' cell is evaluated as a Python literal so it can be iterated
# item by item (presumably the text of a list of genre names - confirm against the CSV).
data_w_genres['genres'] = data_w_genres['genres'].apply(literal_eval)

# Keep the artists with at least one genre matching the default of check_for_genre ('rap')
mask = data_w_genres['genres'].apply(check_for_genre)
rappers = data_w_genres[mask]

# The five most popular of them, reduced to the columns shown in the table
top_5_rappers = rappers.nlargest(5, 'popularity')[["artists", "popularity", "genres"]]

In [None]:
# Load the per-genre dataset and drop the unused columns

columns = ['key', 'mode']
data_by_genres = pd.read_csv("../data/data_by_genres.csv").drop(columns=columns)

# Most Popular Genres

# The five rows with the highest popularity, reduced to genre name and score
most_popular_rows = data_by_genres.nlargest(5, 'popularity')
popular_genre = most_popular_rows[["genres", "popularity"]]

In [17]:
# Helpers shared by the three tab layouts


def _table_records(frame):
    """Return the first rows of ``frame`` as records that DataTable can display.

    DataTable cells have to be strings, numbers or booleans. List/tuple cells
    (such as the parsed ``artists`` and ``genres`` columns) are therefore joined
    into one comma-separated string; every other value is passed through as is.

    Parameters:
        frame: DataFrame to preview; only ``frame.head()`` is used.

    Returns:
        A list with one ``{column: value}`` dict per row.
    """
    return [
        {
            column: ", ".join(map(str, value)) if isinstance(value, (list, tuple)) else value
            for column, value in row.items()
        }
        for row in frame.head().to_dict("records")
    ]


def _table_section(title, frame, padded=True):
    """Build one "centered label + table" section of a tab.

    Parameters:
        title: text of the label shown above the table.
        frame: DataFrame whose first rows and columns are shown.
        padded: when True the section is wrapped with 20px of padding.

    Returns:
        An ``html.Div`` containing the label and the DataTable.
    """
    children = [
        html.Label(
            title,
            style={"display": "block", "text-align": "center"}
        ),
        DataTable(
            data=_table_records(frame),
            columns=[{"name": i, "id": i} for i in frame.columns]
        ),
    ]
    if padded:
        return html.Div(children, style={"padding": "20px"})
    return html.Div(children)


# Songs

tab1 = [
    # The first section has no padding, as in the original layout
    _table_section("Peek Into DataFrame", data, padded=False),
    html.Div([
        dcc.Graph(figure=fig_trends)
    ]),
    html.Div([
        dcc.Graph(figure=fig_explicit)
    ]),
    html.Div([
        dcc.Graph(figure=fig_explicit_popularity)
    ]),
    _table_section("Top 5 popular songs of Eminem", eminem_top_5),
]

# Artists

tab2 = html.Div([
    _table_section("Peek Into DataFrame", data_w_genres),
    _table_section("Top 5 positive music artists (No vocal)", top_5_positive_no_vocal),
    _table_section("Most popular rapper", top_5_rappers),
])

# Genres

tab3 = html.Div([
    _table_section("Peek Into DataFrame", data_by_genres),
    _table_section("Most Popular Genres", popular_genre),
])


In [18]:
# External CSS applied to the whole app
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(external_stylesheets=external_stylesheets)

# One entry per tab: (label shown to the user, value/id of the tab, tab content)
tab_specs = [
    ("Songs", "songs", tab1),
    ("Artists", "artists", tab2),
    ("Genres", "genres", tab3),
]

app.layout = html.Div([
    dcc.Tabs([
        dcc.Tab(label=tab_label, value=tab_key, id=tab_key, children=tab_content)
        for tab_label, tab_key, tab_content in tab_specs
    ])
])


app.run_server()

# Acousticness has decreased and energy has increased over time.
# Explicit music has increased.

Dash app running on http://127.0.0.1:8050/
