In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the critic reviews (one row per review: album, rating, publication)
# from the JSON export, then preview the first rows.
critics_path = "../output/critics.json"
critics_df = pd.read_json(critics_path)
critics_df.head()

Unnamed: 0,album description,album id,album name,album rating,publication name
0,"Direct, honest and powerful, Patch The Sky can...",/music/patch-the-sky/bob-mould,Patch the Sky,80,The Source
1,It's not Puff's best collection by any stretch...,/music/mmm-mixtape/puff-daddy,MMM [Mixtape],60,The Source
2,"Save a few moments of ""over-rapping,"" his pen ...",/music/the-incredible-true-story/logic,The Incredible True Story,80,The Source
3,Free TC moves way less outside contribution th...,/music/free-tc/ty-dolla-ign,Free TC,60,The Source
4,"T5DOA is a fine body of work, but falls short ...",/music/top-5-dead-or-alive/jadakiss,Top 5 Dead or Alive,70,The Source


In [3]:
# Summary statistics for the numeric columns of the reviews frame.
critics_df.describe()

Unnamed: 0,album rating
count,187304.0
mean,70.914337
std,14.12342
min,0.0
25%,60.0
50%,70.0
75%,80.0
max,100.0


In [3]:
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Enable inline plotly rendering for this notebook; connected=True loads
# plotly.js from the online CDN instead of embedding it in the notebook.
init_notebook_mode(connected=True)

In [4]:
# Distribution of the individual review scores across the whole dataset.
data1 = go.Histogram(x=critics_df['album rating'])
layout1 = go.Layout(
    title='Histogram of Album Scores',
    xaxis={'title': 'Score'},
    yaxis={'title': 'Number of Reviews'},
)

# A figure is described by a plain dict holding the traces and the layout.
fig1 = {'data': [data1], 'layout': layout1}
iplot(fig1, filename='basic histogram')

In [5]:
# Aggregate the reviews per publication: how many (non-missing) ratings each
# publication issued and the mean of those ratings.
grouped = critics_df.groupby('publication name')
num_ratings = grouped['album rating'].count()
avg_ratings = grouped['album rating'].mean()

# Build the summary frame in one step (indexed by publication name) rather
# than assigning columns one at a time onto an empty DataFrame. `columns=`
# pins the column order regardless of dict ordering.
grouped_df = pd.DataFrame(
    {'num_ratings': num_ratings, 'avg_ratings': avg_ratings},
    columns=['num_ratings', 'avg_ratings'],
)

# Publication labels, used as hover text in the plots. They are taken from
# the summary frame's own index so they are guaranteed to line up row-for-row
# with num_ratings / avg_ratings. The key order of `grouped.groups` is a dict
# ordering and is not guaranteed to match the row order of the aggregates.
groups = list(grouped_df.index)

In [6]:
# How many publications fall into each review-count bucket.
data2 = go.Histogram(x=grouped_df['num_ratings'])
layout2 = go.Layout(
    title='Number of Reviews Per Publication',
    hovermode='closest',
    xaxis={'title': 'Number of Reviews'},
    yaxis={'title': 'Number of Publications'},
)

# Bundle trace and layout into the figure dict and render it inline.
fig2 = {'data': [data2], 'layout': layout2}
iplot(fig2, filename='basic histogram')

In [7]:
# Box plot of the per-publication review counts, to see how skewed the
# distribution is and which publications sit far above the rest.
data3 = go.Box(
    y=grouped_df['num_ratings'],
    name='Number of Reviews',
    boxmean=True,            # draw the mean in addition to the median
    jitter=0.3,
    boxpoints='outliers',    # only draw the individual points flagged as outliers
)

layout3 = go.Layout(
    title='Number of Reviews Per Publication',
    hovermode='closest',
    yaxis=dict(
        title='Number of Reviews'
    )
)
# Pass the layout along with the trace; without it the title and the y-axis
# label defined above are never rendered.
fig3 = dict(data=[data3], layout=layout3)
iplot(fig3, filename='basic histogram')

# Upper Tukey fence: Q3 + 1.5 * IQR of the review counts. Counts above this
# value are the ones treated as outliers.
q75, q25 = np.percentile(grouped_df['num_ratings'], [75, 25])
iqr = q75 - q25
upper_fence = 1.5 * iqr + q75

In [8]:
# Keep only the publications whose review count is at or below the upper
# fence, so the histogram is not dominated by a few very prolific outlets.
num_ratings_truncated = grouped_df[grouped_df['num_ratings'] <= upper_fence]

data2_1 = go.Histogram(x=num_ratings_truncated['num_ratings'])
layout2_1 = go.Layout(
    # The filter above is inclusive (<=), so the title states the same bound.
    title='Number of Reviews Per Publication (Outliers Omitted, n <= ' + str(upper_fence) + ')',
    hovermode='closest',
    xaxis=dict(
        title='Number of Reviews',
    ),
    yaxis=dict(
        title='Number of Publications'
    )
)

fig2_1 = dict(data=[data2_1], layout=layout2_1)
iplot(fig2_1, filename='basic histogram')

In [9]:
# Scatter of each publication's average rating against how many reviews it
# has written; hovering a point shows the publication label from `groups`.
marker_style = {
    'size': 5,
    'color': 'rgba(152, 0, 0, .8)',
    'line': {'width': 2, 'color': 'rgb(0, 0, 0)'},
}
data3 = go.Scatter(
    x=grouped_df['num_ratings'],
    y=grouped_df['avg_ratings'].round(1),  # one decimal keeps hover values short
    text=groups,
    name='Above',
    mode='markers',
    marker=marker_style,
)

# A log-scaled x axis (type = 'log') is left disabled here.
layout3 = go.Layout(
    title='Average Rating vs. Number of Reviews By Publication',
    hovermode='closest',
    xaxis={
        'title': 'Number of Reviews',
        'zeroline': False,
        'gridwidth': 2,
    },
    yaxis={
        'title': 'Average Rating (100 max)',
        'gridwidth': 2,
    },
    showlegend=False,
)

fig3 = {'data': [data3], 'layout': layout3}
iplot(fig3, filename='styled-scatter')