In [None]:
import pandas as pd
import pandas_profiling as pp
from wordcloud import WordCloud, STOPWORDS 
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import re
# Initialise plotly's offline notebook mode before any figure is shown.
pyo.init_notebook_mode()

In [None]:
# Read the bestsellers table; the path is relative to the notebook's
# working directory.
csv_path = '../input/amazon-top-50-bestselling-books-2009-2019/bestsellers with categories.csv'
data = pd.read_csv(csv_path)

# Preview the first five rows.
data.head(5)

In [None]:
# Capitalise every literal occurrence of 'that' in the titles (plain
# substring replacement, no regex semantics).
# NOTE(review): presumably this merges titles that differ only in the
# capitalisation of "that" -- confirm against the dataset.
titles = data['Name']
data['Name'] = titles.str.replace('that', 'That', regex=False)

In [None]:
# Build the pandas-profiling report for the whole table and show it as the
# cell output.
profile_report = pp.ProfileReport(data)
profile_report

In [None]:
# Number of distinct authors in the table.
data['Author'].nunique()

In [None]:
# Number of distinct book titles in the table.
data['Name'].nunique()

In [None]:
# Replace zero prices with the median price.
#
# The result is assigned back to the column explicitly. Calling
# `replace(..., inplace=True)` on `data.Price` acts on an intermediate Series:
# recent pandas versions warn about this chained in-place pattern, and with
# copy-on-write enabled it leaves `data` unchanged.
#
# The median is computed over every row (zeros included), as before.
data['Price'] = data['Price'].replace(0, data['Price'].median())

In [None]:
# Count how many rows each title has and keep the titles with at least five.
title_counts = data['Name'].value_counts()
data_bar = title_counts[title_counts >= 5]

# Horizontal bars: one bar per title, bar length = number of rows.
title_bars = go.Bar(x=data_bar, y=data_bar.index, orientation='h')
fig = go.Figure(title_bars)

# Reverse the y axis so the first entry of `data_bar` is drawn at the top;
# the wide left margin leaves room for the long title labels.
fig.update_layout(
    title='Books that appears 5 and more times',
    hovermode='y',
    yaxis={'autorange': 'reversed'},
    margin={'l': 550},
)

fig.show()

In [None]:
# Count rows per author and keep only authors with more than five rows.
# `data_bar` is rebound here; the scatter-preparation cell below reads its
# index to select these authors.
author_counts = data['Author'].value_counts()
data_bar = author_counts[author_counts > 5]

# Horizontal bars: one bar per author, bar length = number of rows.
author_bars = go.Bar(x=data_bar, y=data_bar.index, orientation='h')
fig = go.Figure(author_bars)

# Reverse the y axis so the first entry of `data_bar` is drawn at the top.
fig.update_layout(
    title='Authors that appears more than 5 times',
    hovermode='y',
    yaxis={'autorange': 'reversed'},
)

fig.show()

In [None]:
# Build one row per (Author, Year) for the authors selected in `data_bar`
# (the per-author counts from the preceding cell), with:
#   Name     - that author's titles for the year, joined with '; '
#   quantity - a label such as "1 book" / "3 books"
#
# The number of books comes from the group size rather than from counting
# ';' in the joined string, so a title that itself contains a semicolon can
# no longer inflate the count.
data_scater = (
    data[data.Author.isin(data_bar.index)]
    .groupby(['Author', 'Year'])['Name']
    .agg(Name='; '.join, quantity='size')
    .reset_index()
)

# Turn the numeric count into the categorical label used for colouring.
data_scater["quantity"] = [
    f"{n} book" if n == 1 else f"{n} books"
    for n in data_scater["quantity"]
]

In [None]:
# One marker per (Author, Year) row of `data_scater`, coloured by the
# "quantity" label; the joined titles are added to the hover box.
fig = px.scatter(
    data_scater,
    x="Year",
    y="Author",
    color="quantity",
    hover_data={'Name': True},
)

# Linear tick mode on the x axis (the Year column).
fig.update_layout(
    title='Top Authors and their Books/years ',
    xaxis={'tickmode': 'linear'},
)

fig.show()

In [None]:
# Histogram of the "User Rating" column, coloured by "Genre".
fig = px.histogram(
    data,
    x="User Rating",
    color="Genre",
    title='Histogram of Users Rating',
)
fig.update_layout(hovermode='x')
fig.show()

In [None]:
# Violin plot of the "User Rating" column, one violin per "Genre" value.
fig = px.violin(
    data,
    y="User Rating",
    color="Genre",
    title='Violin of Users Rating',
)
fig.show()

In [None]:
# Sum the "Reviews" column per (Year, Genre) pair.
data_bar = data.groupby(['Year', 'Genre'])['Reviews'].agg(Total_revievs='sum').reset_index()

# One bar trace per genre. Each trace takes its x values from its own rows,
# so a year that lacks one of the genres cannot shift that genre's bars onto
# the wrong year (which happens when every trace shares the list of all
# years while y is filtered per genre).
genre_traces = []
for genre in ('Non Fiction', 'Fiction'):
    genre_rows = data_bar[data_bar.Genre == genre]
    genre_traces.append(
        go.Bar(name=genre, x=genre_rows['Year'], y=genre_rows['Total_revievs'])
    )

fig = go.Figure(data=genre_traces)

# Grouped bars with linear x ticks; the axis label spelling now matches the
# figure title.
fig.update_layout(
    barmode='group',
    yaxis_title="Total reviews",
    xaxis=dict(tickmode='linear'),
    hovermode='x',
    title='Total reviews by year',
)
fig.show()

In [None]:
# Violin plot of the "Price" column, one violin per "Genre" value.
fig = px.violin(
    data,
    y="Price",
    color="Genre",
    title='Violin of Price',
)
fig.show()

In [None]:
# Scatter of "User Rating" against "Price", coloured by "Genre"; the book
# name is added to the hover box.
fig = px.scatter(
    data,
    x="Price",
    y="User Rating",
    color="Genre",
    hover_data={'Name': True},
)

# The title is applied after construction, as in the original cell.
fig.update_layout(title='User Raiting/Price')

fig.show()

In [None]:
# Word cloud of the words that appear in the book titles.

# Seed for the word cloud so that re-running the notebook reproduces the
# same image instead of a different random arrangement each time.
WORDCLOUD_SEED = 42

stopwords = set(STOPWORDS)

# Collect every whitespace-separated token of every title, lower-cased, into
# one space-separated string. A single join avoids re-copying a growing
# string on each iteration.
comment_words = " ".join(
    token.lower()
    for title in data['Name']
    for token in str(title).split()
)

wordcloud = WordCloud(
    width=700,
    height=700,
    background_color='white',
    stopwords=stopwords,
    min_font_size=10,
    random_state=WORDCLOUD_SEED,
).generate(comment_words)

# Show the rendered cloud as an image, without axes or padding.
plt.figure(figsize=(7, 7), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()