



*   Which author's books receive the highest average rating?
*   Which author has written the most bestsellers?
*   Which genres become bestsellers more often?
*   Which book has the most reviews?





In [None]:
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# Read the bestsellers CSV (the path is relative to the notebook's working
# directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='amazon_books.csv')
df.head(n=5)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [None]:
from flask import Flask, jsonify  # NOTE(review): neither name is used in the cells visible here

# Mean user rating per genre; the column is relabelled so the JSON key reads
# 'Average rating' instead of 'User Rating'.
group = (
    df.groupby('Genre')[['User Rating']]
      .mean()
      .rename(columns={'User Rating': 'Average rating'})
)

# Serialise only the 'Fiction' row.
group.loc['Fiction'].to_json()

'{"Average rating":4.6483333333}'

In [None]:
# Serialise the whole `group` frame to a JSON string.
# NOTE(review): the saved output of this cell is keyed by author under
# 'User Rating', which a genre-indexed frame with an 'Average rating' column
# would not produce — presumably `group` was rebound by a cell that no longer
# exists. Confirm with Restart Kernel -> Run All.
group.to_json()

'{"User Rating":{"Abraham Verghese":4.6,"Adam Gasiewski":4.4,"Adam Mansbach":4.8,"Adir Levy":4.8,"Admiral William H. McRaven":4.7,"Adult Coloring Book Designs":4.5,"Alan Moore":4.8,"Alex Michaelides":4.5,"Alice Schertle":4.9,"Allie Brosh":4.7,"American Psychiatric Association":4.5,"American Psychological Association":4.5,"Amor Towles":4.7,"Amy Ramos":4.3,"Amy Shields":4.8,"Andy Weir":4.7,"Angie Grace":4.6,"Angie Thomas":4.8,"Ann Voskamp":4.6,"Ann Whitford Paul":4.8,"Anthony Bourdain":4.8,"Anthony Doerr":4.6,"Atul Gawande":4.8,"Audrey Niffenegger":4.4,"B. J. Novak":4.8,"Bessel van der Kolk M.D.":4.8,"Bill Martin Jr.":4.9,"Bill O\'Reilly":4.6428571429,"Bill Simmons":4.7,"Blue Star Coloring":4.5,"Bob Woodward":4.4,"Brandon Stanton":4.8333333333,"Brene\\u0301 Brown":4.8,"Brian Kilmeade":4.6,"Bruce Springsteen":4.7,"Carol S. Dweck":4.6,"Celeste Ng":4.5,"Charlaine Harris":4.45,"Charles Duhigg":4.6,"Charles Krauthammer":4.7,"Cheryl Strayed":4.4,"Chip Gaines":4.9,"Chip Heath":4.6,"Chris Cleave

In [None]:
# Top 13 authors by mean user rating over all of their rows.
top_13_autors = (
    df.groupby('Author')[['User Rating']]
      .mean()
      .sort_values('User Rating', ascending=False)
      .head(13)
      .reset_index()
)

# Top 10 authors by number of rows. count() counts every row, so a title that
# occupies several rows contributes once per row.
number_of_books_written = (
    df.groupby('Author')[['Name']]
      .count()
      .sort_values('Name', ascending=False)
      .head(10)
      .reset_index()
)

# Top 5 books by number of reviews.
# A title can occupy more than one row (presumably one per year it was a
# bestseller — TODO confirm against the dataset). Assuming each row carries the
# book's overall review total, summing would multiply that total by the number
# of rows and push long-running titles to the top, so keep one total per title
# with max() instead of sum().
books_with_the_number_of_reviews = (
    df.groupby('Name')[['Reviews']]
      .max()
      .sort_values('Reviews', ascending=False)
      .head(5)
      .reset_index()
)

# Number of rows per genre, largest first.
number_of_books_by_genre = (
    df.groupby('Genre')[['Name']]
      .count()
      .sort_values('Name', ascending=False)
      .head(10)
      .reset_index()
)

In [None]:
# One-cell grid whose cell is declared as a table cell. With the default 'xy'
# cell type make_subplots adds cartesian x/y axes to the layout, which a Table
# trace never uses.
fig_1 = make_subplots(rows=1, cols=1, specs=[[{'type': 'table'}]])

# Table of the top-rated authors. Header and cell values are both derived from
# the frame's column list, so they cannot drift apart.
fig_1.add_trace(go.Table(columnorder=[1, 2],
                         columnwidth=[100, 50],
                         header=dict(values=list(top_13_autors.columns),
                                     line_color='darkslategray',
                                     fill_color='Salmon',
                                     height=30),
                         cells=dict(values=[top_13_autors[column]
                                            for column in top_13_autors.columns],
                                    line_color='darkslategray',
                                    fill_color='White')),
                row=1, col=1)

# Centered title and a uniform font for the whole figure.
fig_1.update_layout(showlegend=False,
                    title_text='Top 13 best authors',
                    title_font_size=16,
                    title_font_family='Arial',
                    title_x=0.5,
                    font=dict(family='Arial',
                              size=12,
                              color='black'))

# Render the table.
fig_1.show()

# Bar fills fade from salmon towards gray, one shade per bar.
author_bar_colors = ['#ff6666', '#f76e6e', '#f07575', '#e87d7d', '#e08585',
                     '#d98c8c', '#d19494', '#c99c9c', '#c2a3a3', '#baabab']
review_bar_colors = ['#ff6666', '#f47171', '#e87d7d', '#dd8888', '#d19494']

# Left panel: rows per author. Right panel: review counts per book.
author_bars = go.Bar(x=number_of_books_written['Author'],
                     y=number_of_books_written['Name'],
                     name='Count books',
                     marker_color=author_bar_colors)
review_bars = go.Bar(x=books_with_the_number_of_reviews['Name'],
                     y=books_with_the_number_of_reviews['Reviews'],
                     name='Reviews',
                     marker_color=review_bar_colors)

# Two cartesian panels side by side, each with its own title.
fig_2 = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'xy'}, {'type': 'xy'}]],
    subplot_titles=("Top 10 authors by number of bestsellers",
                    "Top 5 books by number of reviews"),
)
fig_2.add_trace(author_bars, row=1, col=1)
fig_2.add_trace(review_bars, row=1, col=2)

# Thin gray outline around every bar in both panels.
fig_2.update_traces(marker_line=dict(color='Gray', width=1))

# Transparent plot background, no legend, uniform font.
fig_2.update_layout(
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    font={'family': 'Arial', 'size': 12, 'color': 'black'},
)

# Render the bar charts.
fig_2.show()

# One-cell grid of type 'domain', the subplot type used for pie traces.
fig_3 = make_subplots(rows=1, cols=1,
                      specs=[[{'type': 'domain'}]])

# Donut chart of row counts per genre. Labels are read from the same frame as
# the values, so every slice is captioned with its own genre regardless of the
# order the counts sort into (hard-coded labels would silently swap if the
# ranking of the genres changed).
fig_3.add_trace(go.Pie(labels=number_of_books_by_genre['Genre'],
                       values=number_of_books_by_genre['Name'],
                       title='Genre',
                       textinfo='label+percent',
                       insidetextorientation='radial',
                       hole=0.3,
                       marker=dict(colors=['Gray', 'Salmon'],
                                   line_color='Gray',
                                   line_width=1),
                       textfont={'color': '#FFFFFF', 'size': 12}))

# Centered title, small top margin, no legend (labels are drawn on the slices).
fig_3.update_layout(margin_t=30,
                    showlegend=False,
                    title_text='Count of books genre',
                    title_x=0.5,
                    font=dict(family='Arial',
                              size=12,
                              color='black'))

# Render the pie chart.
fig_3.show()


In [None]:
def make_box_figure(data, column):
    """Build a one-panel box plot of ``data[column]``.

    The column's values are passed as the trace's ``x`` data and the column
    name is used as the trace name.

    Parameters
    ----------
    data : DataFrame-like object indexable by column name.
    column : name of the column to plot.

    Returns
    -------
    The configured plotly figure (not yet shown).
    """
    fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'xy'}]])
    fig.add_trace(go.Box(x=data[column], name=column))
    fig.update_traces(marker_color='Salmon')
    fig.update_layout(showlegend=False,
                      template='simple_white',
                      font=dict(family='Arial',
                                size=12,
                                color='black'))
    return fig


# Distribution of user ratings.
fig_4 = make_box_figure(df, 'User Rating')
fig_4.show()

# Distribution of review counts.
fig_5 = make_box_figure(df, 'Reviews')
fig_5.show()

# Distribution of prices.
fig_6 = make_box_figure(df, 'Price')
fig_6.show()

In [None]:
def make_scatter_figure(data, x_column, y_column):
    """Build a one-panel marker scatter of ``data[y_column]`` against ``data[x_column]``.

    Each axis is titled with the name of the column plotted on it.

    Parameters
    ----------
    data : DataFrame-like object indexable by column name.
    x_column : name of the column plotted on the x axis.
    y_column : name of the column plotted on the y axis.

    Returns
    -------
    The configured plotly figure (not yet shown).
    """
    fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'xy'}]])
    fig.add_trace(go.Scattergl(x=data[x_column],
                               y=data[y_column],
                               mode='markers',
                               marker_color='salmon'))
    fig.update_layout(template='simple_white',
                      showlegend=False,
                      xaxis={'title': x_column},
                      yaxis={'title': y_column},
                      font=dict(family='Arial',
                                size=12,
                                color='black'))
    return fig


# Reviews against user rating.
fig_7 = make_scatter_figure(df, 'User Rating', 'Reviews')
fig_7.show()

# Price against user rating.
fig_8 = make_scatter_figure(df, 'User Rating', 'Price')
fig_8.show()

# Reviews against price.
fig_9 = make_scatter_figure(df, 'Price', 'Reviews')
fig_9.show()