## Data visualizations with Plotly, Seaborn, and Matplotlib

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt
plt.style.use('ggplot')          # python visualization package
plt.rcParams['figure.dpi']=300   # the resolution in dots per inch
import missingno as msno
import seaborn as sns
sns.set_palette('husl')

import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as pof                   #plotting offline
from plotly.subplots import make_subplots




In [None]:
# Ask TensorFlow which local compute devices it can see; the notebook displays
# the result of the call as the cell output.
# NOTE(review): `tensorflow.python.client` is not part of the documented public
# API surface — confirm it is still importable on the TensorFlow version in use.
from tensorflow.python.client import device_lib

device_lib.list_local_devices()

### Let's view the data

In [None]:
# Read the bestsellers CSV into the notebook-wide DataFrame and preview the
# first rows.
df = pd.read_csv(
    '../input/amazon-top-50-bestselling-books-2009-2019/bestsellers with categories.csv'
)
df.head(n=5)

In [None]:
df.describe()    # summary statistics (count, mean, std, quartiles, ...) of the columns

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

### Visualizations

In [None]:
# missingno matrix of the frame: shows where values are present vs. missing.
msno.matrix(df,sort="descending", figsize=(8,4))   # graph plots the non-null values

In [None]:
# Count how often each book title shows up again after its first occurrence.
# `duplicated` flags every repeat of a name (the first occurrence is not flagged).
repeat_mask = df.duplicated(subset=['Name'])
s = df.loc[repeat_mask, 'Name'].value_counts()
s

In [None]:
# Pie chart of the 10 book titles that repeat most often.

repeated_titles = df.loc[df.duplicated(subset=['Name']), 'Name']
list_names = (
    repeated_titles
    .value_counts()
    .sort_values(ascending=False)
    .iloc[:10]
)

# Labels are the titles, wedge sizes are their repeat counts.
names = list(list_names.index)
list_names_count = list(list_names.values)

plt.pie(list_names_count, labels=names)
# Equivalent without the intermediate lists:
# plt.pie(list_names,labels=list_names.index)

In [None]:
# Keep only the first row for every book title, then preview the result.
df = df.drop_duplicates(subset=['Name'], keep='first')
df.head(n=5)

In [None]:
# Re-inspect the frame (row count, dtypes, non-null counts) after the de-duplication above.
df.info()

In [None]:
# Number of books per distinct user rating (most frequent rating first).
ratings = df['User Rating']
s = ratings.value_counts()
s

In [None]:
## Bar chart: how many books received each rating

# Bars and their outline share the same orange.
orange = 'rgb(255,165,0)'
bar_marker = dict(color=orange, line=dict(color=orange, width=1.5))

graph1 = go.Bar(
    x=list(s.index),
    y=list(s.values),
    marker=bar_marker,
)
layout = go.Layout(
    template="plotly_dark",
    title='rating count',
    xaxis=dict(title='ratings'),
    yaxis=dict(title='counts'),
)

fig = go.Figure(data=[graph1], layout=layout)
fig.show()

In [None]:
# px.bar(df['User Rating'].value_counts().reset_index(),x='index',y='User Rating',title='rating count')
## Prefer plotly graph objects when titles and axis labels need to be set manually

In [None]:
# Top 10 authors by number of books in the frame.

author_counts = df['Author'].value_counts()
t = author_counts.iloc[:10]
t.head(10)

In [None]:
def pie_plot(counts=None, title='top 10 authors'):
    """Show a donut chart (pie with ``hole=0.8``) of a value-count Series.

    Args:
        counts: Series whose index supplies the slice labels and whose values
            supply the slice sizes. When omitted, the notebook-level variable
            ``t`` is used, which is what this function read before the
            parameter existed. Passing it explicitly avoids plotting the wrong
            data after a later cell rebinds ``t``.
        title: Text handed to the pie trace as its title.

    Returns:
        None. The figure is rendered via ``fig.show()``.
    """
    if counts is None:
        # Resolved at call time so an argument-less call behaves as before.
        counts = t
    graph2 = go.Pie(values=counts.values, labels=counts.index, title=title,
                    hoverinfo='percent+value', textinfo='percent',
                    textposition='inside', hole=0.8, showlegend=True)
    fig = go.Figure(data=[graph2])
    fig.show()

pie_plot()

In [None]:
## Books vs reviews
# The 10 books with the highest review counts (name and review count only).

by_reviews = df.sort_values(by='Reviews', ascending=False)
c = by_reviews.iloc[:10][['Name', 'Reviews']]
c.head()

In [None]:
# Bar chart of the most-reviewed books held in `c`.
orange = 'rgb(255,165,0)'
graph3 = go.Bar(
    x=c.Name,
    y=c.Reviews,
    marker=dict(color=orange, line=dict(color=orange, width=1.5)),
)
layout = go.Layout(
    title='Top 10 books with reviews',
    xaxis=dict(title='book name'),
    yaxis=dict(title='reviews'),
    width=1000,
    height=1000,
)
fig = go.Figure(data=[graph3], layout=layout)
fig.show()


In [None]:
### Price visualization
# Split the prices into 10 quantile-based bins.
bins = pd.qcut(df['Price'], q=10)
bins

In [None]:
# Name/price table with each book's price bin attached.
# `assign` returns a new DataFrame, so the `bins` column is never written onto
# a slice of `df` (which is what triggered pandas' SettingWithCopyWarning).
# `bins` is the Series produced by the qcut cell and is aligned on the index.
t = df[['Name', 'Price']].assign(bins=bins)
t.head()

In [None]:
# Histogram of book prices using 10 equal-width bins.
plt.figure(figsize=(5, 5))
price_values = t['Price'].values
plt.hist(price_values, bins=10)

In [None]:
## Number of books per year
sns.set(rc={'figure.figsize': (10,8)})
# Pass the column through the `x` keyword: from seaborn 0.12 on, the only
# positional argument of countplot is `data`, so a positional Series is no
# longer treated as the variable to count.
sns.countplot(x=df['Year'])

In [None]:
### Genre vs user ratings
# For each genre, count how many books received each rating.
ratings_by_genre = df[['Genre', 'User Rating']].groupby('Genre')['User Rating']
t = ratings_by_genre.value_counts()
# fic_counts,non_fic_counts=[t[i] for i in t.index if i[0]=='Fiction'],[t[i] for i in t.index if i[0]=='Non Fiction']
t

In [None]:
## Split the ratings into a fiction frame and a non-fiction frame
# Each result is a single-column DataFrame ('User Rating') with a fresh 0..n-1 index.
is_fiction = df['Genre'] == 'Fiction'
is_non_fiction = df['Genre'] == 'Non Fiction'

f = df.loc[is_fiction, ['User Rating']].reset_index(drop=True)
nf = df.loc[is_non_fiction, ['User Rating']].reset_index(drop=True)


In [None]:
## User rating distribution for fiction books
# Name the column via `x=`/`data=`: from seaborn 0.12 on, a positional Series
# is taken as `data`, not as the variable to count.
sns.countplot(x='User Rating', data=f)
plt.title('Fiction')
plt.show()

In [None]:
## User rating distribution for non-fiction books

# Name the column via `x=`/`data=`: from seaborn 0.12 on, a positional Series
# is taken as `data`, not as the variable to count.
sns.countplot(x='User Rating', data=nf)
plt.title('Non- Fiction')
plt.show()

In [None]:
# Preview the first rows of the frame again before the author analysis.
df.head()

In [None]:
## Books grouped by author

# go.Bar(x=c.Name,y=c.Reviews,marker=dict(color='rgb(255,165,0)',line=dict(color='rgb(255,165,0)',width=1.5)))

# `z` is the author/name frame grouped by author.
author_and_name = df[['Author', 'Name']]
z = author_and_name.groupby('Author')

# Example: occurrence count of each (author, title) row for one author.
jeff_kinney_books = z.get_group('Jeff Kinney')
jeff_kinney_books.value_counts()

In [None]:
# Top 10 authors by number of titles, drawn as a line chart.
author_list = list(df['Author'].unique())

# One grouped pass replaces the per-author get_group()/value_counts() loop.
# `count()` tallies the non-null names per author, and sort=False keeps the
# authors in first-appearance order (the order of `author_list`), so the
# stable sort below breaks ties exactly as before.
titles_per_author = df.groupby('Author', sort=False)['Name'].count()
books_count_with_author = sorted(
    ((int(n_titles), author) for author, n_titles in titles_per_author.items()),
    key=lambda pair: pair[0],
    reverse=True,
)

top_authors = books_count_with_author[:10]
books_count = [n_titles for n_titles, _ in top_authors]
authors = [author for _, author in top_authors]
print(books_count, authors)


# go.Line is a deprecated graph_objs class; a Scatter trace with an explicit
# lines+markers mode is the supported way to draw the same line chart.
graph4 = go.Scatter(
    x=authors,
    y=books_count,
    mode='lines+markers',
    marker=dict(color="rgb(255,0,123)", line=dict(color="rgb(255,10,145)", width=2.0)),
)
layout4 = go.Layout(
    template='plotly_dark',
    xaxis=dict(title='authors'),
    yaxis=dict(title='book counts'),
    title='Total no.of books written by each author',
)
fig = go.Figure([graph4], layout=layout4)
fig.show()


In [None]:
### Price vs reviews vs year

# One row, two panels: a price/reviews scatter on the left and a price/year
# bar trace on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Plot 1", "Plot 2"))

# Left panel
price_vs_reviews = go.Scatter(
    x=df['Price'].values,
    y=df['Reviews'].values,
    mode='markers',
    name='price vs reviews',
)
fig.add_trace(price_vs_reviews, row=1, col=1)
fig.update_xaxes(title_text='price', row=1, col=1)
fig.update_yaxes(title_text='reviews', row=1, col=1)

# Right panel
price_vs_years = go.Bar(
    x=df['Price'].values,
    y=df['Year'].values,
    name='price vs years',
)
fig.add_trace(price_vs_years, row=1, col=2)
fig.update_xaxes(title_text='price', row=1, col=2)
fig.update_yaxes(title_text='year', row=1, col=2)

# Figure-wide title and font; as the last expression of the cell, the returned
# figure is what the notebook renders.
title_font = dict(
    family="Courier New, monospace",
    size=18,
    color="RebeccaPurple",
)
fig.update_layout(title="price vs reviews vs year", font=title_font)

In [None]:
## Plotly Express version: price vs reviews, points colored by year
px.scatter(
    df,
    x='Price',
    y='Reviews',
    color='Year',
)

In [None]:
### Price vs reviews, points colored by genre
px.scatter(
    df,
    x='Price',
    y='Reviews',
    color='Genre',
)

In [None]:
# Check the column names and dtypes once more before the year/genre breakdown.
df.info()

In [None]:
### Year vs Genre
# matplotlib visualization

# (genre, count) pairs for the books of 2016, most frequent genre first.
genre_counts_2016 = df['Genre'][df['Year'] == 2016].value_counts()
list(zip(genre_counts_2016.index, genre_counts_2016.values))

In [None]:
# Grouped bar chart: number of non-fiction and fiction books per year.
#
# A year-by-genre crosstab fixes the column order, so column 0 is always
# 'Non Fiction' and column 1 is always 'Fiction'. Per-year value_counts()
# orders genres by frequency instead, which could swap the two series from one
# year to the next and produced a ragged array if a year had only one genre.
genre_order = ['Non Fiction', 'Fiction']
year_genre_counts = (
    pd.crosstab(df['Year'], df['Genre'])
    .reindex(columns=genre_order, fill_value=0)   # missing genre -> 0 books
    .sort_index()                                 # years in chronological order
)

unique_years = list(year_genre_counts.index)
X = np.arange(len(unique_years))
barWidth = 0.25
counts = year_genre_counts.to_numpy()

plt.bar(X, counts[:, 0], color='r', width=barWidth, edgecolor='grey', label='non-fiction')
plt.bar(X + barWidth, counts[:, 1], color='b', width=barWidth, edgecolor='grey', label='fiction')
# Put each year label midway between the centers of its two bars.
plt.xticks(X + barWidth / 2, unique_years)
plt.legend()