![](https://images-na.ssl-images-amazon.com/images/G/01/subsamazon/merch/abb/AmazonBookBox_books_page_content_grid_440x344._CB418110359_.jpg)

In [None]:
import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings raised by pandas/seaborn,
# so API changes in those libraries will go unnoticed until a call actually fails.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's 'darkgrid' style for the figures drawn below.
sns.set_style('darkgrid')

In [None]:
# Read the raw bestseller table from the ../input directory and preview the first rows.
csv_path = '../input/amazon-top-50-bestselling-books-2009-2019/bestsellers with categories.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Number of rows that are an exact repeat (all columns equal) of an earlier row.
data.duplicated().sum()

In [None]:
# Shorten 'User Rating' to 'Rating'; later cells read the column as data.Rating.
data = data.rename(columns={'User Rating': 'Rating'})

In [None]:
# Missing-value count per column, shown as a one-column table.
data.isna().sum().to_frame('Null Values')

In [None]:
# Print the frame summary: column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Number of distinct values in each column.
data.nunique().to_frame('Count')

In [None]:
# One marker per (author, year) appearance on the list, coloured by genre.
# Columns are selected by keyword (x=, y=, hue=, data=): seaborn deprecated passing
# data vectors positionally in 0.11 and rejects it from 0.12 on.
plt.figure(figsize=(15, 7))
sns.scatterplot(x='Author', y='Year', hue='Genre', data=data, s=100, palette='viridis')
plt.yticks(fontsize=12)
plt.xticks(rotation=45)
plt.show()

In [None]:
# One box-plot figure per metric, grouped by year and split by genre.
# Each entry is (column to plot, figure title, colour palette).
yearly_panels = [
    ('Rating', 'RATING', 'cividis'),
    ('Reviews', 'REVIEWS', 'mako'),
    ('Price', 'PRICE', 'summer'),
]
for metric, heading, colours in yearly_panels:
    plt.figure(figsize=(15, 5))
    plt.title(heading, fontsize=15)
    # Keyword arguments are required: seaborn deprecated positional data vectors
    # in 0.11 and rejects them from 0.12 on.
    sns.boxplot(x='Year', y=metric, hue='Genre', data=data, palette=colours)
    plt.show()

In [None]:
# Genre share of all list entries (pie) next to the per-year genre counts (bars).
entry_genre_counts = data.Genre.value_counts()
fig, ax = plt.subplots(1, 2, figsize=(15, 7))
# Pull out only the first wedge (value_counts sorts descending, so the largest one).
# The offsets are sized from the data: a fixed 2-tuple makes matplotlib raise as soon
# as the number of genres is not exactly two.
wedge_offsets = [0.03 if i == 0 else 0 for i in range(len(entry_genre_counts))]
entry_genre_counts.plot.pie(shadow=True, radius=0.7, autopct='%1.2f%%',
                            textprops={"fontsize": 15}, explode=wedge_offsets, ax=ax[0])
sns.countplot(y=data.Year, hue=data.Genre, ax=ax[1])
# plt.show() instead of fig.show(): the latter only emits a warning on non-GUI
# (inline) backends.
plt.show()
entry_genre_counts.to_frame('Count')

### Extracting Necessary Data

In [None]:
# Show the rows at positions 531-532 with a coloured background.
# NOTE(review): presumably chosen to illustrate a title repeated across years — confirm.
data.iloc[531:533].style.set_properties(**{'background-color': '#AFEEEE'})

In [None]:
# Show the rows at positions 540-544 with a coloured background.
# NOTE(review): presumably another run of repeated titles — confirm.
data.iloc[540:545].style.set_properties(**{'background-color': '#AFEEEE'})

In [None]:
# Show every row from position 546 to the end of the table with a coloured background.
data.iloc[546:].style.set_properties(**{'background-color': '#AFEEEE'})

In [None]:
# Without the 'Year' column, a title listed in several years yields identical rows;
# keep only one copy of each.
without_year = data.drop(columns=['Year'])
new = without_year.drop_duplicates()
new.head()

In [None]:
# Rows per title after de-duplication; a count above 1 means the title still has
# several rows that differ in at least one of the remaining columns.
new.Name.value_counts()

In [None]:
# Show every remaining row for 'The Help' with a coloured background.
the_help_rows = new.loc[new.Name == 'The Help']
the_help_rows.style.set_properties(**{'background-color': '#AFEEEE'})

In [None]:
# Build one row per (Name, Author).
# Author spellings are unified *before* grouping: grouping first would keep two
# spellings of the same author for one title as separate rows, which then become
# duplicate (Name, Author) pairs once the spelling is fixed.
author_spelling = {
    'J. K. Rowling': 'J.K. Rowling',
    'George R. R. Martin': 'George R.R. Martin',
}
new = new.assign(Author=new.Author.replace(author_spelling))
# max() is evaluated per column, so the Rating / Reviews / Price kept for a title
# may come from different source rows of that title.
new = new.groupby(['Name', 'Author']).max().reset_index()
new.head()

In [None]:
# (rows, columns) of the one-row-per-book table.
new.shape

In [None]:
# Print column dtypes and non-null counts of the one-row-per-book table.
new.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
new.describe()

In [None]:
# Correlation matrix of the numeric columns, drawn as an annotated heatmap.
# The string columns (Name, Author, Genre) are excluded explicitly: DataFrame.corr()
# stopped dropping non-numeric columns silently in pandas 2.0 and raises on them instead.
numeric_corr = new.select_dtypes(include='number').corr()
plt.figure(figsize=(4, 4))
plt.title('Correlation', fontsize=15)
sns.heatmap(numeric_corr, annot=True, cmap='crest', annot_kws={'size': 18}, cbar=False)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()

In [None]:
# Grid of pairwise plots over the columns of `new` that seaborn selects by default;
# each panel is 2.5 inches high.
sns.pairplot(data=new, height=2.5)
plt.show()

In [None]:
# Distribution of each numeric column drawn four ways (KDE, box, swarm, violin):
# one figure per plot type, three panels per figure (one per column).
col = ['Rating', 'Reviews', 'Price']
# (seaborn plotting function, keyword arguments specific to that plot type)
plot_kinds = [
    (sns.kdeplot, {'color': '#800000'}),
    (sns.boxplot, {'color': '#778899'}),
    (sns.swarmplot, {'s': 7, 'color': '#D2691E'}),
    (sns.violinplot, {'color': '#808000'}),
]
for kind_no, (draw, options) in enumerate(plot_kinds):
    fig, ax = plt.subplots(1, 3, figsize=(15, 2))
    for i, column in enumerate(col):
        if kind_no == 0:
            # Only the top figure carries the column titles.
            ax[i].set_title(column, fontsize=15)
        # The values are passed as x= on purpose: from seaborn 0.12 the first
        # positional parameter of these functions is `data`, so a bare Series is
        # no longer interpreted as the x vector.
        draw(x=new[column], ax=ax[i], **options)
    # plt.show() instead of fig.show(): the latter only warns on non-GUI backends.
    plt.show()

In [None]:
# Genre share among unique books (pie) and the number of books per rating value,
# split by genre (bars).
book_genre_counts = new.Genre.value_counts()
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
# Offset only the first (largest) wedge; built from the data so the call does not
# fail when the number of genres is not exactly two.
wedge_offsets = [0.05 if i == 0 else 0 for i in range(len(book_genre_counts))]
book_genre_counts.plot.pie(shadow=True, autopct='%1.2f%%', cmap='tab20',
                           textprops={"fontsize": 15}, explode=wedge_offsets, ax=ax[0])
# x= is given by keyword: seaborn deprecated positional data vectors in 0.11 and
# treats the first positional argument as `data` from 0.12 on.
sns.countplot(x=new.Rating, palette='tab20c', hue=new.Genre, ax=ax[1])
# plt.show() instead of fig.show(): the latter only warns on non-GUI backends.
plt.show()
book_genre_counts.to_frame('Count')

In [None]:
# Share of each rating value within a genre: one pie per genre, side by side.
fig, ax = plt.subplots(1, 2, figsize=(18, 7))
# (genre label in the data, panel title, colormap)
genre_pies = [
    ('Fiction', 'Fiction: Rating', 'cividis_r'),
    ('Non Fiction', 'Non-Fiction: Rating', 'summer_r'),
]
for panel, (genre, heading, colormap) in zip(ax, genre_pies):
    panel.set_title(heading, fontsize=16)
    rating_counts = new.loc[new.Genre == genre, 'Rating'].value_counts()
    rating_counts.plot.pie(shadow=True, autopct='%1.2f%%', cmap=colormap,
                           textprops={"fontsize": 12}, ax=panel)
fig.show()

## Reviews & Rating

In [None]:
# Split the one-row-per-book table by genre; Genre is constant within each half,
# so the column is dropped.
fiction = new[new.Genre == 'Fiction'].drop('Genre', axis=1).sort_values('Author')
non_fict = new[new.Genre == 'Non Fiction'].drop('Genre', axis=1).sort_values('Author')


def _author_totals(books):
    """Aggregate a book table to one row per author.

    Numeric columns are summed per author; ``Rating`` is then replaced by the
    author's mean rating rounded to one decimal.

    ``numeric_only=True`` keeps the string ``Name`` column out of the sum: on
    pandas >= 2.0 the default would concatenate every title of an author into
    one long string instead of dropping the column.
    """
    by_author = books.groupby('Author')
    totals = by_author.sum(numeric_only=True)
    totals['Rating'] = np.round(by_author['Rating'].mean(), decimals=1)
    return totals


# Despite the `_avg` suffix, only Rating is an average; the other columns are sums.
fict_avg = _author_totals(fiction)
nonf_avg = _author_totals(non_fict)

In [None]:
# Per-author mean rating (x position and colour) with the author's summed review
# count written next to each marker; fiction on the left, non-fiction on the right.
fig, ax = plt.subplots(1, 2, figsize=(15, 30))
genre_panels = [
    (ax[0], fict_avg, 'Fiction: Reviews & Rating'),
    (ax[1], nonf_avg, 'Non-Fiction: Reviews & Rating'),
]
for panel, summary, heading in genre_panels:
    panel.set_title(heading, fontsize=18)
    # x=/y= by keyword: seaborn deprecated positional data vectors in 0.11 and
    # rejects them from 0.12 on.
    sns.scatterplot(x=summary.Rating, y=summary.index, marker='*', s=300,
                    palette='Paired', hue=summary.Rating, ax=panel)
    # Walk the values directly instead of summary.Rating[i]: an integer key on a
    # string-labelled Series only works through a deprecated positional fallback.
    for author, rating, reviews in zip(summary.index, summary.Rating, summary.Reviews):
        panel.text(rating + 0.01, author, reviews, fontsize=12)
    panel.xaxis.tick_top()
# Author labels of the right-hand panel go on the outer (right) edge.
ax[1].yaxis.tick_right()
# plt.show() instead of fig.show(): the latter only warns on non-GUI backends.
plt.show()

In [None]:
# The 15 authors with the most rows in `new`, i.e. the most distinct bestselling
# titles in this dataset (not their total published output).
books = new.Author.value_counts().to_frame('Count')[:15]
plt.figure(figsize=(12, 7))
# x=/y= by keyword: seaborn deprecated positional data vectors in 0.11 and
# rejects them from 0.12 on.
sns.barplot(x=books.Count, y=books.index, palette='cividis')
plt.yticks(fontsize=14)
plt.show()

## Jeff Kinney: Author of 'Diary of a Wimpy Kid' Series

In [None]:
# Full catalogue title -> short title used as the row label in this section.
short_titles = {
    'Cabin Fever (Diary of a Wimpy Kid, Book 6)': 'Cabin Fever',
    'Diary of a Wimpy Kid: Hard Luck, Book 8': 'Hard Luck',
    'Diary of a Wimpy Kid: The Last Straw (Book 3)': 'The Last Straw',
    'Diary of a Wimpy Kid: The Long Haul': 'The Long Haul',
    'Dog Days (Diary of a Wimpy Kid, Book 4) (Volume 4)': 'Dog Days',
    'Double Down (Diary of a Wimpy Kid #11)': 'Double Down',
    'Old School (Diary of a Wimpy Kid #10)': 'Old School',
    'The Meltdown (Diary of a Wimpy Kid Book 13)': 'The Meltdown',
    'The Third Wheel (Diary of a Wimpy Kid, Book 7)': 'The Third Wheel',
    'The Ugly Truth (Diary of a Wimpy Kid, Book 5)': 'The Ugly Truth',
    'Wrecking Ball (Diary of a Wimpy Kid Book 14)': 'Wrecking Ball',
}
# Jeff Kinney's books only, without the Author and Price columns, relabelled with
# the short titles, ordered by review count (highest first) and indexed by title.
jeff = (
    new.loc[new.Author == 'Jeff Kinney']
    .drop(columns=['Author', 'Price'])
    .assign(Name=lambda frame: frame.Name.replace(short_titles))
    .sort_values('Reviews', ascending=False)
    .set_index('Name')
)

In [None]:
# Display the Jeff Kinney table with the same background colour on every cell.
jeff.style.set_properties(**{'background-color': '#EEE8AA'})

In [None]:
# Headline numbers for the author.
print('JEFF KINNEY')
print('Total Books:', len(jeff))
print('Total Reviews:', jeff.Reviews.sum())
# mean() rather than sum()/len(): it ignores missing ratings instead of counting
# them in the denominator.
print('Average Rating:', np.round(jeff.Rating.mean(), decimals=2))

# Horizontal bars of review counts; a star just past the end of each bar with the
# book's rating printed next to it. Offsets are in review-count units.
STAR_OFFSET = 120
LABEL_OFFSET = 170
plt.figure(figsize=(12, 5))
plt.title('Jeff Kinney: Diary of a Wimpy Kid (Reviews & Rating)', fontsize=15)
# x=/y= by keyword: seaborn deprecated positional data vectors in 0.11 and
# rejects them from 0.12 on.
sns.barplot(x=jeff.Reviews, y=jeff.index, palette='Paired')
sns.scatterplot(x=jeff.Reviews + STAR_OFFSET, y=jeff.index, marker='*', s=250, color='black')
# Walk the values directly instead of jeff.Reviews[i]: an integer key on a
# title-labelled Series only works through a deprecated positional fallback.
for book, reviews, rating in zip(jeff.index, jeff.Reviews, jeff.Rating):
    plt.text(reviews + LABEL_OFFSET, book, rating, fontsize=13)
plt.yticks(fontsize=12)
plt.xticks(fontsize=12)
plt.show()

# Each book's share of the author's total reviews.
plt.figure(figsize=(8, 8))
plt.title('Diary of a Wimpy Kid: Reviews', fontsize=18)
jeff.Reviews.plot.pie(shadow=True, autopct='%1.2f%%', cmap='Paired', textprops={"fontsize": 13})
plt.show()

In [None]:
import wordcloud
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image

In [None]:
# Author column of the fiction table as a plain list, one entry per row, so an
# author appears once for every fiction row they have.
authors = fiction.Author.tolist()
def gen_cloud(names=None, mask_path='../input/amazon-logo/amazon.png'):
    """Build a word cloud in which each name is sized by how often it occurs.

    Parameters
    ----------
    names : iterable of str, optional
        One entry per occurrence. When omitted, the module-level ``authors``
        list is used, which is what a bare ``gen_cloud()`` call did before.
    mask_path : str, optional
        Image file used both as the mask of the cloud and as the source of the
        word colours.

    Returns
    -------
    WordCloud
        The generated cloud, recoloured from the mask image.
    """
    # Function-scope import so the cell does not depend on an edit elsewhere.
    from collections import Counter

    if names is None:
        names = authors
    # Counter preserves first-seen order, like the hand-written tally it replaces.
    frequencies = Counter(names)

    # np.array() copies the pixel data, so the file can be closed right after
    # reading instead of leaving the handle open.
    with Image.open(mask_path) as mask_file:
        image = np.array(mask_file)

    cloud = WordCloud(background_color='white', mask=image).generate_from_frequencies(frequencies)
    # Colour every word with the mask-image colours under it.
    cloud.recolor(color_func=ImageColorGenerator(image))
    return cloud

In [None]:
# Render the author word cloud on a large canvas without axes.
cloud_fig, cloud_ax = plt.subplots(figsize=(15, 14))
cloud_ax.imshow(gen_cloud(), interpolation='bilinear')
cloud_ax.axis('off')
plt.show()

## Thank You!