In [None]:
import numpy as np
import pandas as pd 
import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

- [Importing necessary libraries](#1)
- [Data loading and quick look at the data](#2)
- [Exploratory data analysis](#3)
    - [Frequency(ratio) of Genres](#4)
    - [Distributions of numeric values](#5)
    - [Boxplots of numeric values](#6)
    - [Number of the most rated books by author](#7)
    - [Average prices of books per year](#8)
    - [Number of books written by year](#9)
    - [Number of reviews written by year](#10)

# Importing necessary libraries <a id="1"></a>

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
# Suppress every warning for the rest of the session.
# NOTE: this also hides deprecation notices raised by the plotting libraries.
warnings.filterwarnings('ignore')

# Data loading and quick look at the data <a id="2"></a>

In [None]:
# Read the bestsellers CSV from the Kaggle input directory and preview the first rows.
csv_path = '/kaggle/input/amazon-top-50-bestselling-books-2009-2019/bestsellers with categories.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Display the frame's dimensions as (rows, columns).
df.shape

In [None]:
# Print each column's dtype and non-null count, plus overall memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default pandas
# restricts this to the numeric columns of a mixed-type frame.
df.describe()

In [None]:
# Apply matplotlib's 'fivethirtyeight' style sheet to all figures created after this cell.
plt.style.use('fivethirtyeight')

# Exploratory data analysis <a id="3"></a>

## Frequency(ratio) of Genres <a id="4"></a>

In [None]:
# Bar chart of the number of rows in each genre.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='Genre', ax=ax)

## Distributions of numeric values <a id="5"></a>

In [None]:
# Draw one figure per numeric column: a density-normalised histogram with a KDE overlay.
# `sns.distplot` is deprecated and slated for removal, so use `sns.histplot`, its
# documented replacement. `kde=True` and `stat='density'` reproduce distplot's
# defaults; automatic bin selection may differ slightly.
for col in df.select_dtypes(['float64', 'int64']).columns:
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.set_title(col)
    sns.histplot(x=df[col], kde=True, stat='density', ax=ax)

## Boxplots of numeric values <a id="6"></a>

In [None]:
# One horizontal boxplot per numeric column, each on its own titled figure.
numeric_columns = df.select_dtypes(['float64', 'int64']).columns
for column_name in numeric_columns:
    fig, axis = plt.subplots(figsize=(15, 5))
    axis.set_title(column_name)
    sns.boxplot(x=df[column_name], ax=axis)

## Number of the most rated books by author <a id="7"></a>

In [None]:
# Rank rows by user rating (review count breaks ties), both descending,
# and keep the first 100.
ranked_by_rating = df.sort_values(by=['User Rating', 'Reviews'], ascending=[False, False])
df_top_rating = ranked_by_rating.iloc[:100]

In [None]:
# Horizontal bars: number of rows each author has within the top-rated selection.
fig, author_axes = plt.subplots(figsize=(10, 10))
ax = sns.countplot(data=df_top_rating, y='Author', color='#FF5733', ax=author_axes)

## Average prices of books per year <a id="8"></a>

In [None]:
# Mean price for every (Year, Genre) pair, drawn as one line per genre.
plt.figure(figsize=(12, 5))
year_info = df.groupby(['Year', 'Genre'])['Price'].mean().reset_index()
sns.lineplot(data=year_info, x='Year', y='Price', hue='Genre')

## Number of books written by year <a id="9"></a>

In [None]:
# Print the number of rows recorded for each year, in ascending year order.
# The original filtered on the literal 2010 for every iteration, so each line
# showed the 2010 count; counting per year in one pass fixes that.
books_per_year = df['Year'].value_counts().sort_index()
for year, n_books in books_per_year.items():
    print(str(year) + ': ' + str(n_books))

In [None]:
# Bar chart of the number of rows per year.
# Pass the column by keyword: recent seaborn releases treat the first positional
# argument as `data`, not `x`, so a positional Series no longer selects the
# variable to count.
plt.figure(figsize=(10, 5))
sns.countplot(x='Year', data=df, color='#8FBD14')

## Number of reviews written by year <a id="10"></a>

In [None]:
# Sum the review counts within each year; the result is a Series indexed by Year.
year_reviews = df['Reviews'].groupby(df['Year']).sum()
year_reviews

In [None]:
# Bar chart of the total review count per year.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=year_reviews.index, y=year_reviews, color='#1465BD', ax=ax)