In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Read CSV

In [None]:
# Load the books CSV and index it by "bookID".
# `on_bad_lines="warn"` skips rows the parser cannot read and emits a warning for each one.
# It replaces `error_bad_lines=False`, which was deprecated in pandas 1.3 and removed in
# pandas 2.0 (where passing it raises a TypeError). Requires pandas >= 1.3.
books = pd.read_csv(
    "../input/goodreadsbooks/books.csv",
    on_bad_lines="warn",
).set_index("bookID")

# Parse "publication_date" as month/day/year; values that do not match the format
# become NaT (errors="coerce") instead of raising.
books["publication_date"] = pd.to_datetime(
    books["publication_date"], format="%m/%d/%Y", errors="coerce"
)

n_rows, n_columns = books.shape
print(f"The dataset has {n_rows} rows and {n_columns} columns")

#### Books with Average Rating of 5

In [None]:
# Select the descriptive columns of every book whose average rating is exactly 5
is_rated_5 = books["average_rating"] == 5.0
books_rated_5 = books.loc[is_rated_5, ["title", "authors", "average_rating", "language_code"]]
print(f"{len(books_rated_5)} books have 5 as their average ratings")
books_rated_5

#### Books with Average Rating of 1

In [None]:
# Select the descriptive columns of every book whose average rating is exactly 1
is_rated_1 = books["average_rating"] == 1.0
books_rated_1 = books.loc[is_rated_1, ["title", "authors", "average_rating", "language_code"]]
print(f"{len(books_rated_1)} books have 1 as their average ratings")
books_rated_1

#### Top 10 Books with Highest Rating Count

In [None]:
# Ten books with the largest "ratings_count", most-rated first
books_most_rated = (
    books.sort_values("ratings_count", ascending=False)
    .head(10)
    .loc[:, ["title", "ratings_count"]]
)

# Set the theme BEFORE creating the figure so figure-level settings come from this
# theme rather than from whatever style was active when the cell started.
sns.set_theme(style="darkgrid")
plt.figure(figsize=(15, 6))
# `a` holds the Axes returned by seaborn; the name is kept for any later cell that uses it
a = sns.barplot(x=books_most_rated["ratings_count"], y=books_most_rated["title"], palette="husl")
plt.title("Top 10 Books with Highest Rating Count")
plt.xlabel("Rating Count")
plt.ylabel("Title")
plt.show()

# Jupyter only renders the LAST bare expression of a cell, so the table goes here
# (placed mid-cell it was silently discarded); it is shown beneath the chart.
books_most_rated

#### Top 10 Books with Highest Reviews Count

In [None]:
# Ten books with the largest "text_reviews_count", most-reviewed first (all columns kept)
books_most_reviewed = books.sort_values("text_reviews_count", ascending=False).head(10)

# Set the theme BEFORE creating the figure so figure-level settings come from this
# theme rather than from whatever style was active when the cell started.
sns.set_theme(style="dark")
plt.figure(figsize=(15, 6))
sns.barplot(x=books_most_reviewed["text_reviews_count"], y=books_most_reviewed["title"], palette="pastel")
plt.title("Top 10 Books with Highest Reviews Count")
plt.xlabel("Reviews Count")
plt.ylabel("Title")
plt.show()

# Jupyter only renders the LAST bare expression of a cell, so the table goes here
# (placed mid-cell it was silently discarded); it is shown beneath the chart.
books_most_reviewed

#### Top 10 Authors with Most Books Published

In [None]:
# Count rows per "authors" value, then keep the ten largest counts (largest first)
books_per_author = books.groupby("authors").size()
authors_with_most_books = books_per_author.sort_values(ascending=False).head(10)

# Horizontal bar chart of those counts, drawn on a fresh 15x6 figure
plt.figure(figsize=(15, 6))
authors_with_most_books.plot(kind="barh", label="books count", legend=True)