In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the three semicolon-separated tables of the BX CSV dump.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0.
# `on_bad_lines='warn'` keeps the previous effect of `error_bad_lines=False`:
# malformed rows are skipped, and each skipped row is reported as a warning.
# NOTE(review): no explicit `encoding` is passed, so the files are assumed to
# decode with the pandas default — confirm against the actual dump.
CSV_READ_OPTIONS = {'sep': ';', 'on_bad_lines': 'warn'}

books = pd.read_csv('BX-CSV-Dump/BX-Books.csv', **CSV_READ_OPTIONS)
users = pd.read_csv('BX-CSV-Dump/BX-Users.csv', **CSV_READ_OPTIONS)
ratings = pd.read_csv('BX-CSV-Dump/BX-Book-Ratings.csv', **CSV_READ_OPTIONS)

In [None]:
# Preview the first five rows of the books table.
books.head(n=5)

In [None]:
# Preview the first five rows of the users table.
users.head(n=5)

In [None]:
# Replace every missing value in the users table with 0, then preview the result.
# NOTE(review): this also turns a missing 'Age' into 0; the later age histogram
# (bins start at 10) and the `Age >= 18` filter both leave such rows out.
users = users.fillna(value=0)
users.head(n=5)

In [None]:
# Preview the first five rows of the ratings table.
ratings.head(n=5)

In [None]:
# Summary statistics of the 'Book-Rating' column.
ratings['Book-Rating'].describe()

In [None]:
# Print the (rows, columns) shape of each loaded table: books, users, ratings.
for table in (books, users, ratings):
    print(table.shape)

In [None]:
# Bar chart of how many times each rating value occurs.
plt.rc("font", size = 15)

# `value_counts(sort=False)` does not order the result by rating value, so the
# bars could appear in an arbitrary order. Sorting by the index (the rating
# value) gives an ordered x-axis.
rating_counts = ratings['Book-Rating'].value_counts().sort_index()

# Work on the Axes returned by pandas rather than the implicit pyplot state.
ax = rating_counts.plot(kind = 'bar')
ax.set_title('Rating Distribution\n')
plt.setp(ax.get_xticklabels(), rotation=0, horizontalalignment="center")
ax.set_xlabel('Rating')
ax.set_ylabel('Count');  # trailing ';' hides the Text(...) repr in the cell output

In [None]:
# Histogram of user ages using one-year-wide bins from 10 up to 109.
plt.rc("font", size = 15)
age_axes = users['Age'].hist(bins=range(10, 110))
age_axes.set_title('Age distribution\n')
age_axes.set_xlabel('Age')
age_axes.set_ylabel('Count')

In [None]:
# Join every rating to its book record on ISBN (inner join, the merge default),
# then drop the listed book-metadata columns.
columns = [
    'Book-Author',
    'Year-Of-Publication',
    'Publisher',
    'Image-URL-S',
    'Image-URL-M',
    'Image-URL-L',
]
book_with_rating = ratings.merge(books, on='ISBN').drop(columns=columns)
book_with_rating.head()

In [None]:
# Discard rating rows that have no book title.
book_with_rating = book_with_rating.dropna(subset=['Book-Title'])

# Count the non-null ratings recorded for each title and expose the result as
# a two-column frame: 'Book-Title', 'Total_Rating_Count'.
book_rating_count = (
    book_with_rating
    .groupby('Book-Title')['Book-Rating']
    .count()
    .rename('Total_Rating_Count')
    .reset_index()
)
book_rating_count.head()

In [None]:
# Attach each title's total rating count to every individual rating row.
# The left join keeps all rows of `book_with_rating`.
rating_with_totalRatingCount = pd.merge(
    book_with_rating,
    book_rating_count,
    how='left',
    on='Book-Title',
)
rating_with_totalRatingCount.head()

In [None]:
# Summary statistics of the number of ratings per title.
total_rating_counts = book_rating_count['Total_Rating_Count']
print(total_rating_counts.describe())

In [None]:
# Quantiles of the ratings-per-title count, from 0.90 upward in steps of 0.01
# (1 itself is excluded).
upper_quantiles = np.arange(.9, 1, .01)
print(book_rating_count['Total_Rating_Count'].quantile(upper_quantiles))

In [None]:
# Keep only the ratings of titles that were rated at least 20 times.
has_enough_ratings = rating_with_totalRatingCount['Total_Rating_Count'] >= 20
popular_books = rating_with_totalRatingCount.loc[has_enough_ratings]
popular_books.head()

In [None]:
# (rows, columns) of the popular-books frame.
popular_books.shape

In [None]:
# Add the user columns to every popular-book rating. The left join keeps
# ratings even when the user id has no match in `users`.
combined = pd.merge(popular_books, users, how='left', on='User-ID')
combined.head()

In [None]:
# Restrict the merged ratings to users aged 18-39 whose location mentions
# "germany", then drop the 'Location' column from the result.
is_target_age = (combined['Age'] >= 18) & (combined['Age'] < 40)

# na=False: a missing or non-string Location makes `str.contains` return NaN,
# which would leave a non-boolean mask; treat those rows as "no match".
# regex=False: "germany" is a literal substring, not a pattern.
# NOTE(review): the match is case-sensitive; presumably locations are stored in
# lower case — confirm against the data.
is_in_germany = combined['Location'].str.contains("germany", na=False, regex=False)

germans_between_age = (
    combined
    .loc[is_target_age & is_in_germany]
    .drop(columns='Location')
)
germans_between_age.head()

In [None]:
# (rows, columns) of the filtered frame.
germans_between_age.shape