In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
%matplotlib inline
sns.set_style(style = 'darkgrid')

In [None]:
# Location of the bestsellers CSV, relative to the notebook's working directory.
csv_path = '../input/amazon-top-50-bestselling-books-2009-2019/bestsellers with categories.csv'
books = pd.read_csv(csv_path)

In [None]:
books.head()

In [None]:
books.info()

In [None]:
books.describe()

In [None]:
print('Checking for Null values')

In [None]:
plt.figure(figsize=(10,6))
sns.heatmap(books.isnull(), cbar = False, yticklabels= False, cmap = 'viridis')

In [None]:
plt.figure(figsize=(10,6))
sns.distplot(books['Reviews'], color = 'red', bins = 50)

In [None]:
plt.figure(figsize=(10,6))
sns.distplot(books['User Rating'], color = 'fuchsia', bins = 50)

In [None]:
plt.figure(figsize=(10,6))
sns.distplot(books['Price'], color = 'maroon', bins = 50)

In [None]:
sns.countplot(x = 'Year', data = books, palette = 'hot')

In [None]:
sns.countplot(x = 'Genre', data = books, palette = None)

In [None]:
plt.figure(figsize=(10,6))
sns.countplot(x = 'Year', hue = 'Genre', data = books, palette = 'seismic')

In [None]:
plt.figure(figsize=(10,6))
sns.boxplot(x = 'Year', y = 'Price', data = books, palette = 'prism', hue = 'Genre')

In [None]:
plt.figure(figsize=(10,6))
sns.violinplot(x = 'Year', y = 'Price', data = books, palette = 'flag', hue = 'Genre', split = True)

In [None]:
# Joint KDE of user rating against price.
# - x / y are passed by keyword: current seaborn releases reject them as
#   positional arguments.
# - The keyword is `color`; the previous `colour` spelling is not a
#   seaborn/matplotlib parameter, so the requested colour was never applied.
# - No plt.figure() call here: jointplot is figure-level and creates its own
#   figure, so a preceding plt.figure() only produced an extra empty figure.
sns.jointplot(x='User Rating', y='Price', data=books, kind='kde', color='lime')

In [None]:
# Correlation heatmap of the numeric columns.
# `books` still contains text columns (Name, Author, Genre) at this point and
# pandas >= 2.0 raises when DataFrame.corr() meets non-numeric data, so the
# numeric columns are selected explicitly first. Older pandas dropped the
# text columns silently, so the resulting matrix is the same there.
plt.figure(figsize=(10, 6))
numeric_corr = books.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, cmap='Spectral', annot=True, yticklabels=False)

In [None]:
# Pairwise relationships between the numeric columns, coloured by genre.
# The diagonal KDE bandwidth is passed as `bw_method`; `bw` is the deprecated
# spelling of the same setting (seaborn >= 0.11 maps `bw` onto `bw_method`
# and warns).
sns.pairplot(books, hue='Genre', diag_kws={'bw_method': 0.2})

In [None]:
# Section banner. The model trained below is a classifier whose target is the
# 'Non Fiction' indicator column, i.e. it predicts genre, not user rating.
print('Predicting the genre (Fiction vs Non Fiction) of a book')

In [None]:
# Getting dummy values for the Genre column
Non_fiction = pd.get_dummies(books['Genre'], drop_first= True)

In [None]:
Non_fiction

In [None]:
books = pd.concat([books,Non_fiction], axis = 1)

In [None]:
books.head()

In [None]:
books.drop(['Name', 'Author', 'Genre'], axis = 1, inplace = True)

In [None]:
books.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
lab_enc = LabelEncoder()

In [None]:
books['Year'] = lab_enc.fit_transform(books['Year'])

In [None]:
books.head()

In [None]:
X = books.drop('Non Fiction', axis = 1)

In [None]:
y = books['Non Fiction']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=101)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rfc = RandomForestClassifier()

In [None]:
rfc.fit(X_train, y_train)

In [None]:
rfcpred = rfc.predict(X_test)

In [None]:
# Checking model accuracy
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
print(confusion_matrix(y_test, rfcpred))
sns.heatmap(confusion_matrix(y_test, rfcpred), annot = True, yticklabels= False, cmap='Blues', cbar = False)

In [None]:
print(classification_report(y_test, rfcpred))