In [1]:
# import pandas
import pandas as pd

In [2]:
# Optional: use the pd.set_option() to display all rows in a dataframe by default
# pd.set_option('display.max_rows', 600)

# Create a DataFrame

In [3]:
# read the "bestsellers with categories" csv file (Dataset on Amazon's Top 50 bestselling books from 2009 to 2019.)
df_books = pd.read_csv('bestsellers with categories.csv')

In [4]:
# get access to the shape attribute
df_books.shape

(550, 7)

In [5]:
# find the data types of each column
df_books.dtypes

Name            object
Author          object
User Rating    float64
Reviews          int64
Price            int64
Year             int64
Genre           object
dtype: object

# Display a DataFrame

In [6]:
# show first 5 rows in a dataframe
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [7]:
# describe basic statistics of the dataframe (mean, std, min, max)
df_books.describe()

Unnamed: 0,User Rating,Reviews,Price,Year
count,550.0,550.0,550.0,550.0
mean,4.618364,11953.281818,13.1,2014.0
std,0.22698,11731.132017,10.842262,3.165156
min,3.3,37.0,0.0,2009.0
25%,4.5,4058.0,7.0,2011.0
50%,4.7,8580.0,11.0,2014.0
75%,4.8,17253.25,16.0,2017.0
max,4.9,87841.0,105.0,2019.0


# Add a new Column

In [8]:
# Your task is to create a column named 'Critic Rating' that should have random integer numbers between 1 and 4

# 1. import numpy and create 550 random integer numbers between 1 and 4
import numpy as np
int_rating = np.random.randint(1, 4, 550)

# 2. add new 'Critic Rating' column to dataframe using the random numbers created
df_books['Critic Rating'] = int_rating

# Note the random numbers in this new 'Critic Rating' column will be different between your solution and mine, but we'll focus only on the code, in this section.

In [9]:
# show first 5 rows
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Critic Rating
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,2
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,3
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,2
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,1
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,2


# Basic Attributes, Methods and Functions

In [10]:
# get access to the columns attribute
df_books.columns

Index(['Name', 'Author', 'User Rating', 'Reviews', 'Price', 'Year', 'Genre',
       'Critic Rating'],
      dtype='object')

# Selecting Two or More Columns from a Dataframe

In [11]:
# move the new 'Critic Rating' column between the columns "User Rating" and "Reviews" Then update the dataframe

# Tip: Copy and paste the column names obtained with the columns attribute and then rearrange elements using [[]]
df_books = df_books[['Name', 'Author', 'User Rating', 'Critic Rating', 'Reviews', 'Price', 'Year', 'Genre']]

In [12]:
# show first 5 rows
df_books.head()

Unnamed: 0,Name,Author,User Rating,Critic Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,2,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,3,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,2,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,1,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,2,7665,12,2019,Non Fiction


# Operations on Dataframes

In [13]:
# create a column named "Average Rating" by using the following formula: Average Rating = (User Rating + Critic Rating)/2
df_books['Average Rating'] = (df_books['User Rating'] + df_books['Critic Rating'])/2

In [14]:
# use the round function to round the values of the dataframe to 1 decimal and update the dataframe
df_books = round(df_books, 1)

# Value Counts

In [15]:
# count elements in "Genre" column by category and return the relative frequency
df_books['Genre'].value_counts(normalize=True)

Non Fiction    0.563636
Fiction        0.436364
Name: Genre, dtype: float64

# Rename Columns

In [16]:
# rename columns "User Rating," "Critic Rating" and "Average Rating" to "UR," "CR" and "AR" then update the dataframe with the inplace paraneter
df_books.rename(columns={'User Rating':'UR',
                         'Critic Rating':'CR',
                         'Average Rating':'AR'}, inplace=True)

In [17]:
# show first 5 rows
df_books.head()

Unnamed: 0,Name,Author,UR,CR,Reviews,Price,Year,Genre,AR
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,2,17350,8,2016,Non Fiction,3.4
1,11/22/63: A Novel,Stephen King,4.6,3,2052,22,2011,Fiction,3.8
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,2,18979,15,2018,Non Fiction,3.4
3,1984 (Signet Classics),George Orwell,4.7,1,21424,6,2017,Fiction,2.8
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,2,7665,12,2019,Non Fiction,3.4


In [18]:
# select only "Name", "Author", "UR", "CR", "AR" and "Year" columns and update dataframe
df_books = df_books[["Name", "Author", "UR", "CR", "AR", 'Year']]

# Sort a dataframe

In [19]:
# sort the dataframe descending by "UR" and "CR"
df_books.sort_values(['UR', 'CR'], ascending=False)

Unnamed: 0,Name,Author,UR,CR,AR,Year
84,Dog Man: Brawl of the Wild: From the Creator o...,Dav Pilkey,4.9,3,4.0,2019
86,Dog Man: For Whom the Ball Rolls: From the Cre...,Dav Pilkey,4.9,3,4.0,2019
147,"Goodnight, Goodnight Construction Site (Hardco...",Sherri Duskey Rinker,4.9,3,4.0,2013
151,Hamilton: The Revolution,Lin-Manuel Miranda,4.9,3,4.0,2016
155,Harry Potter and the Goblet of Fire: The Illus...,J. K. Rowling,4.9,3,4.0,2019
...,...,...,...,...,...,...
393,The Goldfinch: A Novel (Pulitzer Prize for Fic...,Donna Tartt,3.9,3,3.4,2014
107,Fifty Shades of Grey: Book One of the Fifty Sh...,E L James,3.8,2,2.9,2013
106,Fifty Shades of Grey: Book One of the Fifty Sh...,E L James,3.8,1,2.4,2012
132,Go Set a Watchman: A Novel,Harper Lee,3.6,1,2.3,2015
