In [1]:
# Import pandas library
import pandas as pd

# Read the csv file and store it in a DataFrame object.
# The relative path uses a forward slash so it resolves on Windows, Linux and
# macOS alike (a backslash is only a separator on Windows).
df = pd.read_csv("audible datasets/audible_cleaned.csv")

# Print the first five rows of the DataFrame
print(df.head())
# Print a summary of the DataFrame (index range, columns, non-null counts, dtypes)
df.info()

                                         name           author  \
0                  Geronimo Stilton #11 & #12  GeronimoStilton   
1                            The Burning Maze      RickRiordan   
2                                The Deep End       JeffKinney   
3                        Daughter of the Deep      RickRiordan   
4  The Lightning Thief: Percy Jackson, Book 1      RickRiordan   

         narrator  time releasedate language  stars  price  ratings  
0      BillLobely   140  2008-04-08  English    5.0  468.0     34.0  
1   RobbieDaymond   788  2018-01-05  English    4.5  820.0     41.0  
2      DanRussell   123  2020-06-11  English    4.5  410.0     38.0  
3  SoneelaNankani   676  2021-05-10  English    4.5  615.0     12.0  
4  JesseBernstein   600  2010-01-13  English    4.5  820.0    181.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87489 entries, 0 to 87488
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       -------------- 

In [14]:
# Define a function to get the best books based on some criteria
def get_best_books(df, min_stars=4.5, min_ratings=100, max_price=1000):
  """Select books that clear the quality thresholds, most-rated first.

  Args:
    df: DataFrame providing 'stars', 'ratings' and 'price' columns.
    min_stars: Lowest accepted value of 'stars' (inclusive).
    min_ratings: Lowest accepted value of 'ratings' (inclusive).
    max_price: Highest accepted value of 'price' (inclusive).

  Returns:
    A new DataFrame holding only the rows that satisfy all three
    conditions, ordered by 'ratings' from highest to lowest.
  """
  # One boolean mask per criterion keeps each condition readable on its own
  has_enough_stars = df['stars'].ge(min_stars)
  has_enough_ratings = df['ratings'].ge(min_ratings)
  is_within_budget = df['price'].le(max_price)
  # Keep rows passing every criterion, then rank them by number of ratings
  return df.loc[has_enough_stars & has_enough_ratings & is_within_budget].sort_values('ratings', ascending=False)

# Call the function with its default thresholds and keep the ranked result
best_books = get_best_books(df)
# Trailing expression: the notebook displays the first ten rows of the result
best_books.head(10)


Unnamed: 0,name,author,narrator,time,releasedate,language,stars,price,ratings
62272,Atomic Habits,JamesClear,JamesClear,335,2018-10-18,English,5.0,820.0,12573.0
35942,Atomic Habits,JamesClear,JamesClear,335,2018-10-18,English,5.0,820.0,12569.0
62292,Ikigai,"HéctorGarcía,FrancescMiralles",NaokoMori,203,2017-09-18,English,4.5,615.0,6981.0
58570,The Psychology of Money,MorganHousel,ChrisHill,348,2020-08-09,English,4.5,668.0,5973.0
57858,The Psychology of Money,MorganHousel,ChrisHill,348,2020-08-09,English,4.5,668.0,5972.0
41082,Sapiens,YuvalNoahHarari,DerekPerkins,918,2015-04-30,English,5.0,957.0,5500.0
57863,Rich Dad Poor Dad,RobertT.Kiyosaki,TimWheeler,369,2012-06-15,English,4.5,837.0,5324.0
26754,Becoming,MichelleObama,MichelleObama,1143,2018-11-13,English,4.5,957.0,2924.0
62289,How to Win Friends and Influence People,DaleCarnegie,ShernazPatel,485,2018-08-11,English,4.5,155.0,2494.0
62290,Can't Hurt Me,DavidGoggins,"DavidGoggins,AdamSkolnick",817,2018-11-28,English,5.0,836.0,2277.0


In [3]:
# Define a function to get the most popular books with the highest prices
def get_popular_expensive_books(df, min_ratings=100):
  """List sufficiently-rated books from the most to the least expensive.

  Args:
    df: DataFrame providing 'ratings' and 'price' columns.
    min_ratings: Lowest accepted value of 'ratings' (inclusive).

  Returns:
    A new DataFrame with the rows whose 'ratings' reach ``min_ratings``,
    ordered by 'price' descending; rows sharing a price are ordered by
    'ratings' descending.
  """
  is_popular = df['ratings'].ge(min_ratings)
  # Price is the primary sort key, ratings breaks ties between equal prices
  return df.loc[is_popular].sort_values(['price', 'ratings'], ascending=False)

# Call the function with its default minimum ratings and keep the sorted result
popular_expensive_books = get_popular_expensive_books(df)
# Trailing expression: the notebook displays the first ten rows of the result
popular_expensive_books.head(10)


Unnamed: 0,name,author,narrator,time,releasedate,language,stars,price,ratings
48143,Sherlock Holmes: The Definitive Collection,"StephenFry-introductions,ArthurConanDoyle",StephenFry,4317,2017-02-27,English,5.0,3037.0,838.0
57861,The Intelligent Investor Rev Ed.,BenjaminGraham,LukeDaniels,1068,2015-07-07,English,4.5,2082.0,302.0
27282,Good to Great,JimCollins,JimCollins,601,2010-07-13,English,4.5,1800.0,157.0
58578,Think and Grow Rich,NapoleonHill,"NapoleonHill,NapoleonHillFoundation",460,2014-10-14,English,4.5,1774.0,128.0
22174,A Promised Land,BarackObama,BarackObama,1750,2020-11-17,English,4.5,1708.0,1636.0
49814,A Fine Balance,RohintonMistry,VikasAdam,1549,2018-06-19,English,4.5,1673.0,110.0
7555,"Harry Potter and the Deathly Hallows, Book 7",J.K.Rowling,StephenFry,1440,2015-11-20,English,5.0,1599.0,1006.0
7557,"Harry Potter and the Goblet of Fire, Book 4",J.K.Rowling,StephenFry,1337,2015-11-20,English,5.0,1599.0,664.0
7552,"Harry Potter and the Order of the Phoenix, Book 5",J.K.Rowling,StephenFry,1818,2015-11-20,English,5.0,1599.0,636.0
7556,"Harry Potter and the Half-Blood Prince, Book 6",J.K.Rowling,StephenFry,1287,2015-11-20,English,5.0,1599.0,572.0


In [10]:
# Define a function to get the books with the highest average score based on stars, price and ratings
def get_highest_score_books(df, min_ratings=100, weights=None):
  """Rank sufficiently-rated books by a weighted score of several columns.

  Every scored column is min-max normalised to the range [0, 1] over the
  filtered rows, multiplied by its weight, and the products are summed.
  A larger value in any scored column therefore raises the score.

  Args:
    df: DataFrame providing a 'ratings' column plus every column named in
      ``weights``.
    min_ratings: Lowest accepted value of 'ratings' (inclusive).
    weights: Optional mapping of column name to weight. When omitted,
      {'stars': 0.4, 'price': 0.3, 'ratings': 0.3} is used.

  Returns:
    The filtered rows of ``df`` with their original (un-normalised) values,
    ordered from the highest score to the lowest. Rows whose score is NaN
    (a scored column is missing for that row) are placed last.
  """
  if weights is None:
    weights = {'stars': 0.4, 'price': 0.3, 'ratings': 0.3}
  # Filter the DataFrame by the minimum ratings
  filtered_df = df[df['ratings'] >= min_ratings]
  # Score on a positional 0..n-1 index: selecting by label afterwards would
  # multiply rows whenever the caller's index contains duplicate labels
  positional_df = filtered_df.reset_index(drop=True)
  score = pd.Series(0.0, index=positional_df.index)
  for column, weight in weights.items():
    values = positional_df[column]
    lowest = values.min()
    spread = values.max() - lowest
    # A constant (or empty / all-NaN) column cannot discriminate between rows;
    # skipping it avoids a 0/0 division that would turn every score into NaN
    if spread > 0:
      score = score + (values - lowest) / spread * weight
  # Stable sort so rows with equal scores keep their original relative order
  ranking = score.sort_values(ascending=False, kind='stable').index.to_numpy()
  # Return the rows by position, with the original values of every column
  return filtered_df.iloc[ranking]

# Call the function with its default minimum ratings and keep the ranked result
highest_score_books = get_highest_score_books(df)
# Trailing expression: the notebook displays the first ten rows of the result
highest_score_books.head(10)


Unnamed: 0,name,author,narrator,time,releasedate,language,stars,price,ratings
62272,Atomic Habits,JamesClear,JamesClear,335,2018-10-18,English,5.0,820.0,12573.0
35942,Atomic Habits,JamesClear,JamesClear,335,2018-10-18,English,5.0,820.0,12569.0
48143,Sherlock Holmes: The Definitive Collection,"StephenFry-introductions,ArthurConanDoyle",StephenFry,4317,2017-02-27,English,5.0,3037.0,838.0
41082,Sapiens,YuvalNoahHarari,DerekPerkins,918,2015-04-30,English,5.0,957.0,5500.0
7555,"Harry Potter and the Deathly Hallows, Book 7",J.K.Rowling,StephenFry,1440,2015-11-20,English,5.0,1599.0,1006.0
7557,"Harry Potter and the Goblet of Fire, Book 4",J.K.Rowling,StephenFry,1337,2015-11-20,English,5.0,1599.0,664.0
7552,"Harry Potter and the Order of the Phoenix, Book 5",J.K.Rowling,StephenFry,1818,2015-11-20,English,5.0,1599.0,636.0
7556,"Harry Potter and the Half-Blood Prince, Book 6",J.K.Rowling,StephenFry,1287,2015-11-20,English,5.0,1599.0,572.0
60802,The Da Vinci Code,DanBrown,JeffHarding,999,2005-05-12,English,5.0,1435.0,240.0
28425,The Psychology of Selling,BrianTracy,BrianTracy,319,2014-10-14,English,5.0,1439.0,212.0
