In [14]:
import pandas as pd

# Load the dataset
ulta_perfume_data = pd.read_csv('ulta_perfume_data.csv')

ulta_perfume_data['Brand'] = ulta_perfume_data['Brand'].str.replace('\n', '')

ulta_perfume_data.head()

Unnamed: 0,Product Name,Brand,Reviews,Price
0,r.e.m. Cherry Eclipse Eau de Parfum,Ariana Grande,4.7 out of 5 stars ; 268 reviews,$60.00 - $70.00
1,Cosmic Kylie Jenner Eau de Parfum,KYLIE JENNER FRAGRANCES,4.3 out of 5 stars ; 3003 reviews,$50.00 - $80.00
2,Donna Born In Roma Eau de Parfum,Valentino,4.8 out of 5 stars ; 6101 reviews,$100.00 - $170.00
3,Eilish Eau de Parfum,Billie Eilish,4.5 out of 5 stars ; 3556 reviews,$68.00 - $78.00
4,Gaultier Divine Eau de Parfum,Jean Paul Gaultier,4.5 out of 5 stars ; 1608 reviews,$92.00 - $164.00


In [15]:
# Split the 'Reviews' column into 'Rating' and 'Review Count'
ulta_perfume_data[['Rating', 'Review Count']] = ulta_perfume_data['Reviews'].str.extract(r'(\d\.\d) out of 5 stars ; (\d+) reviews')

# Convert 'Rating' to float and 'Review Count' to integer
ulta_perfume_data['Rating'] = ulta_perfume_data['Rating'].astype(float)
ulta_perfume_data['Review Count'] = ulta_perfume_data['Review Count'].astype(float)

ulta_perfume_data = ulta_perfume_data.drop(columns=['Reviews'])

# Fill NA values in 'Review Count' with 0 and convert to integer
ulta_perfume_data['Review Count'] = ulta_perfume_data['Review Count'].fillna(0).astype(int)

# Display the first few rows of the updated dataset
ulta_perfume_data.head()

Unnamed: 0,Product Name,Brand,Price,Rating,Review Count
0,r.e.m. Cherry Eclipse Eau de Parfum,Ariana Grande,$60.00 - $70.00,4.7,268
1,Cosmic Kylie Jenner Eau de Parfum,KYLIE JENNER FRAGRANCES,$50.00 - $80.00,4.3,3003
2,Donna Born In Roma Eau de Parfum,Valentino,$100.00 - $170.00,4.8,6101
3,Eilish Eau de Parfum,Billie Eilish,$68.00 - $78.00,4.5,3556
4,Gaultier Divine Eau de Parfum,Jean Paul Gaultier,$92.00 - $164.00,4.5,1608


To ensure fair and reliable ratings, I am using a weighted average adjustment with a smoothing factor (m) of 100. This prevents products with very few reviews from having disproportionately high or low scores!

In [16]:
# Calculate global average rating
global_avg = ulta_perfume_data["Rating"].sum() / len(ulta_perfume_data)

# Smoothing factor
m = 100  # Can be adjusted

# Compute adjusted scores
ulta_perfume_data["Adjusted Score"] = ulta_perfume_data.apply(
    lambda p: round(((p["Rating"] * p["Review Count"] + global_avg * m) / (p["Review Count"] + m)), 2),
    axis=1
)

# Print results
for index, row in ulta_perfume_data.iterrows():
    print(f'{row["Product Name"]}: {row["Adjusted Score"]}')

ulta_perfume_data

r.e.m. Cherry Eclipse Eau de Parfum: 4.52
Cosmic Kylie Jenner Eau de Parfum: 4.29
Donna Born In Roma Eau de Parfum: 4.79
Eilish Eau de Parfum: 4.49
Gaultier Divine Eau de Parfum: 4.47
XO KHLOÉ: 4.44
Good Girl Blush Eau de Parfum: 4.42
Miss Dior Eau de Parfum: 4.59
Burberry Goddess Eau de Parfum: 4.66
Her Eau de Parfum: 4.49
COCO MADEMOISELLE Eau de Parfum Spray: 4.4
Cloud Eau de Parfum: 4.29
Devotion Eau de Parfum: 4.17
Paradoxe Eau de Parfum: 4.67
Her Elixir de Parfum: 4.58
J'adore Eau de Parfum: 4.58
Libre Eau de Parfum: 4.59
Good Girl Eau de Parfum: 4.57
Black Opium Eau de Parfum: 4.69
CHANCE EAU TENDRE Eau de Parfum Spray: 4.31
MOD Vanilla Eau de Parfum: 4.27
Flowerbomb Eau de Parfum: 4.79
Feminine Fragrance Must-Haves: nan
Light Blue Eau de Toilette: 4.57
Donna Born in Roma Coral Fantasy Eau de Parfum: 4.65
Donna Born in Roma Intense Eau de Parfum: 4.66
Donna Born in Roma Green Stravaganza Eau de Parfum: 4.67
Guilty Love Edition Eau de Parfum for Women: 4.5
CHANCE EAU TENDRE Eau d

Unnamed: 0,Product Name,Brand,Price,Rating,Review Count,Adjusted Score
0,r.e.m. Cherry Eclipse Eau de Parfum,Ariana Grande,$60.00 - $70.00,4.7,268,4.52
1,Cosmic Kylie Jenner Eau de Parfum,KYLIE JENNER FRAGRANCES,$50.00 - $80.00,4.3,3003,4.29
2,Donna Born In Roma Eau de Parfum,Valentino,$100.00 - $170.00,4.8,6101,4.79
3,Eilish Eau de Parfum,Billie Eilish,$68.00 - $78.00,4.5,3556,4.49
4,Gaultier Divine Eau de Parfum,Jean Paul Gaultier,$92.00 - $164.00,4.5,1608,4.47
5,XO KHLOÉ,KHLOÉ KARDASHIAN,$78.00,4.5,673,4.44
6,Good Girl Blush Eau de Parfum,Carolina Herrera,$98.00 - $170.00,4.5,458,4.42
7,Miss Dior Eau de Parfum,Dior,$108.00 - $215.00,4.6,9730,4.59
8,Burberry Goddess Eau de Parfum,Burberry,$105.00 - $168.00,4.7,1504,4.66
9,Her Eau de Parfum,Burberry,$105.00 - $204.00,4.5,5098,4.49


In [17]:
#Price format is super wonky. Need to fix this.
def clean_price(price):
    if '\n' in price:
        parts = price.split('\n')
        
        # If the first part has "sale price" or similar, we should take the second part
        if 'sale price' in parts[0].lower():
            return parts[1].strip()  # Get the price/range from the second part
        # If it's a price range, we want to return the second part (price range)
        elif '-' in parts[1]:  
            return parts[1].strip()  # price range
        # Otherwise, take the first part (before the first \n)
        else:
            return parts[0].strip()  # just the price
    return price.strip()  # If no \n, return as it is

# Apply the function to the 'Price' column
ulta_perfume_data['Price'] = ulta_perfume_data['Price'].apply(clean_price)

# Print the cleaned 'Price' column
print(ulta_perfume_data['Price'])

0        $60.00 - $70.00
1        $50.00 - $80.00
2      $100.00 - $170.00
3        $68.00 - $78.00
4       $92.00 - $164.00
5                 $78.00
6       $98.00 - $170.00
7      $108.00 - $215.00
8      $105.00 - $168.00
9      $105.00 - $204.00
10     $143.00 - $176.00
11       $48.00 - $70.00
12     $101.00 - $170.00
13     $100.00 - $170.00
14     $112.00 - $180.00
15     $108.00 - $215.00
16     $100.00 - $210.00
17      $88.00 - $180.00
18      $98.00 - $205.00
19     $143.00 - $176.00
20       $48.00 - $70.00
21      $97.00 - $235.00
22                $35.00
23      $80.00 - $175.00
24     $100.00 - $170.00
25     $105.00 - $175.00
26     $100.00 - $170.00
27     $138.00 - $168.00
28     $116.00 - $150.00
29      $98.00 - $165.00
30      $93.00 - $178.00
31     $115.00 - $215.00
32      $70.00 - $130.00
33     $105.00 - $168.00
34                $39.00
35                $50.00
36      $98.00 - $170.00
37     $108.00 - $175.00
38      $86.00 - $155.00
39      $76.00 - $186.00


In [18]:
#The data is set and ready to be visualized! :)
ulta_perfume_data

Unnamed: 0,Product Name,Brand,Price,Rating,Review Count,Adjusted Score
0,r.e.m. Cherry Eclipse Eau de Parfum,Ariana Grande,$60.00 - $70.00,4.7,268,4.52
1,Cosmic Kylie Jenner Eau de Parfum,KYLIE JENNER FRAGRANCES,$50.00 - $80.00,4.3,3003,4.29
2,Donna Born In Roma Eau de Parfum,Valentino,$100.00 - $170.00,4.8,6101,4.79
3,Eilish Eau de Parfum,Billie Eilish,$68.00 - $78.00,4.5,3556,4.49
4,Gaultier Divine Eau de Parfum,Jean Paul Gaultier,$92.00 - $164.00,4.5,1608,4.47
5,XO KHLOÉ,KHLOÉ KARDASHIAN,$78.00,4.5,673,4.44
6,Good Girl Blush Eau de Parfum,Carolina Herrera,$98.00 - $170.00,4.5,458,4.42
7,Miss Dior Eau de Parfum,Dior,$108.00 - $215.00,4.6,9730,4.59
8,Burberry Goddess Eau de Parfum,Burberry,$105.00 - $168.00,4.7,1504,4.66
9,Her Eau de Parfum,Burberry,$105.00 - $204.00,4.5,5098,4.49
