In [3]:
import numpy as np
import pandas as pd

In [7]:
def load_data(path):
    """Read the semicolon-separated, ISO-8859-1 encoded CSV at ``path``.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    return pd.read_csv(path, encoding='ISO-8859-1', sep=';')

def data_prep(data):
    """Normalise the raw frame and return the cleaned result as a new frame.

    The caller's frame is left untouched; all work happens on a copy.

    Steps
    -----
    * ``Rating Value``: decimal commas become dots and the column is cast to float.
    * ``Perfume`` / ``Brand``: hyphens become spaces. Both columns are cast to
      ``str`` first, so a missing name ends up as the literal text ``'nan'``.
    * Every ``'unknown'``, ``'Unknown'`` and ``0`` anywhere in the frame becomes NaN.
    * ``Year``: missing values are filled with ``0`` and the column is cast to int.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Rating Value``, ``Perfume``, ``Brand`` and ``Year``.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``data``.
    """
    # Work on a copy so the input is never left in a half-cleaned state.
    prepared = data.copy()

    # Replace ',' with '.' and convert to float.
    prepared['Rating Value'] = (
        prepared['Rating Value']
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

    for column in ('Perfume', 'Brand'):
        prepared[column] = (
            prepared[column].astype(str).str.replace('-', ' ', regex=False)
        )

    # A list (not a set) is the documented container for several targets.
    # A single 0 is enough: it compares equal to 0.0 as well.
    prepared = prepared.replace(['unknown', 'Unknown', 0], np.nan)

    # The blanket replacement above also turns a Year of 0 into NaN, so the
    # fill/cast only needs to happen once, after it.
    prepared['Year'] = prepared['Year'].fillna(0).astype(int)
    return prepared

def data_filtering(data, min_rating=3, min_votes=50, min_year=2000, min_brand_size=3):
    """Keep well-rated, sufficiently-voted, recent perfumes from established brands.

    A row is kept only when its ``Rating Value``, ``Rating Count`` and ``Year``
    all reach their thresholds. The test is written as a positive ``>=`` mask,
    so a row whose rating or vote count is missing (NaN) is dropped instead of
    slipping through, which is what a ``<`` comparison against NaN would allow.
    Selecting with a boolean mask also avoids dropping by index label, which
    would remove every row sharing a label if the index had duplicates.

    After that, brands with fewer than ``min_brand_size`` remaining perfumes
    are removed and the index is renumbered from 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Rating Value``, ``Rating Count``, ``Year`` and ``Brand``.
    min_rating : float, default 3
        Lowest accepted ``Rating Value``.
    min_votes : int, default 50
        Lowest accepted ``Rating Count``.
    min_year : int, default 2000
        Earliest accepted ``Year``.
    min_brand_size : int, default 3
        Minimum number of surviving perfumes a brand needs to be kept.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with a fresh ``RangeIndex``; ``data`` is not modified.
    """
    meets_thresholds = (
        (data['Rating Value'] >= min_rating)
        & (data['Rating Count'] >= min_votes)
        & (data['Year'] >= min_year)
    )
    kept = data[meets_thresholds]

    brand_sizes = kept['Brand'].value_counts()
    valid_brands = brand_sizes[brand_sizes >= min_brand_size].index
    return kept[kept['Brand'].isin(valid_brands)].reset_index(drop=True)
    

def generate_description(data):
    """Add a ``description`` column holding a prose summary of each perfume.

    Each summary names the perfume and brand (title-cased), the gender, the
    top/middle/base notes, the non-missing main accords, the release year and
    country, and the rating. When at least one perfumer is known the summary
    ends with a single well-formed sentence, e.g.
    ``"Crafted by perfumer Anne Flipo and Vincent Ricord."``; when none is
    known nothing is appended (no trailing whitespace).

    Rows are formatted in a plain loop rather than with
    ``DataFrame.apply(axis=1)`` so that a frame with zero rows simply receives
    an empty column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Perfume``, ``Brand``, ``Gender``, ``Top``, ``Middle``,
        ``Base``, ``mainaccord1``..``mainaccord5``, ``Year``, ``Country``,
        ``Rating Value``, ``Rating Count``, ``Perfumer1`` and ``Perfumer2``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new column added in place.
    """
    accord_columns = ['mainaccord1', 'mainaccord2', 'mainaccord3', 'mainaccord4', 'mainaccord5']
    perfumer_columns = ('Perfumer1', 'Perfumer2')

    def _describe(row):
        # Build the summary text for one perfume row.
        accords = ', '.join(filter(pd.notna, [row[column] for column in accord_columns]))
        perfumers = [
            row[column].title()
            for column in perfumer_columns
            if pd.notna(row[column])
        ]
        # One sentence for all known perfumers; the full stop goes after the last name.
        credit = f" Crafted by perfumer {' and '.join(perfumers)}." if perfumers else ''
        return (
            f"{row['Perfume'].title()} by {row['Brand'].title()} is a {row['Gender']} fragrance featuring top notes of {row['Top']}, "
            f"middle notes of {row['Middle']}, and base notes of {row['Base']}. "
            f"The main accords are {accords}. "
            f"Released in {row['Year']} from {row['Country']}, this fragrance has a rating of {row['Rating Value']} out of 5 from {row['Rating Count']} votes."
            f"{credit}"
        )

    data['description'] = [_describe(row) for _, row in data.iterrows()]
    return data

def generate_olfactory_discription(data):
    """Add an ``olfactory`` column describing only how each perfume smells.

    The text names the perfume and brand (title-cased), the gender, the
    top/middle/base notes and the non-missing main accords. Unlike the
    ``description`` text it carries no year, country, rating or perfumer.

    Rows are formatted in a plain loop rather than with
    ``DataFrame.apply(axis=1)`` so that a frame with zero rows simply receives
    an empty column. The generated text itself is unchanged.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Perfume``, ``Brand``, ``Gender``, ``Top``, ``Middle``,
        ``Base`` and ``mainaccord1``..``mainaccord5``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new column added in place.
    """
    accord_columns = ['mainaccord1', 'mainaccord2', 'mainaccord3', 'mainaccord4', 'mainaccord5']

    def _olfactory_text(row):
        # Build the scent-only sentence for one perfume row.
        accords = ', '.join(filter(pd.notna, [row[column] for column in accord_columns]))
        return (
            f"{row['Perfume'].title()} by {row['Brand'].title()} is a {row['Gender']} fragrance including top notes of {row['Top']}, "
            f"middle notes of {row['Middle']}, and base notes of {row['Base']}, giving it "
            f"{accords} scents. "
        )

    data['olfactory'] = [_olfactory_text(row) for _, row in data.iterrows()]
    return data

def notes_integration(data):
    """Add a ``notes`` column listing every top, middle and base note of a perfume.

    Each of ``Top``, ``Middle`` and ``Base`` is split on commas, the pieces are
    stripped of surrounding whitespace, and the three lists are concatenated in
    that order. A missing (NaN) cell contributes no notes instead of raising,
    and a frame with zero rows simply receives an empty column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Top``, ``Middle`` and ``Base``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new column added in place.
    """
    def _split_notes(cell):
        # Turn one comma-separated cell into a list of trimmed note names.
        if pd.isna(cell):
            return []
        return [note.strip() for note in str(cell).split(',')]

    combined = [
        _split_notes(top) + _split_notes(middle) + _split_notes(base)
        for top, middle, base in zip(data["Top"], data["Middle"], data["Base"])
    ]
    # An explicit object Series keeps each list as a single cell value.
    data["notes"] = pd.Series(combined, index=data.index, dtype=object)
    return data

def save_csv(data, path="./res/fragrantica_cleaned_with_description.csv"):
    """Write ``data`` to ``path`` as a semicolon-separated, ISO-8859-1 encoded CSV.

    The header row is written and the index is omitted. When ``path`` is a
    filesystem path, its parent directory is created first if it does not
    exist, so saving into a fresh ``./res`` folder does not fail.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to persist.
    path : str or path-like, default "./res/fragrantica_cleaned_with_description.csv"
        Destination, forwarded to ``DataFrame.to_csv``.

    Returns
    -------
    None
    """
    # Local import: keeps this function self-contained without touching the
    # notebook's import cell.
    import os

    # Only filesystem paths have a parent directory to prepare; anything else
    # is handed to ``to_csv`` untouched.
    if isinstance(path, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(path))
        if parent:
            os.makedirs(parent, exist_ok=True)

    data.to_csv(path, sep=';', encoding='ISO-8859-1', index=False, header=True)

In [8]:
# Full preparation pipeline, in order: load -> clean -> filter -> describe ->
# olfactory text -> collect notes. Each stage receives the previous stage's frame.
# NOTE(review): the TypeError recorded for this cell suggests one of these function
# names was rebound to a DataFrame in the live kernel; confirm with Restart & Run All.
data = (
    load_data('fragrantica_cleaned.csv')
    .pipe(data_prep)
    .pipe(data_filtering)
    .pipe(generate_description)
    .pipe(generate_olfactory_discription)
    .pipe(notes_integration)
)

# Persist the finished frame.
save_csv(data, "./res/fragrantica_database.csv")

TypeError: 'DataFrame' object is not callable

In [5]:
# Display the frame as the cell output; the rendering below elides the middle rows.
# NOTE(review): this cell's execution count (In [5]) is lower than the definition and
# pipeline cells (In [7], In [8]) and the output has no `notes` column, so the output
# appears to predate the current pipeline; re-run after Restart & Run All to confirm.
data

Unnamed: 0,url,Perfume,Brand,Country,Gender,Rating Value,Rating Count,Year,Top,Middle,Base,Perfumer1,Perfumer2,mainaccord1,mainaccord2,mainaccord3,mainaccord4,mainaccord5,description,olfactory
0,https://www.fragrantica.com/perfume/alexandre-...,western leather white,alexandre j,France,women,3.00,116,2014,"apple, rhubarb, red berries","pineapple, pear, black currant, freesia, rose,...","white musk, violet, amberwood, moss",,,fruity,sweet,fresh,green,musky,Western Leather White by Alexandre J is a wome...,Western Leather White by Alexandre J is a wome...
1,https://www.fragrantica.com/perfume/haute-frag...,chic blossom,haute fragrance company hfc,France,women,3.01,85,2017,"indian jasmine, italian lemon, italian mandarin","jasmine sambac, amber","musk, benzoin, tobacco",vincent ricord,,white floral,musky,amber,powdery,citrus,Chic Blossom by Haute Fragrance Company Hfc is...,Chic Blossom by Haute Fragrance Company Hfc is...
2,https://www.fragrantica.com/perfume/frederic-m...,synthetic nature,frederic malle,France,unisex,3.02,58,2024,"black currant, lily of the valley, basil","jasmine, hyacinth, ylang ylang",patchouli,anne flipo,,white floral,fruity,green,fresh spicy,floral,Synthetic Nature by Frederic Malle is a unisex...,Synthetic Nature by Frederic Malle is a unisex...
3,https://www.fragrantica.com/perfume/karl-lager...,lagerfeld femme,karl lagerfeld,France,women,3.02,232,2000,"rhubarb, green mandarin, elemi resin","narcissus, lily","cedar, musk, cherry",christine nagel,,green,aromatic,yellow floral,citrus,fruity,Lagerfeld Femme by Karl Lagerfeld is a women f...,Lagerfeld Femme by Karl Lagerfeld is a women f...
4,https://www.fragrantica.com/perfume/arabesque-...,pearl,arabesque perfumes,UAE,unisex,3.02,95,2020,"green apple, guava","sweet notes, tonka bean, peach, orange","white musk, white flowers, vanilla",luz vaquero,,sweet,fruity,musky,powdery,vanilla,Pearl by Arabesque Perfumes is a unisex fragra...,Pearl by Arabesque Perfumes is a unisex fragra...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15887,https://www.fragrantica.com/perfume/arabian-ou...,royal oud,arabian oud,Arabia saudi,men,4.71,56,2019,rose,musk,cambodian oud,,,woody,animalic,musky,rose,warm spicy,Royal Oud by Arabian Oud is a men fragrance fe...,Royal Oud by Arabian Oud is a men fragrance in...
15888,https://www.fragrantica.com/perfume/ramon-mone...,alhambra oud,ramon monegal,Spain,unisex,4.72,163,2019,"agarwood (oud), apple, orange blossom","agarwood (oud), jasmine, rose","agarwood (oud), ambroxan, birch",ramon monegal,,oud,white floral,amber,fruity,fresh,Alhambra Oud by Ramon Monegal is a unisex frag...,Alhambra Oud by Ramon Monegal is a unisex frag...
15889,https://www.fragrantica.com/perfume/ermenegild...,passion,ermenegildo zegna,Italy,men,4.72,151,2017,"cognac, rum",olibanum,"saffron, amber",,,amber,warm spicy,woody,rum,metallic,Passion by Ermenegildo Zegna is a men fragranc...,Passion by Ermenegildo Zegna is a men fragranc...
15890,https://www.fragrantica.com/perfume/rasasi/oud...,oudh al boruzz rooh al assam,rasasi,UAE,unisex,4.73,62,2015,"rose, saffron","indian oud, leather, spanish labdanum","agarwood (oud), cypriol oil or nagarmotha, san...",raphael haury,,oud,rose,leather,woody,warm spicy,Oudh Al Boruzz Rooh Al Assam by Rasasi is a un...,Oudh Al Boruzz Rooh Al Assam by Rasasi is a un...
