## 데이터셋 정보

<a href="https://colab.research.google.com/github/sforseohn/Perfume-Recommender-System/blob/main/Data_Import.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import kagglehub

# Download latest version
# Fetch the "joehusseinmama/fragrantica-data" dataset through kagglehub and
# keep the value returned by dataset_download() in `path`.
path = kagglehub.dataset_download("joehusseinmama/fragrantica-data")

# Show where the dataset files ended up.
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fragrantica-data


## 데이터 전처리

In [None]:
# NOTE: every line of this preprocessing cell is commented out, so nothing here
# runs. It is kept as a record of how the title column was filtered.
# import pandas as pd
# import re
# import csv

# # Read the CSV file
# df = pd.read_csv('/content/perfumes_table.csv', on_bad_lines='skip', engine='python')

# # (1) Drop NaN and empty-string titles
# df = df[df['title'].notna()]  # step 1: drop NaN
# df = df[df['title'].str.strip() != '']  # step 2: drop empty strings

# # (2) Drop rows whose title starts with a lowercase letter
# df = df[~df['title'].str.strip().str.match(r'^[a-z]')]

# # (3) Keep only titles made up of English letters, digits and punctuation
# def is_english_or_numeric(text):
#     if pd.isna(text):
#         return False
#     text = str(text).strip()
#     return bool(re.fullmatch(r'[A-Za-z0-9\s.,!?\'"()\-:;]+', text))

# df = df[df['title'].apply(is_english_or_numeric)]

# # Save the filtered result
# df.to_csv('/content/filter.csv', index=False, quoting=csv.QUOTE_ALL, lineterminator='\n')

## 패키지 임포트

In [None]:
# Install the `neo4j` package (imported below as `from neo4j import GraphDatabase`)
# using an IPython shell escape.
!pip install neo4j



## Neo4j 연결

In [None]:
import os
from getpass import getpass

from neo4j import GraphDatabase

# Neo4j connection settings.
#
# The password used to be committed in this notebook as a string literal, which
# exposes the database to anyone who can read the repository. It is now taken
# from the NEO4J_PASSWORD environment variable, falling back to an interactive
# prompt so the secret never has to be stored in the notebook.
# NOTE(review): the previously committed password should be rotated, since it
# remains visible in version-control history.
#
# The URI and user name are not secrets; they keep their previous values as
# defaults and can be overridden through NEO4J_URI / NEO4J_USER.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://60565c57.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Shared driver object used by the query cells below.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))


## Cypher 쿼리 실행

In [None]:
import pandas as pd
from neo4j import GraphDatabase
from IPython.display import HTML

# Cypher 쿼리 실행
def run_cypher_query(driver, query: str, parameters: dict = None):
    """Run a single Cypher statement and return its rows as dictionaries.

    Args:
        driver: Object whose ``session()`` method returns a context manager
            exposing ``run(query, parameters)``.
        query: Cypher text to execute.
        parameters: Optional mapping of query parameters, forwarded unchanged
            (``None`` when omitted).

    Returns:
        A list containing ``record.data()`` for every record of the result,
        in the order the result yields them.
    """
    rows = []
    with driver.session() as session:
        # Consume the result while the session is still open.
        for record in session.run(query, parameters):
            rows.append(record.data())
    return rows

# Title of the perfume the recommendations are based on
input_perfume_title = "Bleu Royal Princesse Marina De Bourbon for women"

# Check that the perfume exists (titles are compared case-insensitively via
# toLower on both sides)
check_query = """
MATCH (p:Perfume)
WHERE toLower(p.title) = toLower($title)
RETURN p.title AS title
"""

check_result = run_cypher_query(driver, check_query, {"title": input_perfume_title})
# An empty row list means no Perfume node matched the title: stop here.
if not check_result:
    raise ValueError(f"향수 '{input_perfume_title}' 를 찾을 수 없습니다.")

# Fetch the list of notes of the input perfume.
# The title is compared case-insensitively, exactly like the existence check
# and the note/category query in this cell. The previous exact match
# ({title: $title}) made a title that differs only in letter case pass the
# existence check and then silently yield an empty note list here.
note_query = """
MATCH (p:Perfume)-[:HAS_NOTE]->(n:Note)
WHERE toLower(p.title) = toLower($title)
RETURN COLLECT(DISTINCT toLower(n.name)) AS likedNotes
"""

note_result = run_cypher_query(driver, note_query, {"title": input_perfume_title})

# Guard against a result without any row before indexing into it.
if not note_result:
    raise ValueError("해당 제목의 향수를 찾을 수 없습니다.")

# Lower-cased, de-duplicated note names of the input perfume.
liked_notes = note_result[0]['likedNotes']
# The same notes rendered as a comma-separated list of double-quoted names.
note_filter = ', '.join(f'"{note}"' for note in liked_notes)

# Fetch the input perfume's notes and note categories.
# Only notes that have a BELONGS_TO relationship to a NoteCategory take part,
# because the pattern below requires the full Perfume -> Note -> NoteCategory path.
note_info_query = """
MATCH (p:Perfume)-[:HAS_NOTE]->(n:Note)-[:BELONGS_TO]->(c:NoteCategory)
WHERE toLower(p.title) = toLower($title)
RETURN COLLECT(DISTINCT toLower(n.name)) AS inputNotes,
       COLLECT(DISTINCT toLower(c.name)) AS inputCategories
"""
# Take the first returned row; its two lists feed the recommendation query.
note_info = run_cypher_query(driver, note_info_query, {"title": input_perfume_title})[0]
input_notes = note_info['inputNotes']
input_categories = note_info['inputCategories']

# Recommendation query (ordered by category overlap first, then by note overlap).
# For every perfume it collects the lower-cased note and category names, counts
# how many of them appear in $inputNotes / $inputCategories, keeps perfumes with
# at least one overlap of either kind, and returns the top 30.
# The collections are not DISTINCT, so a name contributes once per matched
# Note -> NoteCategory path.
query = """
MATCH (p:Perfume)-[:HAS_NOTE]->(n:Note)-[:BELONGS_TO]->(c:NoteCategory)
WITH p,
     COLLECT(toLower(n.name)) AS allNotes,
     COLLECT(toLower(c.name)) AS allCategories
WITH p, allNotes, allCategories,
     [note IN allNotes WHERE note IN $inputNotes] AS matchedNotes,
     [cat IN allCategories WHERE cat IN $inputCategories] AS matchedCategories,
     SIZE([note IN allNotes WHERE note IN $inputNotes]) AS matchCount,
     SIZE([cat IN allCategories WHERE cat IN $inputCategories]) AS matchedCategoryCount
WHERE matchCount > 0 OR matchedCategoryCount > 0
ORDER BY matchedCategoryCount DESC, matchCount DESC
LIMIT 30
RETURN
    p.title AS Title,
    p.designer AS Designer,
    p.description AS Description,
    p.rating AS Rating,
    allNotes AS Notes,
    matchedCategoryCount AS CommonCategories,
    matchCount AS CommonNotes
"""

# Run the query.
# NOTE(review): "inputTitle" is passed but the query text never references
# $inputTitle, so the input perfume itself is not excluded from the results.
recommendations = run_cypher_query(driver, query, {
    "inputTitle": input_perfume_title,
    "inputNotes": input_notes,
    "inputCategories": input_categories
})


# Convert the recommendation rows to a DataFrame.
# The columns are listed explicitly (same names and order as the RETURN clause
# of the recommendation query) so that an empty result still produces a frame
# with a 'Notes' column instead of raising KeyError on the next line.
result_columns = [
    'Title',
    'Designer',
    'Description',
    'Rating',
    'Notes',
    'CommonCategories',
    'CommonNotes',
]
df = pd.DataFrame(recommendations, columns=result_columns)
# Each 'Notes' cell is a list of note names; show it as one comma-separated string.
df['Notes'] = df['Notes'].apply(', '.join)
# Shift the index labels so the displayed ranks start at 1 instead of 0.
# From here on df's labels are 1-based while row positions stay 0-based.
df.index += 1

# Render the table.
# NOTE(review): escape=False emits cell text as raw HTML; confirm the database
# text is trusted before relying on this.
HTML(df.to_html(escape=False))

Unnamed: 0,Title,Designer,Description,Rating,Notes,CommonCategories,CommonNotes
1,Ferre by Ferre Gianfranco Ferre for women,gianfranco ferre perfumes and colognes,"Ferre by Ferre by Gianfranco Ferre is a Floral Aldehyde fragrance for women. Ferre by Ferre was launched in 1991. Top notes are Aldehydes, Neroli, Peach, Green Leaves, Bergamot, Orange and Lemon; middle notes are Oakmoss, Ylang-Ylang, Rose, Violet Root, Mimosa, Carnation, Passionfruit, Orange Blossom, Jasmine, Cassia and Lily-of-the-Valley; base notes are Iris, Sandalwood, Musk, Vanilla, Tonka Bean, Benzoin, Styrax, Amber, Spices and Vetiver.",4.36,"peach, rose, jasmine, iris, orange blossom, ylang-ylang, vanilla, sandalwood, vetiver, oakmoss, benzoin, styrax, musk, aldehydes, bergamot, lemon, orange",12,4
2,Lelong Pour Femme Lucien Lelong for women,lucien lelong perfumes and colognes,"Lelong Pour Femme by Lucien Lelong is a Amber Floral fragrance for women. Lelong Pour Femme was launched in 1999. Top notes are Lilac, Bergamot, Magnolia, Fig and Mandarin Orange; middle notes are Orchid, White Orchid, Ylang-Ylang, Iris, Tuberose, May Rose and Jasmine; base notes are Oakmoss, Sandalwood, Musk and Vetiver.",4.53,"fig, jasmine, iris, magnolia, tuberose, ylang-ylang, sandalwood, vetiver, oakmoss, musk, bergamot, mandarin orange",11,3
3,Forum Woman Tufi Duek for women,tufi duek perfumes and colognes,"Forum Woman by Tufi Duek is a Amber Floral fragrance for women. Forum Woman was launched in 2013. Top notes are Bergamot, Red Apple, Peach, Freesia, Galbanum and Tagetes; middle notes are Rose, Black Iris, Violet, Jasmine, Carnation, Lily-of-the-Valley and Vetiver; base notes are Crystal Amber, Cashmere Wood, Tonka Bean, Caramel, Vanilla, Sandalwood, Musk and Patchouli.",3.5,"peach, rose, jasmine, violet, freesia, galbanum, vanilla, caramel, sandalwood, patchouli, vetiver, cashmere wood, musk, bergamot",10,5
4,L'Edition Elle for women,elle perfumes and colognes,"L'Edition by Elle is a Floral Fruity fragrance for women. L'Edition was launched in 2018. Top notes are Mandarin Orange, Pear, Freesia and Lemon; middle notes are Peony, Lily-of-the-Valley, Peach, Jasmine and Rose; base notes are Musk, Cedar and Sandalwood.",4.03,"pear, peach, rose, jasmine, peony, freesia, cedar, sandalwood, musk, lemon, mandarin orange",10,2
5,Amuro 7 Dzintars for women,dzintars perfumes and colognes,"Amuro 7 by Dzintars is a Floral Woody Musk fragrance for women. Amuro 7 was launched in 2013. Top notes are Bergamot, Violet, Freesia, Lily of the Valley, Mimosa and Rose; middle notes are Orange Blossom, Geranium, Iris, Coriander, Chamomile, Tuberose and Jasmine; base notes are Amber, Musk, Sandalwood and Vanilla.",5.0,"rose, jasmine, violet, iris, freesia, orange blossom, tuberose, vanilla, sandalwood, musk, bergamot",9,5
6,Bleu Royal Princesse Marina De Bourbon for women,princesse marina de bourbon perfumes and colognes,"Bleu Royal by Princesse Marina De Bourbon is a Amber Floral fragrance for women. Bleu Royal was launched in 2012. Top notes are Bergamot, Orange Blossom and Apple; middle notes are Violet, Jasmine and Lotus; base notes are Patchouli, Amber and Sandalwood.",4.01,"apple, jasmine, violet, lotus, orange blossom, sandalwood, patchouli, bergamot",8,8
7,Divine Oriflame for women,oriflame perfumes and colognes,"Divine by Oriflame is a Floral fragrance for women. Divine was launched in 2002. The nose behind this fragrance is Jean Jacques. Top notes are Bamboo, Violet, Water Hyacinth, Kiwi and Ivy; middle notes are Orchid, Lily, Freesia, Jasmine and Rose; base notes are White Musk, Sandalwood and Plum.",3.66,"plum, kiwi, rose, jasmine, violet, freesia, lily, sandalwood",8,3
8,Immortal Oud Mith for women and men,mith perfumes and colognes,"Immortal Oud by Mith is a Woody fragrance for women and men. This is a new fragrance. Immortal Oud was launched in 2023. Top notes are Orange, Raspberry, Coconut, Cardamom, Pepper and Peach; middle notes are Rose, Dried Fruits, Thyme and Patchouli; base notes are Agarwood (Oud), Vanilla, Tonka Bean, Tobacco, Cacao, Amber and Vetiver.",4.33,"peach, raspberry, coconut, rose, cardamom, vanilla, patchouli, vetiver, agarwood (oud), orange",8,1
9,Unforgiven Night Jacques Evard for women,jacques evard perfumes and colognes,"Unforgiven Night by Jacques Evard is a Amber Floral fragrance for women. Top notes are Apple, Tangerine, Grapefruit, Pomegranate and Orange Blossom; middle notes are Lotus, Jasmine, Peony, Plum and Hibiscus; base notes are Amber, Vanilla and Musk.",,"apple, plum, jasmine, peony, lotus, orange blossom, vanilla, musk, grapefruit",7,4
10,Supremacy Blue Ainash Parfums for men,ainash parfums perfumes and colognes,"Supremacy Blue by Ainash Parfums is a Aromatic fragrance for men. This is a new fragrance. Supremacy Blue was launched in 2022. Top notes are Grapefruit, Lemon, Mint, Pink Pepper, Bergamot, Aldehydes and Coriander; middle notes are Ginger, Nutmeg, Jasmine and Melon; base notes are Incense, Amber, Cedar, Sandalwood, Patchouli, Labdanum and Amberwood.",5.0,"jasmine, mint, pink pepper, nutmeg, ginger, cedar, sandalwood, patchouli, labdanum, aldehydes, bergamot, lemon, grapefruit",7,4


## Description 기반 FAISS 추천

In [None]:
# Install the `faiss-cpu` package (provides the `faiss` module imported below)
# using an IPython shell escape.
!pip install faiss-cpu



In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
import faiss
import numpy as np
import pandas as pd

In [None]:
# 1. Extract the list of descriptions from the Neo4j recommendation DataFrame (df).
#    Missing descriptions are replaced by empty strings so every row yields a string.
descriptions = df["Description"].fillna("").tolist()

# 2. TF-IDF vectorization (vocabulary capped at 10000 features);
#    row i of tfidf_matrix corresponds to row position i of df.
vectorizer = TfidfVectorizer(max_features=10000)
tfidf_matrix = vectorizer.fit_transform(descriptions)

In [None]:
# 3. Convert the TF-IDF matrix to a float32 array for FAISS
tfidf_vectors = tfidf_matrix.toarray().astype("float32")

# 4. Create the FAISS index (L2 distance, dimension = number of TF-IDF features)
#    and add the vectors; vector i corresponds to row position i of df.
index = faiss.IndexFlatL2(tfidf_vectors.shape[1])
index.add(tfidf_vectors)

In [None]:
# 5. Locate the selected perfume in the DataFrame.
# regex=False matches the title as a literal substring, so characters such as
# "(" or "+" inside a perfume name are not interpreted as a regular expression.
title_mask = df["Title"].str.contains(
    input_perfume_title, case=False, na=False, regex=False
)
matched = df[title_mask]
if matched.empty:
    print(f"FAISS: '{input_perfume_title}'에 해당하는 향수를 추천 후보에서 찾을 수 없습니다.")
else:
    # df carries 1-based index labels (shifted after the Neo4j query), while
    # tfidf_vectors and the FAISS index are addressed by 0-based row position.
    # Keep both: the label for df.loc, the position for vectors / FAISS / iloc.
    # Using the label as a position selected the vector of the *next* perfume.
    selected_idx = matched.index[0]
    selected_pos = int(np.flatnonzero(title_mask.to_numpy())[0])

    # 6. TF-IDF vector of the selected perfume (kept 2-D: one row)
    query_vector = tfidf_vectors[selected_pos:selected_pos + 1]

    # 7. Search up to 6 neighbours (the query itself is normally among them).
    #    k is capped at the number of stored vectors.
    neighbour_count = min(6, tfidf_vectors.shape[0])
    D, I = index.search(query_vector, k=neighbour_count)

    # 8. Drop the perfume itself and any -1 placeholder FAISS may return when
    #    fewer results exist (iloc would read -1 as "last row"); keep 5 at most.
    filtered_indices = [
        int(pos) for pos in I[0] if pos >= 0 and pos != selected_pos
    ][:5]

    # 9. Print the results
    print("선택한 향수:", df.loc[selected_idx, "Title"])
    print("설명:", df.loc[selected_idx, "Description"])
    print("\n[FAISS 기반 Description 유사 추천 향수]")
    display(df.iloc[filtered_indices][["Title", "Description"]])

선택한 향수: Bleu Royal Princesse Marina De Bourbon for women
설명: Bleu Royal by Princesse Marina De Bourbon is a Amber Floral fragrance for women. Bleu Royal was launched in 2012. Top notes are Bergamot, Orange Blossom and Apple; middle notes are Violet, Jasmine and Lotus; base notes are Patchouli, Amber and Sandalwood.

[FAISS 기반 Description 유사 추천 향수]


Unnamed: 0,Title,Description
16,Classique Love Actually Jean Paul Gaultier for...,Classique Love Actually by Jean Paul Gaultier ...
4,L'Edition Elle for women,L'Edition by Elle is a Floral Fruity fragrance...
5,Amuro 7 Dzintars for women,Amuro 7 by Dzintars is a Floral Woody Musk fra...
28,Atlantis Garden Korres for women,Atlantis Garden by Korres is a Floral fragranc...
9,Unforgiven Night Jacques Evard for women,Unforgiven Night by Jacques Evard is a Amber F...
