<a href="https://colab.research.google.com/github/melissaarrington1/recommender_system/blob/main/Copy_of_Perfume_Recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Perfume Recommender**

This is a machine learning application that recommends perfume based on a preference of notes, age, or brand.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import ipywidgets as widgets
from sklearn.neighbors import NearestNeighbors

## Showing the First 5 Rows of the Perfume Dataset

In [4]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# CSV is read from that mount point in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Location of the perfume dataset on the mounted Google Drive.
perfume_csv = '/content/drive/MyDrive/Colab Notebooks/final_perfume_data.csv'

# Load the raw table and preview its first rows.
df = pd.read_csv(perfume_csv)
df.head()

Unnamed: 0,Name,Brand,Description,Notes,Image URL,Country,Age Range
0,Tihota Eau de Parfum,Indult,"Rapa Nui for sugar, Tihota is, quite simply, ...",Aromatic notes,https://static.luckyscent.com/images/products/...,France,45
1,Sola Parfum,Di Ser,A tribute to the expanse of space extending f...,Fruity notes,https://static.luckyscent.com/images/products/...,France,32
2,Kagiroi Parfum,Di Ser,An aromatic ode to the ancient beauty of Japa...,Green notes,https://static.luckyscent.com/images/products/...,France,34
3,Velvet Fantasy Eau de Parfum,Montale,Velvet Fantasy is a solar fragrance where cit...,Lemon,https://static.luckyscent.com/images/products/...,France,34
4,A Blvd. Called Sunset Eau de Parfum,A Lab on Fire,There's no way A Lab On Fire could relocate t...,Marine notes,https://static.luckyscent.com/images/products/...,France,54


In [None]:
# List the column labels of the loaded perfume table.
df.columns

In [None]:
# Print a summary of the frame: column dtypes, non-null counts and memory use.
df.info()

## Checking for duplicate perfume names, since any duplicates should be removed:

In [None]:
# Count the rows whose perfume name already appeared in an earlier row.
df['Name'].duplicated().sum()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the numeric columns.
df.describe()

## Visualizing Data

In [None]:
# Histogram of how many perfumes come from each country, drawn on an
# explicitly created axes object.
fig, ax = plt.subplots(figsize=(15, 6))
sns.histplot(data=df, x="Country", ax=ax)
ax.set_title("Number of Perfumes Per Country");



In [None]:
# Scatter plot of each perfume's age range against its country.
# Only the perfume data is needed here, so no seaborn demo dataset is loaded
# (loading one requires network access and would break offline runs).
sns.set(style='whitegrid')

sns.scatterplot(x="Age Range",
                y="Country",
                data=df)

## Another Plot of Age Range
### Displaying outliers of age 87

In [None]:
# Distribution of the 'Age Range' column.
sns.displot(data=df, x='Age Range')

## These are the most popular Notes in our dataset

In [None]:
# Five most frequent values of the Notes column, most common first.
df['Notes'].value_counts().head()

In [None]:
# Number of perfumes recorded for each country, most common first.
df['Country'].value_counts()

## Setting Up for Training Machine Learning Model
### Using Cosine Similarity

In [None]:
# scikit-learn utilities for the modelling section.
# NOTE(review): train_test_split, RandomForestRegressor, RandomForestClassifier
# and `model` are not referenced by any other cell visible in this notebook;
# only OneHotEncoder, ColumnTransformer and cosine_similarity are used below.
# Consider moving these imports to the import cell at the top of the notebook.
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Turn categories into numbers (one-hot encoding).
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.ensemble import RandomForestClassifier
# Random-forest classifier instance; it is never fitted in the visible cells.
model = RandomForestClassifier()

In [None]:
# Build the numeric feature matrix used for the similarity calculation.

# Independent variables. Two columns carry no fragrance information and are
# dropped:
#   * 'Image URL'  - link to the product picture.
#   * 'Unnamed: 0' - the row number stored in the CSV. If it is left in, it is
#     passed through as a numeric feature whose magnitude swamps the 0/1
#     one-hot columns, so similarity would mostly reflect row position.
# errors='ignore' keeps the cell working when a column is already absent.
X = df.drop(columns=['Image URL', 'Unnamed: 0'], errors='ignore')

# Dependent variable: the perfume name as a column vector.
y = np.array(df['Name']).reshape(-1,1)

categorical_features = ["Country", "Notes", "Name", "Brand", "Description"]
numeric_features = ["Age Range"]

# One-hot encode the text columns and rescale the age to [0, 1] so that it is
# on the same scale as the one-hot indicators instead of dominating them.
one_hot = OneHotEncoder(sparse_output=False)
age_scaler = preprocessing.MinMaxScaler()
transformer = ColumnTransformer([("one_hot",
                                  one_hot,
                                  categorical_features),
                                 ("scale",
                                  age_scaler,
                                  numeric_features)],
                                remainder="passthrough")

transformed_X = transformer.fit_transform(X)
transformed_X


In [None]:
# Display the feature frame X built in the previous cell.
X

In [None]:
# Preview the first rows of the encoded feature matrix as a DataFrame.
pd.DataFrame(transformed_X).head()

## Perform the similarity calculation using Cosine Similarity

In [None]:
# Pairwise cosine similarity between all rows of the encoded feature matrix;
# entry [i, j] compares perfume i with perfume j.
similarity = cosine_similarity(transformed_X)
similarity

In [None]:
# View the similarity matrix as a DataFrame with default integer labels;
# reset_index() adds the row number as an extra 'index' column.
pd.DataFrame(similarity).reset_index()

## The cosine-similarity matrix is computed; now let us label its rows and columns so that we can view it by column name

In [None]:
# Label both axes of the similarity matrix with the perfume names, then turn
# the row labels into an ordinary 'Name' column.
perfume_names = df['Name']
name_df = pd.DataFrame(similarity, index=perfume_names, columns=perfume_names)
name_df = name_df.reset_index()

name_df.head()

In [None]:
# Label both axes of the similarity matrix with each perfume's note, then turn
# the row labels into an ordinary 'Notes' column.
# NOTE(review): if several perfumes share a note, the column labels of this
# frame are duplicated - confirm before selecting a single column by note.
note_labels = df['Notes']
note_df = pd.DataFrame(similarity, index=note_labels, columns=note_labels)
note_df = note_df.reset_index()

note_df.head()

# Perfume Recommender
### Searching by Perfume Name

In [None]:
# Perfume the recommendations are based on.
input_perfume = 'Sola Parfum'

# Take the six rows with the highest similarity to the chosen perfume, keep
# only their names, and drop the chosen perfume itself from the list.
closest_rows = name_df.nlargest(6, input_perfume)
recommendations = pd.DataFrame(closest_rows['Name'])
is_other_perfume = recommendations['Name'] != input_perfume
recommendations = recommendations[is_other_perfume]

print("Based on ", input_perfume, " These are your top 5 recommended perfumes: " )
print(recommendations)

### Searching for a Perfume by "Note"

In [None]:
# Fragrance note the user likes.
input_note = 'Apricot blossom'

# Boolean mask (one entry per row of df / similarity) of the perfumes that
# carry the requested note. Working from df and the similarity matrix keeps
# this correct when several perfumes share the note.
has_note = (df['Notes'] == input_note).to_numpy()

if not has_note.any():
    # Unknown note: report it instead of failing with a KeyError.
    recommendations = df[['Name', 'Notes']].iloc[0:0]
    print("No perfume in the dataset lists the fragrance note ", input_note)
else:
    # Score every perfume by its average similarity to the perfumes that carry
    # the note, then recommend the five best-scoring perfumes that do not
    # carry the note themselves.
    note_scores = similarity[:, has_note].mean(axis=1)
    recommendations = (
        df[['Name', 'Notes']]
        .assign(Similarity=note_scores)
        .loc[~has_note]
        .nlargest(5, 'Similarity')
    )
    print("These are your top 5 recommended perfumes based on the fragrance note ", input_note,  " that you like: ")
    # Show the perfume names (with their notes and scores), not just the notes.
    print(recommendations)

In [None]:
#frames = [name_df, note_df]  # Or perform operations on the DFs
#result = pd.concat(frames, axis=1)

In [None]:
#result