In [1]:
import pandas as pd
import numpy as np

In [2]:
import warnings
warnings.simplefilter("ignore", category=Warning)

* https://discuss.huggingface.co/t/how-to-update-the-gr-dropdown-block-in-gradio-blocks/19231
* https://stackoverflow.com/questions/76693922/what-am-i-doing-wrong-with-gradio-dropdown-how-to-dynamically-modify-the-choice
* https://www.gradio.app/docs/dropdown

## Open Data

In [3]:
df = pd.read_excel('database\perfume_database.xlsx', 
                   usecols=['brand', 'perfume', 'notes'])

In [4]:
# Drop perfumes with no notes
df = df[df['notes'].notna()]
df.reset_index(inplace=True, drop=True)
df

Unnamed: 0,brand,perfume,notes
0,18 21 Man Made,Sweet Tobacco Spirits,"[""Citruses"", ""Saffron"", ""Tonka Bean"", ""Vanilla..."
1,40 Notes Perfume,Cashmere Musk,"[""Sandalwood"", ""Cedar"", ""White Musk"", ""Cashmer..."
2,40 Notes Perfume,Exotic Ylang Ylang,"[""Ylang-Ylang"", ""Gardenia"", ""Musk""]"
3,40 Notes Perfume,Exquisite Amber,"[""Labdanum"", ""Styrax"", ""Benzoin"", ""Vanilla"", ""..."
4,40 Notes Perfume,Oudwood Veil,"[""Kephalis"", ""Agarwood (Oud)""]"
...,...,...,...
36964,Urban Rituelle,Lemongrass Blend,"[""Lemongrass"", ""Myrtle"", ""Grapefruit"", ""Eucaly..."
36965,Urban Rituelle,Peach Blossom,"[""Peach"", ""Honey"", ""Sweet Pea"", ""Mimosa""]"
36966,Urban Rituelle,Pomegranate,"[""Pomegranate"", ""Citruses"", ""Red Berries""]"
36967,Urban Rituelle,Vanilla,"[""Vanilla"", ""Caramel"", ""Milk""]"


In [5]:
corpus = pd.DataFrame(df['notes'])
corpus.head(10)

Unnamed: 0,notes
0,"[""Citruses"", ""Saffron"", ""Tonka Bean"", ""Vanilla..."
1,"[""Sandalwood"", ""Cedar"", ""White Musk"", ""Cashmer..."
2,"[""Ylang-Ylang"", ""Gardenia"", ""Musk""]"
3,"[""Labdanum"", ""Styrax"", ""Benzoin"", ""Vanilla"", ""..."
4,"[""Kephalis"", ""Agarwood (Oud)""]"
5,"[""Green Notes"", ""Jasmine"", ""Tuberose"", ""Honeys..."
6,"[""Grapefruit"", ""Black Currant"", ""Honeysuckle"",..."
7,"[""Orange Blossom"", ""Neroli"", ""White Musk""]"
8,"{""middle"": [""Woodsy Notes"", ""Coriander"", ""Nutm..."
9,"{""middle"": [""Damask Rose"", ""Rose""], ""base"": [""..."


## Clean Data

In [6]:
itens_to_remove = [
    '[', ']', '"', '{', '}',
    'middle: ', 'top: ', 'base: ', 'null'
]
def remove_items(text):
    for item in itens_to_remove:
        text = text.replace(item, "")
    return text

In [7]:
# Cleaning text
corpus['notes'] = corpus['notes'].astype(str)
corpus['notes'] = corpus['notes'].str.lower()
corpus['notes'] = corpus['notes'].apply(remove_items)
corpus.head(10)

Unnamed: 0,notes
0,"citruses, saffron, tonka bean, vanilla, exotic..."
1,"sandalwood, cedar, white musk, cashmere wood"
2,"ylang-ylang, gardenia, musk"
3,"labdanum, styrax, benzoin, vanilla, musk"
4,"kephalis, agarwood (oud)"
5,"green notes, jasmine, tuberose, honeysuckle"
6,"grapefruit, black currant, honeysuckle, orchid..."
7,"orange blossom, neroli, white musk"
8,"woodsy notes, coriander, nutmeg, patchouli, oa..."
9,"damask rose, rose, amber, ginger, apricot, cle..."


## Vectorize Data

In [8]:
from sklearn.feature_extraction.text import CountVectorizer

In [9]:
def custom_tokenizer(text):
    return text.split(',')

In [10]:
count_vectorizer = CountVectorizer(tokenizer=custom_tokenizer)

In [11]:
bag_of_words = count_vectorizer.fit_transform(corpus.notes)

In [12]:
bag_of_words.shape

(36969, 2145)

## Calculate similarity

In [13]:
from sklearn.metrics.pairwise import cosine_similarity
import scipy.sparse as sp
import pickle, os, sys

In [14]:
similarity_matrix_sparse = cosine_similarity(bag_of_words, dense_output=False)

In [15]:
similarity_matrix_sparse.shape

(36969, 36969)

In [16]:
type(similarity_matrix_sparse)

scipy.sparse._csr.csr_matrix

In [26]:
max_values = 10
num_rows = similarity_matrix_sparse.shape[0]

top_index = []
top_values = []

for index in range(num_rows):
    if index%5000 == 0 and index !=0: print(f'{index} calculated values')
    if index+1 == num_rows: print(f'{index} calculated values')
    perfume_search = similarity_matrix_sparse.getrow(index)
    top_similarity = np.argsort(perfume_search.data)[-max_values:][::-1]
    top_index.append(perfume_search.indices[top_similarity])
    top_values.append(perfume_search.data[top_similarity])

5000 calculated values
10000 calculated values
15000 calculated values
20000 calculated values
25000 calculated values
30000 calculated values
35000 calculated values
36968 calculated values


## App

In [28]:
import gradio as gr
from gradio.components import Dropdown, Textbox

In [29]:
def rs_change(rs):
    homeworks={}
    homeworks[rs] = df[df['brand']==rs]['perfume'].to_list()
    return gr.Dropdown.update(choices=homeworks[rs])

def test():
    pass

def webui():
    inputs = [
        gr.Dropdown(choices=list(df['brand'].unique()), label="Brand"),
        gr.Dropdown(choices=[], label="Perfume"),
        ]

    with gr.Blocks() as app:
        gr.Interface(
            fn=test,
            inputs=inputs,
            outputs=None
        )

        inputs[0].select(fn=rs_change, inputs=inputs[0], outputs=inputs[1])

    app.launch()

webui()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


In [43]:
import gradio as gr
import pandas as pd

# Suponha que você já tenha o DataFrame carregado como 'df'
# Exemplo:
# df = pd.DataFrame({
#     'Marca': ['Ford', 'Toyota', 'Chevrolet'],
#     'Carro': ['Focus', 'Corolla', 'Onix'],
#     'Valor': [50000, 60000, 45000]
# })

# Função para filtrar o DataFrame com base na marca e/ou carro
def filter_dataframe(marca, carro):
    filtered_df = df
    if marca != "Todas":
        filtered_df = filtered_df[filtered_df['Marca'] == marca]
    if carro != "Todos":
        filtered_df = filtered_df[filtered_df['Carro'] == carro]
    return filtered_df['Valor'].values[0] if not filtered_df.empty else 'Não encontrado'

# Interface do Gradio
marca_dropdown = gr.inputs.Dropdown(["Todas"] + list(df['Marca'].unique()), label="Marca")
carro_dropdown = gr.inputs.Dropdown(["Todos"] + list(df['Carro'].unique()), label="Carro")
iface = gr.Interface(
    fn=filter_dataframe,  # Função a ser chamada
    inputs=[marca_dropdown, carro_dropdown],
    outputs=gr.outputs.Textbox(label="Valor")
)

# Executar a interface
iface.launch()


Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.




In [44]:
def filter_records(records, gender):
    return records[records["gender"] == gender]


demo = gr.Interface(
    filter_records,
    [
        gr.Dataframe(
            headers=["name", "age", "gender"],
            datatype=["str", "number", "str"],
            row_count=5,
            col_count=(3, "fixed"),
        ),
        gr.Dropdown(["M", "F", "O"]),
    ],
    "dataframe",
    description="Enter gender as 'M', 'F', or 'O' for other.",
)

demo.launch()

Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.




In [45]:
import gradio as gr
import pandas as pd

# Suponha que você já tenha o DataFrame carregado como 'df'
# Exemplo:
 df = pd.DataFrame({
     'Marca': ['Ford', 'Toyota', 'Chevrolet'],
     'Modelo': ['Focus', 'Corolla', 'Onix'],
     'Preço': [50000, 60000, 45000]
 })

def filter_dataframe(marca, modelo):
    filtered_df = df
    if marca != "Todas":
        filtered_df = filtered_df[filtered_df['Marca'] == marca]
    if modelo != "Todos":
        filtered_df = filtered_df[filtered_df['Modelo'] == modelo]
    return filtered_df['Preço'].tolist()

# Interface do Gradio
marca_dropdown = gr.inputs.Dropdown(["Todas"] + list(df['Marca'].unique()), label="Marca")
modelo_dropdown = gr.inputs.Dropdown(["Todos"] + list(df['Modelo'].unique()), label="Modelo")
iface = gr.Interface(
    fn=filter_dataframe,  # Função a ser chamada
    inputs=[marca_dropdown, modelo_dropdown],
    outputs=gr.outputs.Textbox(label="Preços Filtrados")
)

# Executar a interface
iface.launch()


KeyError: 'Modelo'