In [1]:
from pathlib import Path
import polars as pl
import duckdb
from datasets import Dataset
import numpy as np
from autofaiss import build_index
from FlagEmbedding import BGEM3FlagModel
from core_pro.ultilities import make_dir, make_sync_folder


In [2]:
path = make_sync_folder("dataset/item_matching")
file = path / "data_sample_FMCG_clean.parquet"

query = f"""
select item_id
, item_name
from read_parquet('{file}')
limit 10000
"""
df = duckdb.sql(query).pl()
df.head()

item_id,item_name
i64,str
14019452100,"""M·∫Øt M√®o C·∫ßu V·ªìng Huaxi 7 M√†u T‚Ä¶"
25423506264,"""*FREESHIP* Mi·∫øng D√°n Trang Tr√≠‚Ä¶"
25650112647,"""(Tr·ª• v·ª´a) Mi tr·ª• katun, tr·ª• wi‚Ä¶"
10158281345,"""Mascara Chu·ªët Mi JUDYDOLL 2g D‚Ä¶"
2486662254,"""B·ªôt l·∫Øc s·ªØa Bledina Ph√°p (Date‚Ä¶"


In [3]:
def create_search_result(file_embed, data):
    embeddings = np.load(file_embed)
    print(embeddings.shape)

    data = data.with_columns(pl.Series(values=embeddings, name="embed"))
    dataset = Dataset.from_polars(data)
    dataset.set_format(type="numpy", columns=["embed"], output_all_columns=True)

    path_index = Path(path / f"index_{file_embed.stem}")
    build_index(
        embeddings=embeddings,
        index_path=str(path_index),
        save_on_disk=True,
        metric_type="ip",
        verbose=30,
    )
    dataset.load_faiss_index("embed", path_index)

    score, result = dataset.get_nearest_examples_batch(
        "embed", dataset["embed"], k=5
    )
    for i in result:
        del i["embed"]

    dict_ = {"score": [list(i) for i in score]}
    df_score = pl.DataFrame(dict_)
    df_result = (
        pl.DataFrame(result).select(pl.all().name.prefix(f"db_"))
    )
    df_match = pl.concat([df, df_result, df_score], how="horizontal")
    return df_match

In [4]:
file_embed = path / "bi_encode.npy"
df_match = create_search_result(file_embed, df)

(10000, 768)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 33825.03it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:00<00:00, 21.07it/s]
  0%|          | 0/1 [00:00<?, ?it/s]


In [5]:
file_embed = path / "bge_encode.npy"
df_match_bge = create_search_result(file_embed, df)

(10000, 1024)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 6384.02it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:00<00:00, 14.63it/s]
  0%|          | 0/1 [00:00<?, ?it/s]


In [6]:
rename_cols = {"db_item_id": "db_bi_item_id", "db_item_name": "db_bi_item_name", "score": "score_bi"}
df_final = df.join(df_match.drop(["item_name"]).rename(rename_cols), how="left", on="item_id")

rename_cols = {"db_item_id": "db_bge_item_id", "db_item_name": "db_bge_item_name", "score": "score_bi"}
df_final = df_final.join(df_match_bge.drop(["item_name"]).rename(rename_cols), how="left", on="item_id")

In [7]:
dif = [set(i["db_bi_item_name"]) ^ set(i["db_bge_item_name"]) for i in df_final[["db_bi_item_name", "db_bge_item_name"]].to_dicts()]
df_final = df_final.with_columns(
    pl.Series("dif", dif),
    pl.Series("dif_len", [len(_) for _ in dif]),
)

In [8]:
df_final

item_id,item_name,db_bi_item_id,db_bi_item_name,score_bi,db_bge_item_id,db_bge_item_name,score_bi_right,dif,dif_len
i64,str,list[i64],list[str],list[f32],list[i64],list[str],list[f32],object,i64
14019452100,"""M·∫Øt M√®o C·∫ßu V·ªìng Huaxi 7 M√†u T‚Ä¶","[14019452100, 29271686341, ‚Ä¶ 24039668933]","[""M·∫Øt M√®o C·∫ßu V·ªìng Huaxi 7 M√†u T·∫°o Hi·ªáu ·ª®ng 9D ( Si√™u H√≥t )"", ""N·∫•m Ngo√†i Da Cho G√† ƒê√° , Ch√≥ ,M√®o Ketomycine V√†ng Tu√Ωp 25g"", ‚Ä¶ ""N∆∞·ªõc Hoa N·ªØ Delina Exclusif Ch√≠nh H√£ng - M√πi H∆∞∆°ng Ng·ªçt Ng√†o, Uy Quy·ªÅn, Sang Tr·ªçng""]","[0.999534, 0.735134, ‚Ä¶ 0.569959]","[14019452100, 29271686341, ‚Ä¶ 27400189915]","[""M·∫Øt M√®o C·∫ßu V·ªìng Huaxi 7 M√†u T·∫°o Hi·ªáu ·ª®ng 9D ( Si√™u H√≥t )"", ""N·∫•m Ngo√†i Da Cho G√† ƒê√° , Ch√≥ ,M√®o Ketomycine V√†ng Tu√Ωp 25g"", ‚Ä¶ ""YADORNOS 24 Chi·∫øc Th·ªè Ng·ªçt Hoa Camellia Kim C∆∞∆°ng Gi·∫£ Ng·ªçc Trai B√© G√°i M√≥ng Tay Gi·∫£ Ngh·ªá Thu·∫≠t""]","[1.000086, 0.744827, ‚Ä¶ 0.645257]","{'YADORNOS 24 Chi·∫øc Th·ªè Ng·ªçt Hoa Camellia Kim C∆∞∆°ng Gi·∫£ Ng·ªçc Trai B√© G√°i M√≥ng Tay Gi·∫£ Ngh·ªá Thu·∫≠t', 'Th·ª©c ƒÇn S·∫•y Th√∫ C∆∞ng Mix Nhi·ªÅu V·ªã - D√πng ƒê∆∞·ª£c Cho Ch√≥ V√† M√®o', 'Son d∆∞·ª°ng m√¥i m·ª° h∆∞∆°u Astrid T√°i t·∫°o m√¥i kh√¥ D∆∞·ª°ng m√¥i m·ªÅm m·∫°i, Son Astrid', 'Ph·ªß ƒëen mi - l√¥ng m√†y, Thu·ªëc nhu·ªôm SABBUHA (3 m√†u: N√¢u, Coffee, ƒêen) - Phan D≈©ng', 'N∆∞·ªõc Hoa N·ªØ Delina Exclusif Ch√≠nh H√£ng - M√πi H∆∞∆°ng Ng·ªçt Ng√†o, Uy Quy·ªÅn, Sang Tr·ªçng', 'Nailbox thi·∫øt k·∫ø c232 m·∫´u ƒë∆°n gi·∫£n ch·ª•p k·ªâ y·∫øu ƒëi h·ªçc, ƒëi ch∆°i Cebi.nailbox'}",6
25423506264,"""*FREESHIP* Mi·∫øng D√°n Trang Tr√≠‚Ä¶","[25423506264, 20579970043, ‚Ä¶ 26817874290]","[""*FREESHIP* Mi·∫øng D√°n Trang Tr√≠ M√≥ng Tay Ch·ªëng Th·∫•m N∆∞·ªõc Ho·∫°t H√¨nh D·ªÖ Th∆∞∆°ng kh√¥ng th·∫•m n∆∞·ªõc cho b√©"", ""X·ªãt D∆∞·ª°ng T√≥c Hask Curl Care Argan oil 5-in-1 Leave-in Spray 175ml Ph·ª•c H·ªìi T√≥c Kh·ªèi H∆∞ T·ªïn M·ªÅm M∆∞·ª£t T√≥c H√†ng √öc"", ‚Ä¶ ""H·ªôp 32 t√∫i m√π moi moi, r√πa babythree, crybaby, m·ªông kuromi full box t√∫i m√π r√πa moimoi qu√† t·∫∑ng cho b√©""]","[0.999482, 0.499663, ‚Ä¶ 0.429144]","[25423506264, 24970535248, ‚Ä¶ 17765522937]","[""*FREESHIP* Mi·∫øng D√°n Trang Tr√≠ M√≥ng Tay Ch·ªëng Th·∫•m N∆∞·ªõc Ho·∫°t H√¨nh D·ªÖ Th∆∞∆°ng kh√¥ng th·∫•m n∆∞·ªõc cho b√©"", ""B·ªô 5 ƒê·∫ø G·∫Øn M√≥ng Qu√¢n C·ªù T·∫≠p V·∫Ω L√†m Nail | ƒê·∫ø Acrylic 5 M√≥ng Nailbox D·ªÖ D√†ng S·ª≠ D·ª•ng"", ‚Ä¶ ""[T·∫∂NG M√öT CHE KHUY·∫æT ƒêI·ªÇM] M·∫™U M·ªöI - Kem Che Khuy·∫øt ƒêi·ªÉm CKƒê The Saem Cover Perfection Tip Concealer SPF28 PA++ 6.5g""]","[1.00047, 0.596681, ‚Ä¶ 0.575912]","{'C·ªå (CH·ªîI) QU√âT M·∫∂T N·∫† - TAY C·∫¶M NH·ª∞A TRONG SU·ªêT - TAY C·∫¶M C√ÅN G·ªñ - C√ì TH·ªÇ S·ª¨ D·ª§NG NHI·ªÄU L·∫¶N TI·ªÜN L·ª¢I üíìüíìüíìüíìüíì', 'Set 15 ƒë·∫ßu m√†i gi·∫•y nh√°m chuy√™n d·ª•ng cho d√¢n l√†m m√≥ng', '(GWP) C·ªç trang ƒëi·ªÉm 3CE ƒëa nƒÉng 3CE Brush Makeup Brush 15.7cm', 'B·ªô 5 ƒê·∫ø G·∫Øn M√≥ng Qu√¢n C·ªù T·∫≠p V·∫Ω L√†m Nail | ƒê·∫ø Acrylic 5 M√≥ng Nailbox D·ªÖ D√†ng S·ª≠ D·ª•ng', 'H·ªôp 32 t√∫i m√π moi moi, r√πa babythree, crybaby, m·ªông kuromi full box t√∫i m√π r√πa moimoi qu√† t·∫∑ng cho b√©', 'T√∫i x√°ch trang ƒëi·ªÉm UNMEGONI, T√∫i ƒë·ª±ng m·ªπ ph·∫©m dung t√≠ch l·ªõn b·∫±ng l∆∞·ªõi, T√∫i ƒë·ª±ng ƒë·ªì v·ªá sinh c√≥ d√¢y k√©o hai l·ªõp di ƒë·ªông Du l·ªãch', 'X·ªãt D∆∞·ª°ng T√≥c Hask Curl Care Argan oil 5-in-1 Leave-in Spray 175ml Ph·ª•c H·ªìi T√≥c Kh·ªèi H∆∞ T·ªïn M·ªÅm M∆∞·ª£t T√≥c H√†ng √öc', '[T·∫∂NG M√öT CHE KHUY·∫æT ƒêI·ªÇM] M·∫™U M·ªöI - Kem Che Khuy·∫øt ƒêi·ªÉm CKƒê The Saem Cover Perfection Tip Concealer SPF28 PA++ 6.5g'}",8
25650112647,"""(Tr·ª• v·ª´a) Mi tr·ª• katun, tr·ª• wi‚Ä¶","[25650112647, 11730202896, ‚Ä¶ 8270710040]","[""(Tr·ª• v·ª´a) Mi tr·ª• katun, tr·ª• wispy SEDUIRE d√πng ƒë·ªÉ n·ªëi mi m·∫Øt ∆∞·ªõt ƒë·∫ßy ƒë·ªß size (d·ª•ng c·ª• n·ªëi mi)"", ""B√°t ƒÉn ch·ªëng g√π cho m√®o ch√≥ th√∫ c∆∞ng"", ‚Ä¶ ""Pink coco Son b√≥ng m√†u h·ªìng d·ªÖ th∆∞∆°ng B·ªÅ m·∫∑t g∆∞∆°ng d∆∞·ª°ng ·∫©m D∆∞·ª°ng ·∫©m thi√™n nhi√™n l√¢u tr√¥i Son m√¥i""]","[1.000652, 0.771219, ‚Ä¶ 0.615231]","[25650112647, 25935229965, ‚Ä¶ 25472011611]","[""(Tr·ª• v·ª´a) Mi tr·ª• katun, tr·ª• wispy SEDUIRE d√πng ƒë·ªÉ n·ªëi mi m·∫Øt ∆∞·ªõt ƒë·∫ßy ƒë·ªß size (d·ª•ng c·ª• n·ªëi mi)"", ""B·ªçt m·ªÅm t·∫°o ki·ªÉu t√≥c xoƒÉn Bloom 300ml"", ‚Ä¶ ""M√≥ng ch√¢n gi·∫£ NG·∫ÆN m√†u nude ML02 nh·∫°t nail box ch√¢n GI√Å R·∫∫ ƒë·∫πp ƒë∆°n gi·∫£n th√≠ch h·ª£p ƒëi ch∆°i t·∫∑ng keo d√°n m√≥ng MC053""]","[1.000265, 0.809663, ‚Ä¶ 0.660487]","{'Pink coco Son b√≥ng m√†u h·ªìng d·ªÖ th∆∞∆°ng B·ªÅ m·∫∑t g∆∞∆°ng d∆∞·ª°ng ·∫©m D∆∞·ª°ng ·∫©m thi√™n nhi√™n l√¢u tr√¥i Son m√¥i', 'M√≥ng ch√¢n gi·∫£ NG·∫ÆN m√†u nude ML02 nh·∫°t nail box ch√¢n GI√Å R·∫∫ ƒë·∫πp ƒë∆°n gi·∫£n th√≠ch h·ª£p ƒëi ch∆°i t·∫∑ng keo d√°n m√≥ng MC053'}",2
10158281345,"""Mascara Chu·ªët Mi JUDYDOLL 2g D‚Ä¶","[10158281345, 25364580727, ‚Ä¶ 27402150482]","[""Mascara Chu·ªët Mi JUDYDOLL 2g D√†y D√†i Si√™u Cong L√¢u Tr√¥i Ch·ªëng Th·∫•m N∆∞·ªõc Su·ªët 24h Curling Iron Mascara"", ""Th√πng 48 h·ªôp s·ªØa l√∫a m·∫°ch Nestl√© MILO x 110ml (12x4x110ml)"", ‚Ä¶ ""Kem D∆∞·ª°ng Da D∆∞·ª°ng ·∫®m, M·ªÅm M·ªãn Ch·ªëng N·ª©t N·∫ª Da Johnson's Baby 50g""]","[1.000034, 0.996156, ‚Ä¶ 0.526873]","[10158281345, 25364580727, ‚Ä¶ 17013806157]","[""Mascara Chu·ªët Mi JUDYDOLL 2g D√†y D√†i Si√™u Cong L√¢u Tr√¥i Ch·ªëng Th·∫•m N∆∞·ªõc Su·ªët 24h Curling Iron Mascara"", ""Th√πng 48 h·ªôp s·ªØa l√∫a m·∫°ch Nestl√© MILO x 110ml (12x4x110ml)"", ‚Ä¶ ""Gel R·ª≠a M·∫∑t SVR Sebiaclear Gel Moussant Hasaki S·∫£n Ph·∫©m Ch√≠nh H√£ng""]","[0.99954, 0.964407, ‚Ä¶ 0.649622]","{'Gel R·ª≠a M·∫∑t SVR Sebiaclear Gel Moussant Hasaki S·∫£n Ph·∫©m Ch√≠nh H√£ng', 'Combo ƒëi sinh CB03 ƒë·∫ßy ƒë·ªß ƒë·ªì d√πng thi·∫øt cho m·∫π v√† b√© ƒëi sinh-T·∫∑ng thau t·∫Øm b√©,g·ªëi ch·ªëng tr√†o', 'N∆∞·ªõc t·∫©y trang b√≠ ƒëao Cocoon t·∫©y s·∫°ch makeup cho da d·∫ßu m·ª•n| Cherry Beauty', 'S√°p Kh·ª≠ M√πi N·ªØ Dove Moisturizers 74gr M·ªπ - NgƒÉn Ti·∫øt M·ªì H√¥i 24h', ""Kem D∆∞·ª°ng Da D∆∞·ª°ng ·∫®m, M·ªÅm M·ªãn Ch·ªëng N·ª©t N·∫ª Da Johnson's Baby 50g"", 'Kem Ch·ªëng R√°t Da ƒê·∫ßu Khi L√†m H√≥a Ch·∫•t Ziemlich | Chenglovehair, Chenglovehairs'}",6
2486662254,"""B·ªôt l·∫Øc s·ªØa Bledina Ph√°p (Date‚Ä¶","[2486662254, 23466354851, ‚Ä¶ 24405812537]","[""B·ªôt l·∫Øc s·ªØa Bledina Ph√°p (Date 2025)"", ""M·∫∑t N·∫° S·ªßi B·ªçt Th·∫£i ƒê·ªôc Chi·∫øt Xu·∫•t B∆° S·∫°ch S√¢u L·ªó Ch√¢n L√¥ng, H·ªó Tr·ª£ Gi·∫£m H·∫øt M·ª•n, Nh·ªùn, M·ªÅm M·ªãn Da"", ‚Ä¶ ""C√¢y Ch√† G√≥t Ch√¢n T·∫©y Da Ch·∫øt 2 M·∫∑t - C√¢y M√†i G√≥t Ch√¢n T·∫©y T·∫ø B√†o Ch·∫øt T·∫°i Nh√†""]","[1.0003, 0.624504, ‚Ä¶ 0.563621]","[2486662254, 24672948554, ‚Ä¶ 25230361735]","[""B·ªôt l·∫Øc s·ªØa Bledina Ph√°p (Date 2025)"", ""g·∫°o s√©ng c√π tr√≤n ƒëi·ªán bi√™n d·∫ªo ngon ƒë·∫≠m v·ªã bao 5kg"", ‚Ä¶ ""Kem Face ƒê√™m X3 ƒê√¥ng Tr√πng H·∫° Th·∫£o 15g [H√†ng C√¥ng Ty] H·ªó Tr·ª£ D∆∞·ª°ng Tr·∫Øng Da, Gi·ªØ ·∫®m, Ng·ª´a L√£o H√≥a Da""]","[1.000473, 0.726839, ‚Ä¶ 0.710833]","{'H·ªôp 200 c√¢y tƒÉm b√¥ng t·ª± nhi√™n ngo√°y tai 2 ƒë·∫ßu an to√†n', 'M·∫∑t N·∫° S·ªßi B·ªçt Th·∫£i ƒê·ªôc Chi·∫øt Xu·∫•t B∆° S·∫°ch S√¢u L·ªó Ch√¢n L√¥ng, H·ªó Tr·ª£ Gi·∫£m H·∫øt M·ª•n, Nh·ªùn, M·ªÅm M·ªãn Da', 'Kem Face ƒê√™m X3 ƒê√¥ng Tr√πng H·∫° Th·∫£o 15g [H√†ng C√¥ng Ty] H·ªó Tr·ª£ D∆∞·ª°ng Tr·∫Øng Da, Gi·ªØ ·∫®m, Ng·ª´a L√£o H√≥a Da', 'g·∫°o s√©ng c√π tr√≤n ƒëi·ªán bi√™n d·∫ªo ngon ƒë·∫≠m v·ªã bao 5kg', 'C√¢y Ch√† G√≥t Ch√¢n T·∫©y Da Ch·∫øt 2 M·∫∑t - C√¢y M√†i G√≥t Ch√¢n T·∫©y T·∫ø B√†o Ch·∫øt T·∫°i Nh√†', 'Combo 2 Son Tint N∆∞·ªõc M·ªãn L√¨ Thu·∫ßn Chay SUGARPLUM MistyNow Blurring Tint (2 x 3.8g)'}",6
…,…,…,…,…,…,…,…,…,…
28105292815,"""N∆∞·ªõc Hoa H·ªìng Kh√¥ng M√πi D√†nh C‚Ä¶","[28105292815, 4650684395, ‚Ä¶ 27601130165]","[""N∆∞·ªõc Hoa H·ªìng Kh√¥ng M√πi D√†nh Cho Da Nh·∫°y C·∫£m Klairs Supple Preparation Unscented Toner 180ml"", ""T√£ d·∫°ng ch√¢n v√°y l√†m b·∫±ng cotton ch·ªëng th·∫•m n∆∞·ªõc d√†nh cho b√© thi·∫øt k·∫ø ch·∫•t l∆∞·ª£ng cao"", ‚Ä¶ ""Meikodeft 50 Chi·∫øc Tr·∫Øng Trong Su·ªët Series M√≥ng Tay Kim C∆∞∆°ng Gi·∫£ Flatback Pha L√™ ƒê√° Qu√Ω 3D L·∫•p L√°nh M√≥ng Tay Ngh·ªá Thu·∫≠t Trang Tr√≠ VN""]","[0.999682, 0.559834, ‚Ä¶ 0.54208]","[28105292815, 26106417599, ‚Ä¶ 9665792755]","[""N∆∞·ªõc Hoa H·ªìng Kh√¥ng M√πi D√†nh Cho Da Nh·∫°y C·∫£m Klairs Supple Preparation Unscented Toner 180ml"", ""[Handmade] 10 Chi·∫øc M√≥ng Tay Gi·∫£ Thi·∫øt K·∫ø M·∫Øt M√®o V·ªõi K√≠ch C·ª° Kh√°c Nhau, Thi·∫øt K·∫ø Tinh T·∫ø V√† D√†y H∆°n V·ªõi Keo Th·∫°ch"", ‚Ä¶ ""Son D∆∞·ª°ng Mediheal L√†m M·ªù Th√¢m M√¥i, M·ªÅm M√¥i, H·ªìng M√¥i Labocare Healbalm 10ml""]","[0.99943, 0.711178, ‚Ä¶ 0.68011]","{'Meikodeft 50 Chi·∫øc Tr·∫Øng Trong Su·ªët Series M√≥ng Tay Kim C∆∞∆°ng Gi·∫£ Flatback Pha L√™ ƒê√° Qu√Ω 3D L·∫•p L√°nh M√≥ng Tay Ngh·ªá Thu·∫≠t Trang Tr√≠ VN', 'Son D∆∞·ª°ng Mediheal L√†m M·ªù Th√¢m M√¥i, M·ªÅm M√¥i, H·ªìng M√¥i Labocare Healbalm 10ml', '[MUA 5 T·∫∑ng 2] Combo 7 b·ªãch C∆°m ch√°y si√™u ch√† b√¥ng si√™u ngon', ""Gel D∆∞·ª°ng C·∫•p N∆∞·ªõc I'm From Vitamin Tree Water Gel 75g"", '[Handmade] 10 Chi·∫øc M√≥ng Tay Gi·∫£ Thi·∫øt K·∫ø M·∫Øt M√®o V·ªõi K√≠ch C·ª° Kh√°c Nhau, Thi·∫øt K·∫ø Tinh T·∫ø V√† D√†y H∆°n V·ªõi Keo Th·∫°ch', 'T√£ d·∫°ng ch√¢n v√°y l√†m b·∫±ng cotton ch·ªëng th·∫•m n∆∞·ªõc d√†nh cho b√© thi·∫øt k·∫ø ch·∫•t l∆∞·ª£ng cao', '„ÄêH∆∞∆°ng Qu·∫ø„ÄëX√† B√¥ng Handmade 100% D·∫ßu D·ª´a T·ª± Nhi√™n L√†m ·∫§m v√† Gi·∫£m M·ª•n', 'B√îNG C·∫ÆT S·∫¥N 1200 MI·∫æNG Kh√¥ng X∆° D·ªát D√†nh Cho Phun XƒÉm'}",8
29066147556,"""anpn Thanh LƒÉn M·∫∑t N·∫° ƒê·∫•t S√©t ‚Ä¶","[29066147556, 26513412247, ‚Ä¶ 23284832121]","[""anpn Thanh LƒÉn M·∫∑t N·∫° ƒê·∫•t S√©t B√πn Kho√°ng Xanh V·ªõi Chi·∫øt Xu·∫•t B·ªôt Tr√† Xanh V√† Rau M√° Clay Stick Mask 40g"", ""[YJ022] S∆°n gel Xeijayi ch√≠nh h√£ng si√™u ƒë·∫∑c m√†u n√¢u r√™u m√£ YJ022 15ml"", ‚Ä¶ ""[H·ªòP 750g] H·∫°t dinh d∆∞·ª°ng cao c·∫•p B·ªï sung DHA + Ph√¥ Mai Kh√¥ GULU FOODS""]","[0.999359, 0.787585, ‚Ä¶ 0.761288]","[29066147556, 10398530262, ‚Ä¶ 24134278749]","[""anpn Thanh LƒÉn M·∫∑t N·∫° ƒê·∫•t S√©t B√πn Kho√°ng Xanh V·ªõi Chi·∫øt Xu·∫•t B·ªôt Tr√† Xanh V√† Rau M√° Clay Stick Mask 40g"", ""Socola t∆∞∆°i Melty Kiss Meiji N·ªôi ƒê·ªãa Nh·∫≠t"", ‚Ä¶ ""Combo 2 K·∫πp B·∫•m Mi Choice TR1-23740-3 Si√™u Cong Inox Cao C·∫•p B·∫•m √äm T·∫∑ng K√®m 1 ƒê·ªám K·∫πp""]","[1.000273, 0.772987, ‚Ä¶ 0.726087]","{'Combo 2 K·∫πp B·∫•m Mi Choice TR1-23740-3 Si√™u Cong Inox Cao C·∫•p B·∫•m √äm T·∫∑ng K√®m 1 ƒê·ªám K·∫πp', 'T√∫i Da ƒê·ª±ng M·ªπ Ph·∫©m M.O.I [QU√Ä T·∫∂NG]'}",2
23446356680,"""S∆°n gel Ng·ªçc Trai B281 s∆°n √°nh‚Ä¶","[23446356680, 25321902320, ‚Ä¶ 23224005652]","[""S∆°n gel Ng·ªçc Trai B281 s∆°n √°nh trai ch·∫•t ƒë·∫≠m ƒë·∫∑c, s∆°n m∆∞·ªõt l√™n m√†u c·ª±c chu·∫©n cho m√≥ng tay"", ""Cappuvini Heart Pink Lip Gloss, M√†u s·∫Øc s·ªëng ƒë·ªông, K·∫øt th√∫c m·ªçng n∆∞·ªõc & b√≥ng, Men d∆∞·ª°ng ·∫©m l√¢u tr√¥i M√†u s·∫Øc cao, Trang ƒëi·ªÉm trong su·ªët & t·ª± nhi√™n, Son b√≥ng m√¥i"", ‚Ä¶ ""B·∫•m m√≥ng tay ƒëa nƒÉng""]","[1.000353, 0.773589, ‚Ä¶ 0.701994]","[23446356680, 27359796718, ‚Ä¶ 23224005652]","[""S∆°n gel Ng·ªçc Trai B281 s∆°n √°nh trai ch·∫•t ƒë·∫≠m ƒë·∫∑c, s∆°n m∆∞·ªõt l√™n m√†u c·ª±c chu·∫©n cho m√≥ng tay"", ""Sticker 5D h√¨nh d√°n m√≥ng tay ho·∫° ti·∫øt ng√¥i sao 5 c√°nh ƒë√≠nh ƒë√° cao c·∫•p phong c√°ch Y2K ph·ª• ki·ªán trang tr√≠ m√≥ng tay nail"", ‚Ä¶ ""B·∫•m m√≥ng tay ƒëa nƒÉng""]","[0.999794, 0.895277, ‚Ä¶ 0.842562]",set(),0
19783958002,"""G·ªëi H∆°i VƒÉn Ph√≤ng ti·ªán d·ª•ng""","[19783958002, 19594661282, ‚Ä¶ 27502880405]","[""G·ªëi H∆°i VƒÉn Ph√≤ng ti·ªán d·ª•ng"", ""X∆∞∆°ng U·ªën T√≥c - D·ª§ng C·ª• H·ªó Tr·ª£ L√†m T√≥c"", ‚Ä¶ ""M√°y Tri·ªát L√¥ng Vƒ©nh Vi·ªÖn C·∫ßm Tay T·∫°i Nh√† Parsion""]","[1.000325, 0.591591, ‚Ä¶ 0.446051]","[19783958002, 19594661282, ‚Ä¶ 20248518085]","[""G·ªëi H∆°i VƒÉn Ph√≤ng ti·ªán d·ª•ng"", ""X∆∞∆°ng U·ªën T√≥c - D·ª§ng C·ª• H·ªó Tr·ª£ L√†m T√≥c"", ‚Ä¶ ""C·∫Øm c·ªç mica, ·ªëng c·∫Øm c·ªç, c·∫Øm b√∫t""]","[0.999705, 0.732126, ‚Ä¶ 0.589415]","{'C·∫Øm c·ªç mica, ·ªëng c·∫Øm c·ªç, c·∫Øm b√∫t', 'N∆∞·ªõc t·∫©y trang b√≠ ƒëao Cocoon t·∫©y s·∫°ch makeup cho da d·∫ßu m·ª•n| Cherry Beauty', 'M√°y Tri·ªát L√¥ng Vƒ©nh Vi·ªÖn C·∫ßm Tay T·∫°i Nh√† Parsion', 'B√†n ch·∫£i trang ƒëi·ªÉm B√†n ch·∫£i trang ƒëi·ªÉm c√° nh√¢n B√†n ch·∫£i che khuy·∫øt ƒëi·ªÉm B√†n ch·∫£i trang ƒëi·ªÉm b√∫t k·∫ª m·∫Øt B√†n ch·∫£i trang ƒëi·ªÉm', 'Meso d·∫°ng b√¥i gi√∫p da tr·∫Øng m·ªãn cƒÉng b√≥ng (10ml) - Lamer', 'Dao th√©p kh√¥ng g·ªâ g·ªçt c·∫Øt t·ªâa 2 ƒë·∫ßu h·ªó tr·ª£ cho epoxy putty k·∫øt h·ª£p pha tr·ªôn s∆°n - model trimming knife - D·ª•ng c·ª• m√¥ h√¨nh'}",6


In [19]:
for n in ["item_name", "db_bi_item_name", "db_bge_item_name", "dif"]:
    print("=" * 20)
    print(df_final.to_dicts()[3][n])

Mascara Chu·ªët Mi JUDYDOLL 2g D√†y D√†i Si√™u Cong L√¢u Tr√¥i Ch·ªëng Th·∫•m N∆∞·ªõc Su·ªët 24h Curling Iron Mascara
['Mascara Chu·ªët Mi JUDYDOLL 2g D√†y D√†i Si√™u Cong L√¢u Tr√¥i Ch·ªëng Th·∫•m N∆∞·ªõc Su·ªët 24h Curling Iron Mascara', 'Th√πng 48 h·ªôp s·ªØa l√∫a m·∫°ch Nestl√© MILO x 110ml (12x4x110ml)', 'Combo ƒëi sinh CB03 ƒë·∫ßy ƒë·ªß ƒë·ªì d√πng thi·∫øt cho m·∫π v√† b√© ƒëi sinh-T·∫∑ng thau t·∫Øm b√©,g·ªëi ch·ªëng tr√†o', 'Kem Ch·ªëng R√°t Da ƒê·∫ßu Khi L√†m H√≥a Ch·∫•t Ziemlich | Chenglovehair, Chenglovehairs', "Kem D∆∞·ª°ng Da D∆∞·ª°ng ·∫®m, M·ªÅm M·ªãn Ch·ªëng N·ª©t N·∫ª Da Johnson's Baby 50g"]
['Mascara Chu·ªët Mi JUDYDOLL 2g D√†y D√†i Si√™u Cong L√¢u Tr√¥i Ch·ªëng Th·∫•m N∆∞·ªõc Su·ªët 24h Curling Iron Mascara', 'Th√πng 48 h·ªôp s·ªØa l√∫a m·∫°ch Nestl√© MILO x 110ml (12x4x110ml)', 'N∆∞·ªõc t·∫©y trang b√≠ ƒëao Cocoon t·∫©y s·∫°ch makeup cho da d·∫ßu m·ª•n| Cherry Beauty', 'S√°p Kh·ª≠ M√πi N·ªØ Dove Moisturizers 74gr M·ªπ - NgƒÉn Ti·∫øt M·ªì H√¥i 24h', 'Gel R·ª≠a M·∫∑t SV

In [10]:
df_final["db_bi_item_name"][0]

"""M·∫Øt M√®o C·∫ßu V·ªìng Huaxi 7 M√†u T‚Ä¶"
"""N·∫•m Ngo√†i Da Cho G√† ƒê√° , Ch√≥ ,‚Ä¶"
"""Son d∆∞·ª°ng m√¥i m·ª° h∆∞∆°u Astrid T‚Ä¶"
"""Th·ª©c ƒÇn S·∫•y Th√∫ C∆∞ng Mix Nhi·ªÅu‚Ä¶"
"""N∆∞·ªõc Hoa N·ªØ Delina Exclusif Ch‚Ä¶"
