In [1]:
import re
import sys

import numpy as np

sys.path.append('..')
from modules.normalize_text import normalize_text, remove_special_characters, homogenize_units, extract_units
from modules.distance_metrics import levenshtein_and_dice_ratio, jaccard_distance_units
from modules.sku_matcher import get_confidence

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Performance: run get_performance (defined in the next cell) on pairs of product descriptions

In [6]:
def get_performance(s1, s2):
    """Print a matching report for two raw product descriptions.

    Both inputs go through ``normalize_text``. The report then shows, in
    order: the two normalized strings, one line with three confidence
    figures, and the result of ``extract_units`` for each normalized string.

    Parameters
    ----------
    s1, s2
        Raw product description strings to compare.

    Returns
    -------
    None
        Everything is emitted through ``print`` / ``display``.
    """
    # Cleaning
    clean_1, clean_2 = normalize_text(s1), normalize_text(s2)
    print(clean_1, '\n', clean_2, sep = '')
    print()

    # Three confidence figures: the first two are one minus the value returned
    # by the respective distance helper, rounded to two decimals; the third is
    # taken from get_confidence as-is.
    lev_dice_conf = round(1 - levenshtein_and_dice_ratio(clean_1, clean_2), 2)
    unit_jaccard_conf = round(1 - jaccard_distance_units(clean_1, clean_2), 2)
    combined_conf = get_confidence(clean_1, clean_2)
    print(f'lvd_conf: {lev_dice_conf}, jac_conf: {unit_jaccard_conf}, comb_conf: {combined_conf}')

    # Extract units from each normalized string and show them together
    display(*(extract_units(text) for text in (clean_1, clean_2)))
    print()

In [7]:
## Full brand name vs abbreviation ('Nestle Pureza Vital' / 'NESTLE PV'), same 1 L volume,
## different piece counts (12 PIEZAS vs 4 pzs)
s1 = 'Agua Natural Nestle Pureza Vital botella 1 L 12 PIEZAS'
s2 = "NESTLE PV 1L 4 pzs Modelo"
get_performance(s1, s2)

npvnpvnpvnpv1lt 12pz
npvnpvnpvnpv1lt 4pz

lvd_conf: 1.0, jac_conf: 0.33, comb_conf: 0.88


['1lt', '12pz']

['1lt', '4pz']




In [9]:
## Pack written as '12x1000 ML' in s2 versus '1 L 12 PIEZAS' in s1
s1 = 'Agua Natural Nestle Pureza Vital botella 1 L 12 PIEZAS'
s2 = "NESTLE PV 12x1000 ML Modelo"
get_performance(s1, s2)

npvnpvnpvnpv1lt 12pz
npvnpvnpvnpv12pz 1lt

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['1lt', '12pz']

['12pz', '1lt']




In [10]:
## Extract units
s1 = 'boing fresa 500ml 24pz'
s2 = "jugo boing surtidos 500ml - 24pz"
get_performance(s1, s2)


boingboingboing fresa 500ml 24pz
boingboingboing surtido 500ml 24pz

lvd_conf: 0.77, jac_conf: 1.0, comb_conf: 0.74


['500ml', '24pz']

['500ml', '24pz']




In [11]:
## Extract units
s1 = 'pedigree re 100g 40 pz pouche'
s2 = "pedigree rp pouche res 100 gr 12 pz"
get_performance(s1, s2)

pedigree re 100g 40pz pch
pedigree rp pch re 100g 12pz

lvd_conf: 0.92, jac_conf: 0.33, comb_conf: 0.78


['100g', '40pz']

['100g', '12pz']




In [12]:
## Extract units
s1 = 'Brandy Domecq Don Pedro 200 ml Presentación'
s2 = "Brandy DON PEDRO 200ml"
get_performance(s1, s2)

donpedro 200ml
donpedro 200ml

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['200ml']

['200ml']




In [13]:
## Extract units
s1 = 'Frijol Negro Queretaro Verde Valle QUERETANO 900 g Presentación: Paquete - 20 artículo(s).'
s2 = "FRIJOL NEGRO VERDE VALLE 900G - 20 PZS"
get_performance(s1, s2)

frijol black verdevalle 900g 20pz
frijol black verdevalle 900g 20pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['900g', '20pz']

['900g', '20pz']




In [14]:
## Extract units
s1 = 'Caja bebida energetica Vive 100 300M/24P 355 ml en 24 piezas'
s2 = "VIVE 100 botella original 300ML 24 PIEZAS PACK"
get_performance(s1, s2)

vive100 300ml 24pz
vive100 300ml 24pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['300ml', '24pz']

['300ml', '24pz']




In [15]:
## Extract units
s1 = 'Destilado de agave Rancho Escondido 750ml 1 pieza'
s2 = "LICOR RANCHO ESCONDIDO anejado 750ml"
get_performance(s1, s2)

ranchoescondido 700ml
ranchoescondido anejado 700ml

lvd_conf: 0.8, jac_conf: 1.0, comb_conf: 0.78


['700ml']

['700ml']




In [16]:
## Extract units
s1 = 'Bebida Energizante Red Bull Sugar Free 250 Ml'
s2 = "RED BULL SUGAR FREE 250 ML - 4 PACK"
get_performance(s1, s2)

red bull sugar free 200ml
red bull sugar free 200ml 4pz

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['200ml']

['200ml', '4pz']




In [17]:
# Volumes differ: s1 states 1 litre (twice: '1L/12P' and '1 litro'), s2 states 1.5L;
# both state 12 pieces
s1 = 'Paquete agua Santa Maria 1L/12P 1 litro con 12 piezas'
s2 = "SANTA MARIA 1.5L - 12PZS"
get_performance(s1, s2)

santa maria 1lt 12pz
santa maria 1.5lt 12pz

lvd_conf: 0.92, jac_conf: 0.33, comb_conf: 0.78


['1lt', '12pz']

['1.5lt', '12pz']




In [18]:
## Extract units
s1 = 'Bebida Caribe Cooler Tinto 300 ml Presentación: Caja 12 Artículo(s)'
s2 = "CARIBE COOLER TINTO 300 ML - 12 PZ"
get_performance(s1, s2)

caribecoolercaribecoolercaribecoolertinto 300ml 12pz
caribecoolercaribecoolercaribecoolertinto 300ml 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['300ml', '12pz']

['300ml', '12pz']




In [19]:
## Extract units
s1 = 'suero electrolit mora azul 625ml presentacion 12 ártículos'
s2 = "electrolit mora azul 625ml 12pz"
get_performance(s1, s2)

electrolit mora azul 600ml 12pz
electrolit mora azul 600ml 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['600ml', '12pz']

['600ml', '12pz']




In [20]:
## s1 is a single word prefixed by a literal HTML entity (&apos;) with no units;
## s2 is a full Electrolit description with a volume and a count
s1 = '&apos;Cuétara'
s2 = "Electrolit Fresa Kiwi 625 ml Caja con 12"
get_performance(s1, s2)

cuetara
electrolit fresa kiwi 600ml 12pz

lvd_conf: 0.31, jac_conf: 0.0, comb_conf: 0.0


[]

['600ml', '12pz']




In [21]:
## Extract units
s1 = 'VELADORA ROSITA 12  - PZS'
s2 = "Vel Rosita con 12 botellas de 1"
get_performance(s1, s2)

veladora rosita 12pz
velrositavelrositavelrosita12pz 1

lvd_conf: 0.57, jac_conf: 1.0, comb_conf: 0.47


['12pz']

['12pz']




In [23]:
## Extract units
s1 = 'LALA LECHE LIGHT 1L - 12 PZS'
s2 = "leche Lala deslactosada Light 12/1L"
get_performance(s1, s2)

lalalalalalalalaleche light 1lt 12pz
leche lalalalalalalaladelac light 12pz 1lt

lvd_conf: 0.9, jac_conf: 1.0, comb_conf: 0.9


['1lt', '12pz']

['12pz', '1lt']




In [24]:
# '12 ANOS' appears in both names; s2 additionally carries the compact
# notation '12P/750M', while s1 ends in '750ML-1PZ'
s1 = 'CHIVAS REGAL WHISKY 12 ANOS 750ML-1PZ'
s2 = 'Caja Whisky Chivas Regal 12 Anos 12P/750M'
get_performance(s1, s2)

chiva regal whisky 12ano 700ml
whisky chiva regal 12ano 12pz 700ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['700ml']

['12pz', '700ml']




In [25]:
## Extract units
s1 = 'marlboro red 20pz 10pz'
s2 = "Walker Etiqueta Roja Litro 1000ml"
get_performance(s1, s2)

marlboro red 20pz 10pz
walker red 1lt

lvd_conf: 0.3, jac_conf: 0.0, comb_conf: 0.0


['20pz', '10pz']

['1lt']




In [26]:
## Extract units
s1 = 'NEW MIX VAMPIRO LATON 473ML - 24 PZS'
s2 = 'Caja bebida Red Mix Vampiro 24P/473M'
get_performance(s1, s2)

new mix vampiro 400ml 24pz
red mix vampiro 24pz 400ml

lvd_conf: 0.87, jac_conf: 1.0, comb_conf: 0.86


['400ml', '24pz']

['24pz', '400ml']




In [27]:
## Extract units
s1 = 'CARIBE COOLER FRESA 300ML - 12 PZS'
s2 = 'Caja bebida Caribe Cooler tinto 300M/12P'
get_performance(s1, s2)

caribecoolercaribecoolercaribecoolerfresa 300ml 12pz
caribecoolercaribecoolercaribecoolertinto 300ml 12pz

lvd_conf: 0.88, jac_conf: 1.0, comb_conf: 0.87


['300ml', '12pz']

['300ml', '12pz']




In [29]:
## Extract units
s1 = 'VODKA ABSOLUT 750 ML - 1 PZ'
s2 = 'Vodka Absolut Raspberri 750M - ZK'
get_performance(s1, s2)

vodka absolut 700ml
vodka absolut raspberri 700ml

lvd_conf: 0.73, jac_conf: 1.0, comb_conf: 0.69


['700ml']

['700ml']




In [30]:
# Scratch expression: repeating a str with * (evaluates to 'rrrerrre').
# NOTE(review): not used by any other cell shown here — candidate for removal.
'rrre'*2

'rrrerrre'

In [31]:
## Extract units
s1 = 'SMIRNOFF VODKA 750 ML - 1PZ'
s2 = 'Caja Vodka Smirnoff 12P/750M'
get_performance(s1, s2)

smirnoff vodka 700ml
vodka smirnoff 12pz 700ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['700ml']

['12pz', '700ml']




In [32]:
## Extract units
s1 = 'SKYY BLUE 275 ML - 24 PZS'
s2 = 'Caja bebida Vodka skyy blue 275M/24P'
get_performance(s1, s2)

skyyskyyskyyskyyblue 200ml 24pz
skyyskyyskyyskyyblue 200ml 24pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['200ml', '24pz']

['200ml', '24pz']




In [35]:
## Extract units
s1 = 'SMIRNOFF VODKA ETIQUETA ROJA 1L - 1PZ'
s2 = 'Caja Vodka Smirnoff 12P/1L'
get_performance(s1, s2)

smirnoffsmirnoffsmirnoffvodka red 1lt
vodka smirnoffsmirnoffsmirnoff12pz 1lt

lvd_conf: 0.94, jac_conf: 0.5, comb_conf: 0.84


['1lt']

['12pz', '1lt']




In [36]:
## Extract units
s1 = 'SKYY APPLETINI 275 ML - 24 PZS'
s2 = 'Caja bebida Vodka skyy blue 275M/24P'
get_performance(s1, s2)

skyyskyyskyyskyyappletini 200ml 24pz
skyyskyyskyyskyyblue 200ml 24pz

lvd_conf: 0.8, jac_conf: 1.0, comb_conf: 0.77


['200ml', '24pz']

['200ml', '24pz']




In [37]:
## Extract units
s1 = 'VODKA ABSOLUT 750 ML - 1 PZ'
s2 = 'Vodka Absolut Azul 750ml'
get_performance(s1, s2)

vodka absolut 700ml
vodka absolut azul 700ml

lvd_conf: 0.85, jac_conf: 1.0, comb_conf: 0.84


['700ml']

['700ml']




In [40]:
## Extract units
s1 = 'TEQUILA JOSE CUERVO ESPECIAL 695ML - 1PZ'
s2 = 'Tequila Cuervo Tradicional Reposado 695 ml'
get_performance(s1, s2)

teq cuervocuervocuervocuervoesp 600ml
teq cuervocuervocuervocuervotrad rep 600ml

lvd_conf: 0.9, jac_conf: 1.0, comb_conf: 0.89


['600ml']

['600ml']




In [41]:
## Extract units
s1 = 'Galletas de animalits'
s2 = 'Whisky Johnnie Walker Double Black Label 750 ml'
get_performance(s1, s2)

galleta animalits
whisky johnnie walker double black 700ml

lvd_conf: 0.29, jac_conf: 0.0, comb_conf: 0.0


[]

['700ml']




In [48]:
## Extract units
s1 = 'JOSE CUERVO TEQ 1800 CRISTAL 700ML-1PZ'
s2 = 'Tequila 1800 Anejo Cristalino 100% 700ml'
get_performance(s1, s2)

cuervocuervocuervocuervo1800 cristal 700ml
cuervocuervocuervocuervo1800 anejo cristal 100 700ml

lvd_conf: 0.88, jac_conf: 1.0, comb_conf: 0.88


['700ml']

['700ml']




In [140]:
## Extract units
s1 = 'SKYY APPLETINI 275 ML - 24 PZS'
s2 = 'Skyy Appletini 275ml (6pz)'
get_performance(s1, s2)

skyyskyyskyyskyyappletini 200ml 24pz
skyyskyyskyyskyyappletini 200ml 6pz

lvd_conf: 1.0, jac_conf: 0.33, comb_conf: 0.88


['200ml', '24pz']

['200ml', '6pz']




In [141]:
## Extract units
s1 = 'SAUZA HACIENDA TEQUILA REPOSADO 1Lt-1PZ'
s2 = 'Tequila Sauza Hacienda Rep 700ml-1pz'
get_performance(s1, s2)

sauzahaciendasauzahaciendasauzahacienda teq rep 1lt
teq sauzahaciendasauzahaciendasauzahacienda rep 700ml

lvd_conf: 1.0, jac_conf: 0.0, comb_conf: 0.81


['1lt']

['700ml']




In [144]:
## Extract units
s1 = 'ELECTROLIT pina 625ML - 12PZS'
s2 = 'Electrolit Pina 625 ml Caja con 12 Electrolit'
get_performance(s1, s2)

electrolit pina 600ml 12pz
electrolit pina 600ml 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['600ml', '12pz']

['600ml', '12pz']




In [155]:
## Slash pack notation ('24/150 G') and a hyphenated brand ('LOL-TUN') in s2
# NOTE(review): the output recorded under this cell (quoted strings and bare
# numbers) does not have the labelled 'lvd_conf: ...' format that
# get_performance prints — it looks like it came from an earlier version of
# the function; re-run to refresh.
s1 = 'SALSA LOLTUN HABANERO ROJO 150G - 24 PZS'
s2 = 'SALSA LOL-TUN HABANERA 24/150 G *PROMO'
get_performance(s1, s2)

'salsa loltun habanero red 100g 24pz'

'salsa loltun habanera 24pz 100g'




0.91

1.0

0.91




['100g', '24pz']

['24pz', '100g']




In [51]:
## Extract units
s1 = 'JUGO JUMEX MANGO 40 PZS 250ml'
s2 = 'Jumex Mango 24/250 ml Jumex'
get_performance(s1, s2)

jumexjumexjumexjumexmango 40pz 200ml
jumexjumexjumexjumexmango 24pz 200ml

lvd_conf: 1.0, jac_conf: 0.33, comb_conf: 0.88


['40pz', '200ml']

['24pz', '200ml']




In [52]:
## Extract units
s1 = 'CROQUETAS PODER CANINO ADULTO 25KG 1 PZA'
s2 = 'Poder Canino Adulto 25 Kg'
get_performance(s1, s2)

croqueta poder canino adulto 25kg
poder canino adulto 25kg

lvd_conf: 0.82, jac_conf: 1.0, comb_conf: 0.8


['25kg']

['25kg']




In [53]:
## Extract units
s1 = 'ATUN DOLORES EN AGUA 133 GR - 6 PZS'
s2 = 'Atun Dolores en Agua 133gr. Desde 6 unidades'
get_performance(s1, s2)

atun dolore agua 100g 6pz
atun dolore agua 100g 6pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['100g', '6pz']

['100g', '6pz']




In [54]:
## Extract units
s1 = 'LALA LECHE ENTERA 1L - 12 PZS'
s2 = 'Leche Entera Lala 1L Desde 12 UNID'
get_performance(s1, s2)

lalalalalalalalaleche entera 1lt 12pz
leche entera lalalalalalalala1lt 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['1lt', '12pz']

['1lt', '12pz']




In [55]:
## NOTE(review): this cell repeats the inputs of the cell directly above it
## (identical s1 and s2) — candidate for removal
s1 = 'LALA LECHE ENTERA 1L - 12 PZS'
s2 = 'Leche Entera Lala 1L Desde 12 UNID'
get_performance(s1, s2)

lalalalalalalalaleche entera 1lt 12pz
leche entera lalalalalalalala1lt 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['1lt', '12pz']

['1lt', '12pz']




In [56]:
## Extract units
s1 = 'AZUCAR ZULKA 1KG - 10 PZS'
s2 = 'Azucar mor estandar Zulka 1kg Desde 10 unidades'
get_performance(s1, s2)

azucar zulka 1kg 10pz
azucar mor esd zulka 1kg 10pz

lvd_conf: 0.75, jac_conf: 1.0, comb_conf: 0.71


['1kg', '10pz']

['1kg', '10pz']




In [57]:
## Extract units
s1 = 'SOPA MARUCHAN CAMARON Y PIQUIN - 12 PZS'
s2 = 'Maruchan - Camaron y Chile Piquin 64 g - caja con 12 unidades - Maruchan Mexico'
get_performance(s1, s2)

sopa maruchan 64g camaron piquin 12pz
sopa maruchan 64g camaron piquin 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['64g', '12pz']

['64g', '12pz']




In [58]:
## Extract units
s1 = 'SOPA MARUCHAN RES - 12 PZS'
s2 = 'Sopa Instantanea Sabor Carne de Res 64 g - caja con 12 unidades - Maruchan Mexico'
get_performance(s1, s2)

sopa maruchan 64g re 12pz
sopa carne re 64g 12pz maruchan

lvd_conf: 0.85, jac_conf: 1.0, comb_conf: 0.84


['64g', '12pz']

['64g', '12pz']




In [59]:
## Extract units
s1 = 'ATUN DOLORES EN AGUA 133 GR - 6 PZS'
s2 = 'Atun Dolores en Agua de 133 g - 24 piezas - Dolores'
get_performance(s1, s2)

atun dolore agua 100g 6pz
atun dolore agua 100g 24pz

lvd_conf: 1.0, jac_conf: 0.33, comb_conf: 0.88


['100g', '6pz']

['100g', '24pz']




In [60]:
## s2 contains mis-encoded (mojibake) characters in the words that should read
## 'Presentación' and 'artículo(s)'.
## NOTE(review): presumably kept on purpose as a dirty-input case — confirm.
s1 = 'SAUZA HORNITOS TEQUILA REPOSADO 1L-1PZ'
s2 = 'Tequila Sauza Hornitos Reposado 1 L Presentaci√≥n: Botella - 1 art√≠culo(s).'
get_performance(s1, s2)

sauza hornito teq rep 1lt
teq sauza hornito rep 1lt presentacin 1 art=culo

lvd_conf: 0.67, jac_conf: 1.0, comb_conf: 0.6


['1lt']

['1lt']




In [61]:
## Extract units
s1 = 'LICOR DE CANA EL MEZCALITO 440 ML 24 PIEZA'
s2 = 'Licor de Agave El Mezcalito 440 ml Presentacion: Caja - 24 articulo(s).'
get_performance(s1, s2)

cana mezcalitomezcalitomezcalito400ml 24pz
mezcalitomezcalitomezcalito400ml 24pz

lvd_conf: 0.92, jac_conf: 1.0, comb_conf: 0.91


['400ml', '24pz']

['400ml', '24pz']




In [62]:
## Extract units
s1 = 'GRAN CENTENARIO TEQUILA PLATA 700ML-1PZ'
s2 = 'Pack de 2 Tequila Gran Centenario Plata 700 ml Gran Centenario Plata'
get_performance(s1, s2)

grancentenariograncentenariograncentenarioteq plata 700ml
2pz teq grancentenariograncentenariograncentenarioplata 700ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['700ml']

['2pz', '700ml']




In [63]:
## Extract units
s1 = 'JACK DANIELS WHISKY 700ML-1PZ'
s2 = 'Pack de 4 Whisky Jack Daniels 700 ml Jack Daniels 700 ml'
get_performance(s1, s2)

jack daniels whisky 700ml
4pz whisky jack daniels 700ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['700ml']

['4pz', '700ml']




In [64]:
## Extract units
s1 = 'CHIVAS REGAL WHISKY 12 ANOS 750ML-1PZ'
s2 = 'Whisky Chivas Regal 12 anos 750ml CHIVAS REGAL Botella 750ml'
get_performance(s1, s2)

chiva regal whisky 12ano 700ml
whisky chiva regal 12ano 700ml

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['700ml']

['700ml']




In [65]:
## Extract units
s1 = 'PRESIDENTE CLASICO BRANDY 900ML - 1PZA'
s2 = 'Paquete de 3 Brandy Presidente Clasico 900 ml Presidente Paquete de 3'
get_performance(s1, s2)

presidente clasico bry 900ml
3pz bry presidente clasico 900ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['900ml']

['3pz', '900ml']




In [66]:
## Extract units
s1 = 'JOSE CUERVO TEQUILA TRAD 950 ML - 1 PZ'
s2 = 'Paquete de 3 Tequila Jose Cuervo Tradicional 950 ml Jose Cuervo Tradicional'
get_performance(s1, s2)

cuervocuervocuervocuervoteq trad 900ml
3pz teq cuervocuervocuervocuervotrad 900ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['900ml']

['3pz', '900ml']




In [67]:
## Extract units
s1 = 'DON JULIO TEQUILA REPOSADO 700 ML - 1PZ'
s2 = 'Tequila Don Julio Reposado Edicion Especial 700 ml Don Julio Reposado Edicion Especial'
get_performance(s1, s2)

donjuliodonjuliodonjulioteq rep 700ml
teq donjuliodonjuliodonjuliorep ed esp 700ml

lvd_conf: 0.9, jac_conf: 1.0, comb_conf: 0.9


['700ml']

['700ml']




In [68]:
## Extract units
s1 = 'CERVEZA ALLENDE GOLDEN 355 ML - 24 PZS'
s2 = 'Heineken Cerveza Botella 355 Ml 24 Pieza(s)'
get_performance(s1, s2)

cerveza allende gold 300ml 24pz
heineken cerveza 300ml 24pz

lvd_conf: 0.46, jac_conf: 1.0, comb_conf: 0.3


['300ml', '24pz']

['300ml', '24pz']




In [69]:
## Extract units
# Volumes are close but not equal: 355ML in s1, 350 ml in s2; both state 24 pieces
s1 = 'JACK DANIELS GINGER 355ML - 24PZS'
s2 = 'Jack Daniels Ginger 350 ml 24 Pieza(s)'
get_performance(s1, s2)

jack daniels ginger 300ml 24pz
jack daniels ginger 300ml 24pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['300ml', '24pz']

['300ml', '24pz']




In [70]:
## Extract units
s1 = 'VINA REAL DURAZNO PET 2L - 6 PZS'
s2 = 'Bebida Alcoholica Vina Real Durazno Botella 2 Litros 6 Pieza(s)'
get_performance(s1, s2)

vina real durazno 2lt 6pz
vina real durazno 2lt 6pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['2lt', '6pz']

['2lt', '6pz']




In [71]:
## Extract units
s1 = 'DON PEDRO RESERVA ESPECIAL 200ML - 1PZ'
s2 = 'Brandy Don Pedro Gran Reserva 200 Mililitros Pieza'
get_performance(s1, s2)

donpedro reserva esp 200ml
donpedro gran reserva 200ml

lvd_conf: 0.84, jac_conf: 1.0, comb_conf: 0.82


['200ml']

['200ml']




In [72]:
## s1 carries an extra '+250 ML' after the 950 volume; s2 only has '950 Mililitros'
s1 = 'CABRITO TEQ REPOSADO 950 +250 ML - 1 PZ'
s2 = 'Tequila Cabrito Reposado 950 Mililitros Pieza'
get_performance(s1, s2)

cabritocabritocabritoteq rep 900ml 200ml
teq cabritocabritocabritorep 900ml

lvd_conf: 1.0, jac_conf: 0.5, comb_conf: 0.91


['900ml', '200ml']

['900ml']




In [73]:
## Extract units
s1 = 'MARLBORO ROJO 14 UND - 10 PZS'
s2 = 'Marlboro Cigarro Rojo 10 Cajetillas Con 14 pz Pieza'
get_performance(s1, s2)

marlboro red 14pz 10pz
marlboro red 10pz 14pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['14pz', '10pz']

['10pz', '14pz']




In [74]:
## Extract units
s1 = 'MARLBORO ROJO 20UND - 10PZS'
s2 = 'Marlboro Cigarro Rojo Paquete Con 10 Cajetillas de 20 Pz Pieza'
get_performance(s1, s2)

marlboro red 20pz 10pz
marlboro red 10pz 20pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['20pz', '10pz']

['10pz', '20pz']




In [75]:
## Extract units
s1 = 'JOHNNIE WALKER RED LABEL 200 ML - 1 PZA'
s2 = 'WHISKY JOHNNIE WALKER RED POCKET 200 ML / BOTELLA'
get_performance(s1, s2)

johnnie walker red 200ml
whisky johnnie walker red 200ml

lvd_conf: 0.85, jac_conf: 1.0, comb_conf: 0.83


['200ml']

['200ml']




In [76]:
## Extract units
s1 = 'WHISKY BLACK & WHITE 700ML - 1 PZ'
s2 = 'WHISKY BLACK AND WHITE 700 ML / BOTELLA'
get_performance(s1, s2)

whisky black & white 700ml
whisky black & white 700ml

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['700ml']

['700ml']




In [77]:
## Extract units
s1 = 'NUTRILECHE PRODUCTO LACTEO 1L -12PZS'
s2 = 'lECHE NUTRILECHE 1 LT / TETRA PACK 12 piezas'
get_performance(s1, s2)

nutrileche lch 1lt 12pz
leche nutrileche 1lt 12pz

lvd_conf: 0.94, jac_conf: 1.0, comb_conf: 0.93


['1lt', '12pz']

['1lt', '12pz']




In [78]:
## Extract units
s1 = 'MARLBORO GOLD 20UND - 10PZS'
s2 = 'CIGARROS MARLBORO GOLDEN FRESH 20 CIG. 10PACK / DISPLAY'
get_performance(s1, s2)

marlboro gold 20pz 10pz
marlboro gold fsh 20pz 10pz

lvd_conf: 0.87, jac_conf: 1.0, comb_conf: 0.86


['20pz', '10pz']

['20pz', '10pz']




In [79]:
## Extract units
s1 = 'ELECTROLIT fresa 625ML - 12PZ'
s2 = 'electrolit fresa 625 ml 12pz'
get_performance(s1, s2)

electrolit fresa 600ml 12pz
electrolit fresa 600ml 12pz

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['600ml', '12pz']

['600ml', '12pz']




In [80]:
## Extract units
s1 = 'MARLBORO ROJO 20UND - 10PZS'
s2 = "CIGARROS MARLBORO ROJO 100'S 20 CIG. 10PACK / DISPLAY"
get_performance(s1, s2)

marlboro red 20pz 10pz
marlboro red 100s 20pz 10pz

lvd_conf: 0.83, jac_conf: 1.0, comb_conf: 0.82


['20pz', '10pz']

['20pz', '10pz']




In [81]:
s1 = 'AZTECA DE ORO 700 ML 1 PZA'
s2 = "bry AZTECA DE ORO 700 ML"
get_performance(s1, s2)

aztecaoro 700ml
aztecaoro 700ml

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['700ml']

['700ml']




In [82]:
# s2 gives the volume as '.700' with no unit; s1 uses '700 ML'.
# The brand spelling also differs (WILLIAM vs WILLIAMS).
s1 = 'WILLIAM LAWSONS WHISKY 700 ML - 1PZ'
s2 = "WHISKY WILLIAMS LAWSONS .700"
get_performance(s1, s2)

william lawsons whisky 700ml
whisky williams lawsons 700ml

lvd_conf: 0.98, jac_conf: 1.0, comb_conf: 0.98


['700ml']

['700ml']




In [83]:
s1 = 'JOSE CUERVO TEQUILA TRAD 950 ML - 1 PZ'
s2 = "TEQUILA REP.100% CUERVO TRADICIONAL .950"
get_performance(s1, s2)

cuervocuervocuervocuervoteq trad 900ml
teq rep100 cuervocuervocuervocuervotrad 900ml

lvd_conf: 0.9, jac_conf: 1.0, comb_conf: 0.89


['900ml']

['900ml']




In [84]:
s1 = 'WHISKY PASSPORT 700ML - 1 PZ'
s2 = "Whisky Passport Scotch - 700 ml"
get_performance(s1, s2)

whisky passport 700ml
whisky passport scotch 700ml

lvd_conf: 0.82, jac_conf: 1.0, comb_conf: 0.8


['700ml']

['700ml']




In [85]:
s1 = 'JOHNNE WALKER WHISKY E NEGRA 750ML - 1PZ'
s2 = "Whisky Johnnie Walker Black Label - 750 ml"
get_performance(s1, s2)

johnne walker whisky black 700ml
whisky johnnie walker black 700ml

lvd_conf: 0.98, jac_conf: 1.0, comb_conf: 0.98


['700ml']

['700ml']




In [86]:
s1 = 'AZTECA DE ORO 700 ML 1 PZA'
s2 = "Brandy Azteca De Oro 700 Ml - Azteca De Oro - 1 pieza"
get_performance(s1, s2)

aztecaoro 700ml
aztecaoro 700ml

lvd_conf: 1.0, jac_conf: 1.0, comb_conf: 1.0


['700ml']

['700ml']




In [87]:
s1 = 'SAUZA HACIENDA TEQUILA REPOSADO 1L-1PZ'
s2 = "Tequila Sauza Hacienda Azul 1 Lt - Sauza - 1 pieza"
get_performance(s1, s2)

sauzahaciendasauzahaciendasauzahacienda teq rep 1lt
teq sauzahaciendasauzahaciendasauzahacienda azul 1lt sauza

lvd_conf: 0.87, jac_conf: 1.0, comb_conf: 0.86


['1lt']

['1lt']




In [88]:
s1 = 'COCA COLA 355ML - 12PZS'
s2 = "Coca Cola sin azucar Paquete 12 unidades 355ML Paquete por 12 unidades"
get_performance(s1, s2)

coca cola 300ml 12pz
coca cola azucar 12pz 300ml

lvd_conf: 0.72, jac_conf: 1.0, comb_conf: 0.68


['300ml', '12pz']

['12pz', '300ml']




In [225]:
# Different product names ('PRESIDENTE CUBA' vs 'Sprite') that share the same
# volume and count (355 ML, 12 units).
# NOTE(review): the output recorded under this cell (quoted strings and bare
# numbers) does not have the labelled 'lvd_conf: ...' format that
# get_performance prints — it looks like it came from an earlier version of
# the function; re-run to refresh.
s1 = 'PRESIDENTE CUBA LATA 355 ML - 12 PZS'
s2 = "Sprite - Paquete 12 unidades 355ML Paquete por 12 unidades"
get_performance(s1, s2)

'presidente cuba 300ml 12pz'

'sprite 12pz 300ml'




0.54

1.0

0.48




['300ml', '12pz']

['12pz', '300ml']




In [311]:
# Strip every "<number><unit>" token from a normalized description.
# The unit list has to be an alternation inside a group: a character class
# such as [ml|lt|pz] would match any run of the letters m, l, t, p, z, ... and
# the '|' character, so tokens like '12zzz' would be removed as well.
# An optional decimal part is accepted so that '1.5lt' is removed as a whole
# instead of leaving a dangling '1.' behind.
UNIT_TOKEN_RE = re.compile(r'\d+(?:\.\d+)?(?:ml|lt|pz|g|oz|kg)(?:\s|$)')
UNIT_TOKEN_RE.sub('', 'presidente cuba 355ml 12pz 1pz').strip()

'presidente cuba'