In [12]:
import pandas as pd
import numpy as np
import time
import logging
from fuzzywuzzy import fuzz,process
 
# Remove the limit on how many DataFrame columns are rendered.
pd.set_option('display.max_columns', None)
# Render at most 100 DataFrame rows.
pd.set_option('display.max_rows', 100)
# Configure root logging at INFO level so the logger.info calls in the matching cells are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module (__main__ when run as a notebook).
logger = logging.getLogger(__name__)

In [53]:
# Shopee-side item data.
shopeeList = pd.read_csv('shp_data.csv')

# Keep only the first two columns of the mapping file, cast to int.
# presumably these are the 'LZD Shop ID' / 'SHP Shop ID' pair -- verify against the CSV
mappingList = pd.read_csv('shop_mapping.csv')
mappingList = mappingList.iloc[:, :2].astype(int)

# Lazada-side item data, left-joined to the mapping on 'LZD Shop ID'.
lazadaList = pd.read_csv('lzd_data.csv').merge(mappingList, on='LZD Shop ID', how='left')

# Distinct, non-null 'SHP Shop ID' values found on the Lazada side after the join.
shopIdList = lazadaList['SHP Shop ID'].dropna().drop_duplicates()

# Per-shop slices of each data set, keyed by 'SHP Shop ID'.
lzdDict = dict(
  (shopId, lazadaList.loc[lazadaList['SHP Shop ID'] == shopId]) for shopId in shopIdList
)
shpDict = dict(
  (shopId, shopeeList.loc[shopeeList['SHP Shop ID'] == shopId]) for shopId in shopIdList
)

# Reference answers: the expected Shopee item name for each Lazada item id.
correctMatch = pd.read_csv('correct_match.csv').loc[:, ['LZD Item ID', 'SHP Item Name']]
correctMatch.columns = ['LZD Item ID', 'Correct Match - SHP Item Name']

In [55]:
#######################
### NEW MATCH LOGIC ###
#######################

def newFuzzMatcher(lzdInfo,shpInfoDf):
  """Pick the Shopee candidate that best fuzzy-matches one Lazada item.

  Each row of ``shpInfoDf`` is scored against the Lazada item with
  ``fuzz.token_sort_ratio`` and ``fuzz.token_set_ratio`` on the item names
  and, unless the Lazada description is the literal string ``'None'``, on the
  descriptions as well. The four scores are summed into an overall score and
  the candidate with the highest overall score is returned.

  Args:
    lzdInfo: Positional record read as
      ``[lzdShopId, lzdItemId, lzdItemName, lzdItemDesc, shpShopId]``.
    shpInfoDf: DataFrame of candidates. Positions 1, 2 and 3 of every row are
      read as the Shopee item id, item name and item description.

  Returns:
    A 13-element list
    ``[lzdShopId, lzdItemId, lzdItemName, lzdItemDesc, shpShopId, shpItemId,
    shpItemName, shpItemDesc, tknSortNameScore, tknSetNameScore,
    tknSortDescScore, tknSetDescScore, overallScore]`` for the best
    candidate, or ``None`` when ``shpInfoDf`` has no rows.
  """
  lzdShopId, lzdItemId, lzdItemName, lzdItemDesc, shpShopId = lzdInfo[:5]
  # The literal string 'None' marks a missing Lazada description; description
  # scores are then left at 0.
  hasLzdDesc = lzdItemDesc != 'None'

  bestRow = None
  for shpRow in shpInfoDf.values.tolist():
    shpItemId, shpItemName, shpItemDesc = shpRow[1], shpRow[2], shpRow[3]

    tknSortNameScore = fuzz.token_sort_ratio(lzdItemName, shpItemName)
    tknSetNameScore = fuzz.token_set_ratio(lzdItemName, shpItemName)
    if hasLzdDesc:
      tknSortDescScore = fuzz.token_sort_ratio(lzdItemDesc, shpItemDesc)
      tknSetDescScore = fuzz.token_set_ratio(lzdItemDesc, shpItemDesc)
    else:
      tknSortDescScore = 0
      tknSetDescScore = 0
    overallScore = tknSortNameScore + tknSetNameScore + tknSortDescScore + tknSetDescScore

    # Strict '>' keeps the earliest candidate when overall scores tie.
    if bestRow is None or overallScore > bestRow[-1]:
      bestRow = [lzdShopId,lzdItemId,lzdItemName,lzdItemDesc,shpShopId,shpItemId,shpItemName,shpItemDesc,
                 tknSortNameScore,tknSetNameScore,tknSortDescScore,tknSetDescScore,overallScore]

  # With no candidates there is nothing to match; callers filter out None.
  return bestRow

In [56]:
startTime = time.time()

# Only this Shopee shop id is matched in this run.
targetShopId = 26704352

# Matching: one best-candidate record per Lazada item of the target shop.
appendList = []
logger.info('Start matching...')
for shopId in shopIdList:
  if shopId != targetShopId:
    continue
  print(shopId)
  lzdDf = lzdDict[shopId]
  shpDf = shpDict[shopId]

  for lzdIndex in range(len(lzdDf)):
    lzdInfo = lzdDf.iloc[lzdIndex].values.tolist()
    appendList.append(newFuzzMatcher(lzdInfo, shpDf))

# Drop entries where the matcher produced no result. An explicit identity
# check is used because None.__ne__(x) returns NotImplemented for non-None x,
# and relying on its truthiness is deprecated since Python 3.9.
toList = [matchRow for matchRow in appendList if matchRow is not None]
logger.info('Done matching...')
toPaste = pd.DataFrame(toList,columns=['LZD Shop ID','LZD Item ID','LZD Item Name','LZD Item Description','SHP Shop ID','SHP Item ID','SHP Item Name','SHP Item Description',
                                       'Token Sort Name Score','Token Set Name Score','Token Sort Desc Score','Token Set Desc Score','Overall Score'])

endTime = time.time()
totalTime = endTime - startTime
print(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(totalTime))}')

INFO:__main__:Start matching...


26704352


INFO:__main__:Done matching...


Time Elapsed: 00:00:11


In [60]:
# Attach the reference answer to every match and grade it: 'Pass' when the
# matched Shopee item name equals the reference name, 'Fail' otherwise.
newLogicResult = (
  pd.merge(toPaste, correctMatch, on='LZD Item ID', how='left')
  .assign(**{
    'Test Result': lambda merged: np.where(
      merged['SHP Item Name'] == merged['Correct Match - SHP Item Name'], 'Pass', 'Fail'
    )
  })
)

In [66]:
#######################
### OLD MATCH LOGIC ###
#######################

def oldFuzzMatcher(lzdInfo,shpInfoDf):
  """Pick the Shopee candidate whose name best fuzzy-matches one Lazada item.

  Each row of ``shpInfoDf`` is scored against the Lazada item using only
  ``fuzz.token_sort_ratio`` on the item names. The other three score slots
  are kept at 0, so the overall score equals the name score while the
  returned record keeps the 13-element layout.

  Args:
    lzdInfo: Positional record read as
      ``[lzdShopId, lzdItemId, lzdItemName, lzdItemDesc, shpShopId]``.
    shpInfoDf: DataFrame of candidates. Positions 1, 2 and 3 of every row are
      read as the Shopee item id, item name and item description.

  Returns:
    A 13-element list
    ``[lzdShopId, lzdItemId, lzdItemName, lzdItemDesc, shpShopId, shpItemId,
    shpItemName, shpItemDesc, tknSortNameScore, tknSetNameScore,
    tknSortDescScore, tknSetDescScore, overallScore]`` for the best
    candidate, or ``None`` when ``shpInfoDf`` has no rows.
  """
  lzdShopId, lzdItemId, lzdItemName, lzdItemDesc, shpShopId = lzdInfo[:5]

  # These score slots are not used by this matcher and stay at 0.
  tknSetNameScore = 0
  tknSortDescScore = 0
  tknSetDescScore = 0

  bestRow = None
  for shpRow in shpInfoDf.values.tolist():
    shpItemId, shpItemName, shpItemDesc = shpRow[1], shpRow[2], shpRow[3]

    tknSortNameScore = fuzz.token_sort_ratio(lzdItemName, shpItemName)
    overallScore = tknSortNameScore + tknSetNameScore + tknSortDescScore + tknSetDescScore

    # Strict '>' keeps the earliest candidate when overall scores tie.
    if bestRow is None or overallScore > bestRow[-1]:
      bestRow = [lzdShopId,lzdItemId,lzdItemName,lzdItemDesc,shpShopId,shpItemId,shpItemName,shpItemDesc,
                 tknSortNameScore,tknSetNameScore,tknSortDescScore,tknSetDescScore,overallScore]

  # With no candidates there is nothing to match; callers filter out None.
  return bestRow

In [67]:
startTime = time.time()

# Only this Shopee shop id is matched in this run.
targetShopId = 26704352

# Matching: one best-candidate record per Lazada item of the target shop.
appendList = []
logger.info('Start matching...')
for shopId in shopIdList:
  if shopId != targetShopId:
    continue
  print(shopId)
  lzdDf = lzdDict[shopId]
  shpDf = shpDict[shopId]

  for lzdIndex in range(len(lzdDf)):
    lzdInfo = lzdDf.iloc[lzdIndex].values.tolist()
    appendList.append(oldFuzzMatcher(lzdInfo, shpDf))

# Drop entries where the matcher produced no result. An explicit identity
# check is used because None.__ne__(x) returns NotImplemented for non-None x,
# and relying on its truthiness is deprecated since Python 3.9.
toList = [matchRow for matchRow in appendList if matchRow is not None]
logger.info('Done matching...')
toPaste = pd.DataFrame(toList,columns=['LZD Shop ID','LZD Item ID','LZD Item Name','LZD Item Description','SHP Shop ID','SHP Item ID','SHP Item Name','SHP Item Description',
                                        'Token Sort Name Score','Token Set Name Score','Token Sort Desc Score','Token Set Desc Score','Overall Score'])
endTime = time.time()
totalTime = endTime - startTime
print(f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(totalTime))}')

INFO:__main__:Start matching...


26704352


INFO:__main__:Done matching...


Time Elapsed: 00:00:00


In [68]:
# Attach the reference answer to every match and grade it: 'Pass' when the
# matched Shopee item name equals the reference name, 'Fail' otherwise.
oldLogicResult = (
  pd.merge(toPaste, correctMatch, on='LZD Item ID', how='left')
  .assign(**{
    'Test Result': lambda merged: np.where(
      merged['SHP Item Name'] == merged['Correct Match - SHP Item Name'], 'Pass', 'Fail'
    )
  })
)
oldLogicResult

Unnamed: 0,LZD Shop ID,LZD Item ID,LZD Item Name,LZD Item Description,SHP Shop ID,SHP Item ID,SHP Item Name,SHP Item Description,Token Sort Name Score,Token Set Name Score,Token Sort Desc Score,Token Set Desc Score,Overall Score,Correct Match - SHP Item Name,Test Result
0,168991,367027212,MYKUTSU Blyss Sneakers,READY STOCK Material : Textiles STANDARD SIZ...,26704352,733539291,MYKUTSU Blyss Sneakers,READY STOCK Material : Textiles Standard Size ...,100,0,0,0,100,MYKUTSU Blyss Sneakers,Pass
1,168991,466844583,MYKUTSU Ruffle X Sandals,READY STOCKMaterial : Textiles Standard S...,26704352,1604641923,MYKUTSU Ruffle X Sandals,READY STOCK Material : Textiles Standard Size ...,100,0,0,0,100,MYKUTSU Ruffle X Sandals,Pass
2,168991,357741914,MYKUTSU Velcro Wedges,READY STOCK Material : SUEDE STANDARD SIZE ...,26704352,362940456,MYKUTSU Velcro Wedges,READY STOCK Material : Suede Standard Size Pl...,100,0,0,0,100,MYKUTSU Velcro Wedges,Pass
3,168991,421005220,MYKUTSU Melody Sandals,READY STOCK Material : PU STANDARD SIZE Ple...,26704352,1276321201,MYKUTSU Melody Sandals,"READY STOCK Material : Suede, (PU-White colour...",100,0,0,0,100,MYKUTSU Melody Sandals,Pass
4,168991,420719410,MYKUTSU Mia Wedges (Small Cutting),"READY STOCK Material :SUEDE, PU-BLACK only S...",26704352,1286291801,MYKUTSU Mia Wedges 11cm,"READY STOCK Material : Suede, PU - Black only ...",73,0,0,0,73,MYKUTSU Mia Wedges 11cm,Pass
5,168991,450223548,MYKUTSU Sweety Heels,READY STOCK Material : SUEDE STANDARD SIZE ...,26704352,3119168855,MYKUTSU Sensero Heels,READY STOCK Material : Rubber SMALL CUTTING P...,83,0,0,0,83,MYKUTSU Sweety Women High Heels,Fail
6,168991,411673560,MYKUTSU Mell Heels,READY STOCK Material : SUEDE STANDARD SIZE ...,26704352,2582018116,MYKUTSU Marcella Heels,READY STOCK Material : PU Leather SMALL CUTTIN...,90,0,0,0,90,MYKUTSU Mell Strap Heels,Fail
7,168991,466846383,MYKUTSU Jesse Sneakers Kasut Perempuan,READY STOCKMaterial : Textiles Standard Siz...,26704352,1824181065,MYKUTSU Jesse Sneakers,READY STOCK Material : Textiles SMALL CUTTING ...,73,0,0,0,73,MYKUTSU Jesse Sneakers,Pass
8,168991,443236913,MYKUTSU Mustache Sneakers,READY STOCK Material : PU Leather STANDARD S...,26704352,1117308059,MYKUTSU Mustache Sneakers,READY STOCK Material : PU leather Standard Siz...,100,0,0,0,100,MYKUTSU Mustache Sneakers,Pass
9,168991,740972771,MYKUTSU Royanne Sneakers,READY STOCK Material : PU Leather SM...,26704352,6807833666,MYKUTSU Royanne Sneakers,READY STOCK Material : PU Leather SMALL CUTTIN...,100,0,0,0,100,MYKUTSU Royanne Sneakers,Pass
