In [26]:
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm

In [27]:
# Load the brand, category and product CSV tables kept under db_cosmenet/.
# NOTE(review): the category table is read from the `uat` folder while brand and
# product come from `production` — confirm that mixing environments is intended.
brand_df, category_df, product_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'db_cosmenet/production/view_b_brand.csv',
        'db_cosmenet/uat/category.csv',
        'db_cosmenet/production/view_b_product.csv',
    )
)
# Per-image table; its `labels` column is matched against product EIDs further down.
scan_df = pd.read_csv(
    '/app/nfs_clientshare/Datasets/Cosmenet_uat_20231108/data_last_join_2023_12_18.csv'
)

In [55]:
# Placeholder image URL substituted wherever a brand or product row has no image path.
url_404 = 'https://viterbischool.usc.edu/wp-content/uploads/2023/10/404.jpg'

In [101]:

# --- Brand table clean-up ---------------------------------------------------
# Rows without a BID are discarded, the BID is cast to an integer and the text
# columns are trimmed (a missing image path is left as NaN).
brand_df_clean = brand_df.dropna(subset=['BID']).astype({'BID': 'int64'})
brand_df_clean['BRAND_NAME'] = brand_df_clean['BRAND_NAME'].map(lambda name: name.strip())
brand_df_clean['BRAND_IMG'] = brand_df_clean['BRAND_IMG'].map(
    lambda img: img if pd.isna(img) else img.strip()
)

# Names carried by more than one row, plus every row that bears such a name
# (image column left out, ordered by name then BID) for the duplicate report.
repeated_name_mask = brand_df_clean['BRAND_NAME'].duplicated()
brand_duplicate_unique = (
    brand_df_clean[repeated_name_mask].sort_values(by='BRAND_NAME')['BRAND_NAME'].unique()
)
brand_duplicate = (
    brand_df_clean[brand_df_clean['BRAND_NAME'].isin(brand_duplicate_unique)]
    .drop(columns=['BRAND_IMG'])
    .sort_values(['BRAND_NAME', 'BID'])
)
# Every row whose BID appears in the duplicate report is removed, i.e. all
# copies of a duplicated brand name are dropped, not just the extras.
bid_dub_index = brand_df_clean[brand_df_clean['BID'].isin(brand_duplicate['BID'])].index
brand_df_clean = brand_df_clean.drop(index=bid_dub_index).reset_index(drop=True)

# Brands flagged inactive; kept for inspection only, they are not removed here.
brand_deleted = brand_df_clean[brand_df_clean['active'] == 'N']

# Turn the stored image path into an absolute thumbnail URL, falling back to
# the placeholder image when no path is stored.
brand_img_head = 'https://static.cosmenet.in.th/thumbnail-sm'
brand_df_clean['BRAND_IMG'] = brand_df_clean['BRAND_IMG'].map(
    lambda img: url_404 if pd.isna(img) else brand_img_head + img
)
# Brand page URL: <base>/<BID>/<brand name lower-cased, spaces replaced by '-'>.
brand_url = 'https://www.cosmenet.in.th/brand/'
brand_df_clean['BRAND_URL'] = [
    brand_url + str(bid) + '/' + name.replace(' ', '-').lower()
    for bid, name in zip(brand_df_clean['BID'], brand_df_clean['BRAND_NAME'])
]

brand_df_clean = brand_df_clean[['BID', 'BRAND_NAME', 'BRAND_IMG', 'BRAND_URL']]
brand_df_clean.tail(1)

Unnamed: 0,BID,BRAND_NAME,BRAND_IMG,BRAND_URL
1424,4806,Glory Glowy,https://static.cosmenet.in.th/thumbnail-sm/upl...,https://www.cosmenet.in.th/brand/4806/glory-glowy


In [29]:
# Brand names that occur on more than one row of the cleaned brand table.
brand_duplicate_unique

array(['Gallinée', 'Queen Helene'], dtype=object)

In [30]:
# Every brand row sharing one of the duplicated names, ordered by name then BID.
brand_duplicate

Unnamed: 0,BID,BRAND_NAME,brand_detail,active,detail_img,UF_COUNTRY_TEXT
1328,3579,Gallinée,,Y,/upload/iblock/b5c/Gallinée-branner.jpg,fra
963,3988,Gallinée,"<font size=""4""> กลุ่มผลิตภัณฑ์ดูแลผิวหน้าแ...",Y,/upload/iblock/cb2/gallinee-head.jpg,
128,1646,Queen Helene,,Y,,usa
1204,3979,Queen Helene,,Y,,


In [31]:
# Write the duplicate-brand report to disk: one duplicated name per line in the
# .txt file, and the full duplicated rows in the .csv file.
pd.DataFrame(brand_duplicate_unique).to_csv(
    path_or_buf='db_cosmenet/duplicate/brand_duplicate_unique.txt',
    header=False,
    index=False,
)
brand_duplicate.to_csv(
    path_or_buf='db_cosmenet/duplicate/brand_duplicate.csv',
    index=False,
)

In [32]:
# Top-level categories are the DEPTH_LEVEL == 1 rows; only ID and NAME are kept.
category_df_clean = (
    category_df.loc[category_df['DEPTH_LEVEL'] == 1]
    .reset_index(drop=True)
    .drop(columns=['DEPTH_LEVEL', 'IBLOCK_SECTION_ID'])
)
category_df_clean.tail(1)

Unnamed: 0,ID,NAME
9,1821,For Men


In [33]:
# Sub-categories are the DEPTH_LEVEL == 3 rows of the category table. Each one
# is resolved to the ID two levels up (parent of its parent), which becomes CID.
subcategory_df_clean = category_df[category_df['DEPTH_LEVEL'] == 3].reset_index(drop=True)[['ID', 'IBLOCK_SECTION_ID', 'NAME']]
# First hop: attach the parent row to learn the parent's own IBLOCK_SECTION_ID.
subcategory_df_clean = pd.merge(subcategory_df_clean, category_df[['ID', 'IBLOCK_SECTION_ID']], left_on='IBLOCK_SECTION_ID', right_on='ID', how='left')
# Second hop: keep the grandparent ID only when it exists in the category table
# (the left join leaves NaN otherwise).
subcategory_df_clean = pd.merge(subcategory_df_clean, category_df[['ID']], left_on='IBLOCK_SECTION_ID_y', right_on='ID', how='left')
subcategory_df_clean.drop(['IBLOCK_SECTION_ID_x', 'ID_y', 'IBLOCK_SECTION_ID_y'], axis=1, inplace=True)
subcategory_df_clean.columns = ['SCID', 'NAME', 'CID']

# Sub-categories whose grandparent could not be resolved are set aside and removed.
subcategory_df_not_parent = subcategory_df_clean[subcategory_df_clean['CID'].isna()]
subcategory_df_clean = (
    subcategory_df_clean
    .drop(index=subcategory_df_not_parent.index)
    .reset_index(drop=True)
    # A left join with any unmatched row turns CID into float64, and str(63.0)
    # would later be sent to the API as '63.0'. Once the NaN rows are gone the
    # column can safely be cast back to an integer.
    .astype({'CID': 'int64'})
)
subcategory_df_clean.tail(1)

Unnamed: 0,SCID,NAME,CID
130,2683,Home Perfume,63


In [102]:
# cut product not active
# --- Product table clean-up -------------------------------------------------
# Produces `product_df_clean` (EID, PRODUCT_NAME, BID, PRODUCT_IMG, SCID, CID,
# PRODUCT_URL) plus several side tables of rejected rows that later cells
# display and export: product_not_found, product_df_SCID_not_provided,
# product_df_BID_not_provided, product_duplicate(_unique), product_brand_duplicate.

# Keep only active products and the columns needed downstream. The explicit
# copy keeps the column assignments below from writing into a slice of product_df.
product_df_clean = product_df.loc[
    product_df['ACTIVE'] == 'Y',
    ['EID', 'PRODUCT_NAME', 'BID', 'PRODUCT_IMG', 'TYPE_ID', 'SECTION_L1'],
].copy()

product_df_clean['PRODUCT_NAME'] = product_df_clean['PRODUCT_NAME'].apply(lambda x: x.strip())
product_df_clean['PRODUCT_IMG'] = product_df_clean['PRODUCT_IMG'].apply(lambda x: x.strip() if not pd.isna(x) else x)

# Products without any TYPE_ID are set aside and removed.
product_not_found = product_df_clean[product_df_clean['TYPE_ID'].isna()]
product_df_clean = product_df_clean.drop(index=product_not_found.index)

# Products whose TYPE_ID is a DEPTH_LEVEL == 2 (middle) category: store that
# category's parent in SECTION_L1 and mark the sub-category as unknown (-1).
MCID = category_df[category_df['DEPTH_LEVEL'] == 2]
product_MCID = product_df_clean[product_df_clean['TYPE_ID'].isin(MCID['ID'])]
# The left merge keeps the row order of product_MCID, so the values line up
# positionally with product_MCID.index.
MID2CID = pd.merge(product_MCID['TYPE_ID'], MCID[['ID', 'IBLOCK_SECTION_ID']], left_on='TYPE_ID', right_on='ID', how='left')['IBLOCK_SECTION_ID']
product_df_clean.loc[product_MCID.index, 'SECTION_L1'] = MID2CID.values
product_df_clean.loc[product_MCID.index, 'TYPE_ID'] = -1

# Products whose TYPE_ID is itself a top-level category: move it to SECTION_L1
# and mark the sub-category as unknown (-1).
product_CID = product_df_clean[product_df_clean['TYPE_ID'].isin(category_df_clean['ID'])]
product_df_clean.loc[product_CID.index, 'SECTION_L1'] = product_CID['TYPE_ID'].values
product_df_clean.loc[product_CID.index, 'TYPE_ID'] = -1

# Rows that cannot be inserted: no usable sub-category, or no brand.
product_df_SCID_not_provided = product_df_clean[product_df_clean['TYPE_ID'] == -1]
product_df_BID_not_provided = product_df_clean[product_df_clean['BID'].isna()]
# Drop both groups in one call. Dropping them one after the other raises
# KeyError as soon as a row belongs to both groups, because the second drop
# then refers to a label that is already gone. The former "fill BID with -1"
# step is omitted: those rows are removed here, so the fill had no effect.
rows_without_keys = product_df_SCID_not_provided.index.union(product_df_BID_not_provided.index)
product_df_clean = product_df_clean.drop(index=rows_without_keys)

# With the NaN rows gone, the id columns can be cast to integers.
product_df_clean = product_df_clean.astype({'TYPE_ID': 'int64', 'BID': 'int64', 'SECTION_L1': 'int64'})

# Products sharing both name and brand: every copy is reported and removed.
product_duplicate = product_df_clean[product_df_clean.duplicated(subset=['PRODUCT_NAME', 'BID'], keep=False)]\
    .sort_values(['PRODUCT_NAME', 'EID'])
product_duplicate_unique = product_duplicate['PRODUCT_NAME'].unique()
product_duplicate_unique.sort()
product_df_clean = product_df_clean.drop(index=product_duplicate.index)

# Products attached to a brand that was removed from the brand table because
# its name is duplicated.
product_brand_duplicate = product_df_clean[product_df_clean['BID'].isin(brand_duplicate['BID'])]
product_df_clean = product_df_clean.drop(index=product_brand_duplicate.index)

# TYPE_ID now always holds a sub-category id and SECTION_L1 a category id.
product_df_clean = product_df_clean.rename(columns={'TYPE_ID': 'SCID', 'SECTION_L1': 'CID'})
product_df_clean = product_df_clean.reset_index(drop=True)

# Absolute image URL (placeholder when no path is stored) and product page URL.
product_img_head = 'https://static.cosmenet.in.th/thumbnail'
product_df_clean['PRODUCT_IMG'] = product_df_clean['PRODUCT_IMG'].apply(lambda x: product_img_head + x if not pd.isna(x) else url_404)
product_url = 'https://cosmenet.in.th/product/'
product_df_clean['PRODUCT_URL'] = product_df_clean['EID'].apply(lambda x: product_url + str(x))

product_df_clean.tail(1)

Unnamed: 0,EID,PRODUCT_NAME,BID,PRODUCT_IMG,SCID,CID,PRODUCT_URL
29818,51199,Glass Shine Lip Oil,4806,https://static.cosmenet.in.th/thumbnail/upload...,228,58,https://cosmenet.in.th/product/51199


In [104]:
# Products whose TYPE_ID was reset to -1 (no sub-category), ordered by SECTION_L1.
product_df_SCID_not_provided.sort_values('SECTION_L1')

Unnamed: 0,EID,PRODUCT_NAME,BID,PRODUCT_IMG,TYPE_ID,SECTION_L1
18286,45117,GODDESS CLEANSING RITUAL,3854.0,/upload/iblock/640/Goddess-Cleansing-Ritual-01...,-1.0,53.0
1181,15378,Makeup Remover for Eye & Lip,1528.0,/upload/iblock/c0a/biore_makeupremoverforeyean...,-1.0,53.0
1500,16709,Purity Made Simple 3-in-1 Cleanser for Face an...,1706.0,/upload/iblock/8ab/philosophy_puritymadesimple...,-1.0,53.0
22095,39029,Mini Pore Double Clearing Cleansing Foam,1490.0,/upload/iblock/bdb/Mini-Pore_Double_Clearing_C...,-1.0,53.0
21211,43368,MODERN FRICTION Cleansing Stick With Exfoliating,1505.0,/upload/iblock/5c8/MODERN-FRICTION-Cleansing-S...,-1.0,53.0
...,...,...,...,...,...,...
22223,43383,Fuel Deo Zinc and Charcoal Shower Cream,2940.0,/upload/iblock/d06/TROS-Fuel-Deo-Zinc-and-Char...,-1.0,1821.0
22156,43382,Beer Deo Shower Gel,2940.0,/upload/iblock/40f/Tros-Beer-Deo-Shower-Gel-0.jpg,-1.0,1821.0
22100,43381,Clear and Cool Roll On,2940.0,/upload/iblock/a1d/Tros-Clear-Cool-Roll-On-0.jpg,-1.0,1821.0
3820,35019,Srichand For Men Black Edition Oil Control Powder,2250.0,/upload/iblock/1eb/Srichand-For-Men-Black-Edit...,-1.0,1821.0


In [105]:
# Products that have no BID (brand id) in the source table.
product_df_BID_not_provided

Unnamed: 0,EID,PRODUCT_NAME,BID,PRODUCT_IMG,TYPE_ID,SECTION_L1
25604,50855,Veil Hydrating Skin Tint,,/upload/iblock/3dd/hourglass_veilhydratingskin...,3076.0,54.0
30634,42734,Sun C&E SPF 50 PA+++,,/upload/iblock/6ee/Nivea Sun SPF 50 PA-00.jpg,248.0,62.0
30781,49136,Pro Natural Spirit Slim Eyeshadow Palette,,/upload/iblock/dde/catrice_pronaturalspiritsli...,227.0,58.0
31859,50687,London Body Spray Maple,,/upload/iblock/533/confetti_londonbodyspraymap...,2681.0,61.0


In [106]:
# Sorted product names that occur more than once within the same brand.
product_duplicate_unique

array(['Acne Extra Sensitive Cleansing Gel',
       'Alcohol-Free Tender Toner (for All Skin Types)',
       'Anti-Blemish Solutions Concealing Stick', 'Apple Blossom Citrus',
       'Aqua Cream',
       'Aqualia Thermal Lotion SPF 30 Fortifying & Soothing 24Hr Hydrating Moisturizer',
       'Aquasource', 'Aquasource Non-Stop Normal/Combination Skin',
       'Aromatics Elixir Body Smoother', 'Aromatics Elixir Body Wash',
       "Artist's Eye Pencil", "Artist's Lip pencil",
       "Artist's Mechanical Lip Pencil",
       'BLANC EXPERT NeuroWhite Ultimate Whitening Spot Eraser',
       'Bamboo Charcoal Revitalizingscalp Toning Conditioner',
       'Bath & Shower Gel', 'Be Enchanted', 'Beauty Elixir Set',
       'Berry Picnic Lip Gloss', 'BioSenses', 'Biopur Pore Reducer',
       'Biosensitive', 'Black Amethyst', 'Black Raspberry Vanilla',
       'Blush Brush', 'Blush N Glow 3-Dimensional Glow',
       'Brightening Serum Supreme', 'Brown Sugar & Fig',
       'Bubble Bath & Shower Gel', 'C

In [107]:
# Every product row that shares its (PRODUCT_NAME, BID) pair with another row.
product_duplicate

Unnamed: 0,EID,PRODUCT_NAME,BID,PRODUCT_IMG,TYPE_ID,SECTION_L1
2289,18799,Acne Extra Sensitive Cleansing Gel,1636,/upload/iblock/3ef/Smooth-E-Acne-Extra-Sensiti...,68,53
6982,29373,Acne Extra Sensitive Cleansing Gel,1636,/upload/iblock/18e/smoothe_acneextrasensitivec...,68,53
31743,12798,Alcohol-Free Tender Toner (for All Skin Types),1831,/upload/iblock/2b5/Alcohol-Free-Tender-Toner.gif,2715,53
14289,12805,Alcohol-Free Tender Toner (for All Skin Types),1831,/upload/iblock/960/Alcohol-Free-Tender-Toner.gif,2715,53
6274,4199,Anti-Blemish Solutions Concealing Stick,1467,/upload/social_product/product_4199/2019-0-200...,258,54
...,...,...,...,...,...,...
24798,8604,lip plump,1524,/upload/iblock/01e/lip plump.jpg,220,58
7868,8597,maybe baby eau de toilette,1524,/upload/iblock/1f1/maybe baby eau de toilette.jpg,2542,59
24353,8598,maybe baby eau de toilette,1524,/upload/iblock/c17/maybe baby eau de toilette.jpg,2542,59
15340,16916,ฺBaby Bee Nourishing Lotion - Calming,1845,/upload/iblock/02d/Baby Bee Nourishing Lotion ...,212,61


In [110]:
# Products whose BID belongs to one of the duplicated-name brands.
product_brand_duplicate

Unnamed: 0,EID,PRODUCT_NAME,BID,PRODUCT_IMG,TYPE_ID,SECTION_L1
323,10340,Mint Julep Masque,1646,/upload/iblock/425/queenhelene_mintjulepmasque...,79,53
11538,44953,Face Vinegar Toner,3988,/upload/iblock/b84/gallinee-face-vinegar-toner...,2715,53
13761,10338,Jojoba Oil Hot Oil Treatment,1646,/upload/iblock/5b6/Jojoba Hot Oil copy.gif,1769,60
19864,44884,Veggieto Charcoal Q10 x Collagen Eye Mask,3979,/upload/iblock/77a/queen-helene-veggieto-charc...,82,53
20678,10337,Cholesterol Hot Oil Treatment,1646,/upload/iblock/28a/CholesterolHotOil-300x300.jpg,1769,60
20816,10339,Olive Oil Hot Oil Treatment,1646,/upload/iblock/4ee/OliveOilHotOilTreatment-300...,1769,60
24699,44954,Face Mask & Scrub,3988,/upload/iblock/0a4/gallinee-face-mask-and-scru...,2730,53
26089,39855,Soothing Cleansing Cream,3579,/upload/iblock/7df/Gallinee-Soothing-Cleansing...,232,60


In [42]:
# Export the rejected-product reports, each ordered by EID.
product_df_BID_not_provided.sort_values(by='EID')[['EID', 'PRODUCT_NAME']].to_csv(
    'db_cosmenet/duplicate/product_BID_not_provided.csv',
    index=False,
)
product_df_SCID_not_provided.sort_values(by='EID')[['EID', 'PRODUCT_NAME']].to_csv(
    'db_cosmenet/duplicate/product_SCID_not_provided.csv',
    index=False,
)
# One duplicated product name per line.
pd.DataFrame(product_duplicate_unique).to_csv(
    'db_cosmenet/duplicate/product_duplicate_unique.txt',
    index=False,
    header=False,
)
# Duplicated products with their brand name attached (left join on BID).
product_duplicate_report = product_duplicate.merge(brand_df_clean, on='BID', how='left')
product_duplicate_report[['EID', 'PRODUCT_NAME', 'BID', 'BRAND_NAME']]\
    .sort_values(['PRODUCT_NAME', 'BID'])\
    .to_csv('db_cosmenet/duplicate/product_duplicate.csv', index=False)

In [43]:
# Split the scan rows by whether their label is an EID present in the cleaned
# product table.
scan_has_product = scan_df['labels'].isin(product_df_clean['EID'])
scan_df_EID_not_have = scan_df[~scan_has_product]

# For the rows that remain, replace the BID/SCID/CID columns shipped with the
# scan file by the values from the cleaned product table.
scan_df_clean = (
    scan_df.drop(index=scan_df_EID_not_have.index)
    .drop(columns=['BID', 'SCID', 'CID'])
    .merge(
        product_df_clean[['EID', 'BID', 'SCID', 'CID']],
        left_on='labels',
        right_on='EID',
        how='left',
    )
    .drop(columns=['EID'])
    .reset_index(drop=True)
)
scan_df_clean.tail(1)

Unnamed: 0,file_names,labels,images_path,BID,SCID,CID
93676,Be-You-Tiful-Eyeshadow-Palette.jpg,37992,/app/nfs_clientshare/Datasets/Cosmenet_uat_202...,2069,227,58


In [44]:
# Scan rows whose label is not an EID of the cleaned product table.
scan_df_EID_not_have

Unnamed: 0,file_names,labels,images_path,BID,SCID,CID
5846,41193_15.JPG,41193,/app/nfs_clientshare/Datasets/Cosmenet_product...,1700,1782,60
5847,41193_8.JPG,41193,/app/nfs_clientshare/Datasets/Cosmenet_product...,1700,1782,60
5848,41193_4.JPG,41193,/app/nfs_clientshare/Datasets/Cosmenet_product...,1700,1782,60
5849,41193_6.JPG,41193,/app/nfs_clientshare/Datasets/Cosmenet_product...,1700,1782,60
5850,41193_1.jpg,41193,/app/nfs_clientshare/Datasets/Cosmenet_product...,1700,1782,60
...,...,...,...,...,...,...
94297,smooto_tomatocollagen_bbandcc_sunscreencream60...,43038,/app/nfs_clientshare/Datasets/Cosmenet_uat_202...,2169,1755,54
94298,31aec9a172b2dd70420b51baf5a43e5f.jpg,43038,/app/nfs_clientshare/Datasets/Cosmenet_uat_202...,2169,1755,54
94299,th-11134103-22070-nmniteb1d4ev8a.jpg,43038,/app/nfs_clientshare/Datasets/Cosmenet_uat_202...,2169,1755,54
94300,565a6d349552e024312170c12d16558d.jpg,43038,/app/nfs_clientshare/Datasets/Cosmenet_uat_202...,2169,1755,54


In [52]:
def insert_data(body, tag_name, base_url="http://localhost:8000/", token="dev", timeout=30):
    """POST one record to ``<base_url><tag_name>/insert``.

    Args:
        body: JSON-serialisable payload sent as the request body.
        tag_name: Resource segment of the URL (e.g. ``'brand'``, ``'product'``).
        base_url: Service root, including the trailing slash. Defaults to the
            local instance that was previously hard-coded.
        token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        True when the server answers 201. False otherwise; the status code and
        response text (or the transport error) are printed so the caller can
        see why the insert was rejected.
    """
    try:
        response = requests.post(
            url=base_url + tag_name + "/insert",
            json=body,
            headers={
                "Content-Type": "application/json",
                'Authorization': 'Bearer ' + token
                },
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported as a failed insert so the
        # calling loop can print the failing index and stop cleanly.
        print(f'request failed: {exc}')
        return False
    if response.status_code != 201:
        print(response.status_code)
        print(response.text)
        return False
    return True

In [58]:
# Payload template for the brand insert endpoint; the per-row fields are
# overwritten on every iteration.
brand_body = dict(
    active='true',
    brand_id="",
    brand_name="",
    description="",
    update_by="admin",
    url="https://www.google.com/",
    url_preview_image="https://www.google.com/",
)

# NOTE(review): the loop starts at row 113 — presumably a manual resume point
# after an earlier partial run; confirm before re-running on an empty database.
for row in tqdm(range(113, len(brand_df_clean))):
    brand_body.update(
        brand_id=str(brand_df_clean.at[row, 'BID']),
        brand_name=brand_df_clean.at[row, 'BRAND_NAME'],
        url=brand_df_clean.at[row, 'BRAND_URL'],
        url_preview_image=brand_df_clean.at[row, 'BRAND_IMG'],
    )
    inserted = insert_data(brand_body, 'brand')
    if not inserted:
        # Report the failing row so the run can be resumed from it.
        print(f'index {row}')
        break

100%|██████████████████████████████████████████| 1312/1312 [00:27<00:00, 48.18it/s]


In [59]:
# Payload template for the category insert endpoint; id and name are filled per row.
category_body = dict(
    active='true',
    category_id="",
    category_name="",
    description="",
    update_by="admin",
    url="https://www.google.com/",
    url_preview_image="https://www.google.com/",
)

for row in tqdm(range(len(category_df_clean))):
    category_body.update(
        category_id=str(category_df_clean.at[row, 'ID']),
        category_name=category_df_clean.at[row, 'NAME'],
    )
    inserted = insert_data(category_body, 'category')
    if not inserted:
        # Report the failing row so the run can be resumed from it.
        print(f'index {row}')
        break

100%|██████████████████████████████████████████████| 10/10 [00:00<00:00, 46.47it/s]


In [60]:
# Payload template for the sub-category insert endpoint; ids and name are
# filled per row.
subcategory_body = dict(
    active='true',
    category_id="",
    description="",
    sub_category_id="",
    sub_category_name="",
    update_by="admin",
    url="https://www.google.com/",
    url_preview_image="https://www.google.com/",
)

for row in tqdm(range(len(subcategory_df_clean))):
    subcategory_body.update(
        category_id=str(subcategory_df_clean.at[row, 'CID']),
        sub_category_id=str(subcategory_df_clean.at[row, 'SCID']),
        sub_category_name=subcategory_df_clean.at[row, 'NAME'],
    )
    inserted = insert_data(subcategory_body, 'subcategory')
    if not inserted:
        # Report the failing row so the run can be resumed from it.
        print(f'index {row}')
        break

100%|████████████████████████████████████████████| 131/131 [00:03<00:00, 43.59it/s]


In [85]:
# Payload template for the product insert endpoint; the per-row fields are
# overwritten on every iteration.
product_body = dict(
    active='true',
    brand_id="",
    description="",
    product_id="",
    product_name="",
    sub_category_id="",
    update_by="admin",
    url="https://www.google.com/",
    url_preview_image="https://www.google.com/",
)

# NOTE(review): the loop starts at row 3927 — presumably a manual resume point
# after an earlier partial run; confirm before re-running on an empty database.
for row in tqdm(range(3927, len(product_df_clean))):
    product_body.update(
        brand_id=str(product_df_clean.at[row, 'BID']),
        product_id=str(product_df_clean.at[row, 'EID']),
        product_name=product_df_clean.at[row, 'PRODUCT_NAME'],
        sub_category_id=str(product_df_clean.at[row, 'SCID']),
        url=product_df_clean.at[row, 'PRODUCT_URL'],
        url_preview_image=product_df_clean.at[row, 'PRODUCT_IMG'],
    )
    inserted = insert_data(product_body, 'product')
    if not inserted:
        # Report the failing row so the run can be resumed from it.
        print(f'index {row}')
        break

  0%|                                                    | 0/25994 [00:00<?, ?it/s]

409
{"detail":"Product name and brand are already exists."}
index 3927





In [69]:
# Payload template for the scan insert endpoint. Only image_path and product_id
# change per row; directory_name is sent empty.
scan_body = dict(
    active='true',
    counter='0',
    directory_name="",
    image_path="",
    product_id="",
    update_by="admin",
)

for row in tqdm(range(len(scan_df_clean))):
    scan_body.update(
        image_path=scan_df_clean.at[row, 'images_path'],
        product_id=str(scan_df_clean.at[row, 'labels']),
    )
    inserted = insert_data(scan_body, 'scan')
    if not inserted:
        # Report the failing row so the run can be resumed from it.
        print(f'index {row}')
        break

  0%|                                        | 15/93677 [00:07<12:14:02,  2.13it/s]

404
{"detail":"Product is not exists."}
index 15





In [None]:
# One-off insert against the 'main' endpoint, using a fixed sample image and
# the label of the first cleaned scan row.
scan_body = dict(
    active='true',
    counter='0',
    directory_name="",
    image_path='/app/www/vhosts/cosmenet.in.th/httpdocs-scanner-extract-feature/n01491361_tiger_shark.JPEG',
    product_id=str(scan_df_clean['labels'][0]),
    update_by="admin",
)

inserted = insert_data(scan_body, 'main')
if not inserted:
    print('fail')

In [14]:
# Request payload for the predict/product call: a fixed sample image, first
# page of five results, and every filter field left empty.
pred_body = dict(
    brand_id="",
    category_id="",
    image_path="/app/www/vhosts/cosmenet.in.th/httpdocs-scanner-extract-feature/n01491361_tiger_shark.JPEG",
    page=1,
    size=5,
    sub_category_id="",
    update_by="",
)

In [122]:
# Send the prediction payload to the predict/product endpoint. The timeout
# keeps the cell from blocking forever if the service does not answer;
# requests applies no timeout unless one is given.
response = requests.post(
    url="http://10.148.0.50:4446/" + 'predict' + "/product",
    json=pred_body,
    headers={"Content-Type": "application/json"},
    timeout=60,
)