In [3]:
import openai
from openai.error import RateLimitError
import pandas as pd
from googletrans import Translator
from nicHelper.secrets import getSecret
from diskcache import Cache


# Villa search seo and description

## create current key and cache global variable

In [6]:

# init current key
CURRENT_KEY = 0
#init cache
cache = Cache(directory='/tmp/')
#init data
df = pd.read_csv('../data/database_with_description.csv')

## Translation helper

In [7]:
@cache.memoize(tag='translateCache')
def translate(text):
    '''translate from thai to english'''
    if not text: return ''
    translator = Translator()
    return translator.translate(text, dest='en').text

## Drop unused column

In [55]:
def dropUnusedColumn(df)->pd.DataFrame:
    df1 = df.drop(['iprcode', 'oprcode', 'ordertype','pr_cgcode', 'pr_dpcode','pr_ggcode','pr_sa_method','pr_sucode1',
        'pr_suref3','prtype','pstype','depth','product_attribute_id', 'pr_country_th','warehouse','consign_inv','product_attribute_images',
        'related_products',	'enabled',	'preorder_delivery_type',	'preorder_fix_date',	'preorder_relative_day',	'priority_score','plu_no',	'sort_cat_sku',
        'avail_nationwide',	'portion_size',	'portion','weight',	'psqty',	'pr_use_original_img',	'max_qty_in_cart','height',	'width','dept',
        'sort_weight', 'master_online', 'salemode_unit', 'ba_nprice', 'sort_villa_sku','pr_abb','pr_name', 'pr_market','hema_sizedesc','pr_barcode', 'pr_barcode2',
        'pr_brand_th',
        ], axis=1)
    return df1


## Init df1 after dropping columns

In [9]:
df1 = dropUnusedColumn(df)
df1.head().columns

Index(['cprcode', 'pr_active', 'pr_engname', 'pr_country_en', 'pr_keyword_th',
       'pr_keyword_en', 'pr_filter_th', 'pr_filter_en',
       'online_category_l1_th', 'online_category_l1_en',
       'online_category_l2_th', 'online_category_l2_en',
       'online_category_l3_th', 'online_category_l3_en',
       'villa_category_l1_en', 'villa_category_l2_en', 'villa_category_l3_en',
       'villa_category_l4_en', 'content_en', 'content_th', 'hema_brand_th',
       'hema_brand_en', 'pr_brand_en', 'pr_online_name_en',
       'pr_online_name_th', 'hema_name_en', 'hema_name_th', 'pr_name_en',
       'pr_name_th', 'product_detail_description', 'avail_store',
       'shipping_type', 'meta_title', 'meta_keywords', 'meta_description',
       'product_attribute'],
      dtype='object')

## translate all the thai columns and remove the original

In [10]:
df1['translated_keyword'] = df1.pr_keyword_th.apply(translate)
df1['translated_name1'] = df1.pr_name_th.apply(translate)
df1['translated_name2'] = df1.hema_name_th.apply(translate)
df1['translated_name3'] = df1.pr_online_name_th.apply(translate)
df1['translated_content'] = df1.content_th.apply(translate)
df1 = df1.drop(['pr_keyword_th', 'pr_name_th', 'hema_name_th', 'pr_online_name_th','content_th', 'hema_brand_th',], axis=1)

## patch na with ""

In [11]:
df1.fillna('', inplace=True)
df1[['translated_keyword', 'translated_name1', 'translated_name2', 'translated_name3']].head()

Unnamed: 0,translated_keyword,translated_name1,translated_name2,translated_name3
0,Into,Invade the Romoku brand,Invade the Romoku brand,Invade the Romoku brand
1,Into,"Boom Kochu Jung, Korean spicy sauce",Into,Bumilgochogang
2,Into,Quality Vegetable White Shiji Mushroom,Quality Vegetable White Shiji Mushroom,Quality Vegetable White Shiji Mushroom
3,Into,Mare apricot jam 430 G.,Apricot Jam,Mare apricot jam 430g
4,Into,"Colgate, Total Charcoal, Clean 150 grams",Total Charcoal Deep Clean,Colgate Total Charcoal Deep Clean 150g


## Run Openai

### create inputText for openai (for description generation)

In [41]:
df1['inputText'] = df1.apply(lambda x: f'create an approximately 200 words product description to put on a website here are some information about the product, \
                             please include some as appropriate, you dont need to include all the names, it is there only to give more context to the product.\
                              Here is the product info \n\
                           product name: {x.pr_engname}\n, alternative name:{x.hema_name_en}\n, alternative name2:{x.pr_online_name_en}\n, \
                            translated name: {x.translated_name1}]n \
                            translated alternative name: {x.translated_name2}\n translated alternative name 2: {x.translated_name3}\n\
                            product country {x.pr_country_en}\n product keywords are {x.pr_keyword_en + x.translated_keyword + x.pr_filter_en + x.pr_filter_th}\n \
                            product categories are {",".join( (x.online_category_l1_en , x.online_category_l2_en , x.online_category_l3_en, x.villa_category_l1_en, x.villa_category_l2_en, x.villa_category_l3_en,x.villa_category_l4_en))}\n\
                            product contents are {x.content_en + x.translated_content + x.product_detail_description}\n\
                            product metas are {x.meta_title + x.meta_keywords + x.meta_description}\n\
                            product brand is {x.pr_brand_en}\n\
                             '
                            , axis=1)
df1.inputText

0     create an approximately 200 words product desc...
1     create an approximately 200 words product desc...
2     create an approximately 200 words product desc...
3     create an approximately 200 words product desc...
4     create an approximately 200 words product desc...
                            ...                        
95    create an approximately 200 words product desc...
96    create an approximately 200 words product desc...
97    create an approximately 200 words product desc...
98    create an approximately 200 words product desc...
99    create an approximately 200 words product desc...
Name: inputText, Length: 100, dtype: object

### create inputText for openai (for seo generation)

In [50]:

df1['seoInputText'] = df1.apply(lambda x: f'please make a seo tag for this product, including 200 words description include the title and meta tag\n\
                           product name: {x.pr_engname}\n, alternative name:{x.hema_name_en}\n, alternative name2:{x.pr_online_name_en}\n, \
                            product country {x.pr_country_en}\n product keywords are {x.pr_keyword_en + x.translated_keyword + x.pr_filter_en + x.pr_filter_th}\n \
                            product categories are {",".join( (x.online_category_l1_en , x.online_category_l2_en , x.online_category_l3_en, x.villa_category_l1_en, x.villa_category_l2_en, x.villa_category_l3_en,x.villa_category_l4_en))}\n\
                            product contents are {x.content_en + x.translated_content + x.product_detail_description}\n\
                            product metas are {x.meta_title + x.meta_keywords + x.meta_description}\n\
                            product brand is {x.pr_brand_en}\n\
                             '
                            , axis=1)
df1.seoInputText

0     please make a seo tag for this product, includ...
1     please make a seo tag for this product, includ...
2     please make a seo tag for this product, includ...
3     please make a seo tag for this product, includ...
4     please make a seo tag for this product, includ...
                            ...                        
95    please make a seo tag for this product, includ...
96    please make a seo tag for this product, includ...
97    please make a seo tag for this product, includ...
98    please make a seo tag for this product, includ...
99    please make a seo tag for this product, includ...
Name: seoInputText, Length: 100, dtype: object

## Openai helper

In [33]:
@cache.memoize(tag='getDescriptionCache')
def getDescription(inputText:str):
    openai.api_key = getSecret('openai')['keys'][0]
    try:
        r = openai.Completion.create(engine="text-davinci-003", prompt=inputText, max_tokens=500, temperature=.08)
        return r["choices"][0]["text"].replace("\n", "")
    except RateLimitError as e:
        CURRENT_KEY += 1
        openai.api_key = getSecret('openai')['keys'][CURRENT_KEY]
        return getDescription(inputText)
    except Exception as e:
        print(e)
        return getDescription(inputText)
        

## make test sample for get description ( not what you want)

In [14]:
samples = df1.head().inputText.apply(getDescription)
samples.to_csv('../data/samples.csv', index=False)
samples

In [17]:
df1['gptDescription'] = df1.inputText.apply(getDescription)
df1.to_csv('../data/result.csv', index=False)

### test run seo

In [56]:
inputText = 'please make a seo tag for this product with 500 words description\n product name: beluga caviar'
r = openai.Completion.create(engine="text-davinci-003", prompt=inputText, max_tokens=500, temperature=.08)
r["choices"][0]["text"].replace("\n", "")

'<title>Beluga Caviar | Finest Quality Caviar | Buy Now</title><meta name="description" content="Beluga caviar is the finest quality caviar available. It is harvested from the Beluga sturgeon, a species of fish found in the Caspian and Black Sea. Beluga caviar is prized for its large, glossy eggs and its delicate, buttery flavor. It is a luxurious delicacy that is perfect for special occasions. Buy Beluga caviar from our online store and enjoy the finest quality caviar available. We offer a wide selection of caviar, from the classic Beluga to the rare and exotic. Our caviar is sustainably sourced and carefully processed to ensure the highest quality. Whether you are looking for a special treat or a unique gift, our Beluga caviar is sure to please. Order now and enjoy the finest caviar available.>"Beluga caviar is the finest quality caviar available. It is harvested from the Beluga sturgeon, a species of fish found in the Caspian and Black Sea. Beluga caviar is prized for its large, glo

## Get seo

In [57]:
r = df1.head(9).seoInputText.apply(getDescription)
r.to_csv('../data/seo.csv', index=False)


In [58]:
pd.read_csv('../data/seo.csv')

Unnamed: 0,seoInputText
0,<title>KONJAC LINGUINI - Moku Konjac Flat Nood...
1,<title>Bumil Gochujang 250g - Villa Market</ti...
2,<title>White Shimeji | Fresh Produce | Fruits ...
3,<title>Stute Apricot Jam 430g - Low Sugar & Su...
4,<title>COLGATE TOTAL CHARCOAL DEEP CLEAN 150G ...
5,<title>Douglas-CT1725-Amber Fox Doll | Villa M...
6,"<title>MAP OF BKK, 27TH EDIT. | Villa Market</..."
7,<title>Herr's Medium Salsa Dip 454g - Villa Ma...
8,<title>234147 Sprite Lemon Lime Flavour No Sug...


In [59]:
def process(filname:str, outputPath:str):
    # df........



SyntaxError: unexpected EOF while parsing (4144929112.py, line 3)

In [60]:
df

Unnamed: 0,cprcode,iprcode,oprcode,ordertype,pr_abb,pr_active,pr_cgcode,pr_dpcode,pr_engname,pr_ggcode,...,depth,product_attribute_id,product_attribute,product_attribute_images,related_products,enabled,preorder_delivery_type,preorder_fix_date,preorder_relative_day,priority_score
0,225407,225407,225407,Y,MOKU,True,87,21,KONJAC LINGUINI,10,...,,,{'description': 'Moku KONJAC LINGUINI is a del...,,,,,,,
1,241101,241101,241101,Y,BUMILGOCHUJANG,True,6,8,BUMILGOCHUJANG,6,...,,,{'description': 'Bumil Gochujang is a traditio...,,,,,,,
2,190100,190100,190100,Y,WHITE SHIMEJI,True,5,19,WHITE SHIMEJI,143,...,,,,,,,,,,
3,62644,62644,62644,Y,STUTE APRICOT JAM430,True,7,8,STUTE APRICOT JAM 430 G.,2,...,0.0,5.0,"{'origin': '', 'description': ""Stute Foods' ex...",[],[],,,,,
4,192167,192167,192167,Y,COLGATE TOTAL CHAR,True,10,9,COLGATE TOTAL CHARCOAL DEEP CLEAN 150G,5,...,,,{'description': 'Colgate Total Charcoal Deep C...,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65605,166008,166008,166008,Y,RIBBON 180 M. 21,True,2,25,RIBBON 180 M. 21,6,...,,,,,,,,,,
65606,76931,76931,76931,Y,ST. REMY BRANDY 70CL,True,16,1,ST. REMY BRANDY 70CL.,2,...,,,,,,,,,,
65607,203391,203391,203391,Y,CH TERREFORT-QUANCA,True,10,1,CH TERREFORT-QUANCARD,1,...,,,,,,,,,,
65608,205603,205603,205603,Y,19 GRAIN WHEAT LOAF,True,15,19,19 GRAIN WHEAT LOAF,132,...,,,,,,,,,,
