In [None]:
import dotenv
import os

from genai.credentials import Credentials
from genai.schemas import GenerateParams
from genai.model import Model

In [None]:
# Load environment variables via python-dotenv before reading the two
# GENAI_* settings below.
dotenv.load_dotenv()

# Either value is None when the corresponding variable is not set.
api_key = os.environ.get("GENAI_KEY")
api_endpoint = os.environ.get("GENAI_API")
creds = Credentials(api_key, api_endpoint)

# Generation settings, converted to a plain dict for the Model constructor.
# "###" is the end marker that every prompt in this notebook asks for.
params = GenerateParams(
    decoding_method="greedy",
    min_new_tokens=15,
    max_new_tokens=1000,
    stop_sequences=["###"],
).dict()

# Alternative model id that can be swapped in: "codellama/codellama-34b-instruct"
model = Model(model="bigcode/starcoder", credentials=creds, params=params)

In [None]:
# Name of the source file; it also appears as one of the fields in `sampleout`.
# Note the literal ends with a newline, so the prompt gets a blank line after it.
filename = """cls_rec15_2023092802.csv
"""

# One pipe-delimited input record, used as the worked example's input.
samplein = """15||7000000001|FF|70001@gmail.com|AB||Y|HTML|Y|20230928123456
"""

# Expected output for `samplein`: extra leading fields (including the file
# name) are prepended, and the trailing timestamp 20230928123456 is rewritten
# as "2023-09-28 12:34:56".
sampleout = """|CLS|2023-09-28 02:00:00|2023-09-28 00:00:00|af_ioda_cus_daily_cls_mini_batch.CUSCLSMiniBatchTransformer|airflow||D|0|cls_rec15_2023092802.csv|15||7000000001|FF|70001@gmail.com|AB||Y|HTML|Y|2023-09-28 12:34:56
"""

# Full file content the model is asked to convert: eight "15" records between
# a first line starting with "00" and a last line starting with "99".
# NOTE(review): those two lines look like a header and a trailer record — confirm.
csv = """00ABC-MKTS202309280230082023092720200520230928023005
15||7000000001|FF|70001@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000002|FF|70002@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000003|FF|70003@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000004|FF|70004@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000005|FF|70005@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000006|FF|70006@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000007|FF|70007@gmail.com|AB||Y|HTML|Y|20230928123456
15||7000000008|FF|70008@gmail.com|AB||Y|HTML|Y|20230928123456
990000000008"""

In [None]:
# Few-shot prompt, one section per line: the instruction, the file name, one
# worked INPUT/OUTPUT pair, then the whole file as the INPUT to complete.
prompt = "\n".join(
    [
        "convert this csv to csv, show the result only, and end with ###:",
        "FILENAME:",
        filename,
        "INPUT:",
        samplein,
        "OUTPUT:",
        sampleout,
        "INPUT:",
        csv,
        "OUTPUT:",
    ]
)

In [None]:
# Send the current prompt to the model as a one-element list, then print a
# "result:" banner followed by the generated_text of every returned item.
response_text = model.generate([prompt])
print("\nresult:\n\n")
for resp in response_text:
    print(resp.generated_text)

Generate Python code for the CSV conversion

In [None]:
# Ask for a Python function instead of converted data: the instruction, the
# file name and one worked input/output pair, ending on the "PYTHON:" cue.
prompt = "\n".join(
    [
        "generate a python function that convert input csv to output csv, show the result only, end with ###:",
        "FILENAME:",
        filename,
        "Data INPUT:",
        samplein,
        "Data OUTPUT:",
        sampleout,
        "PYTHON:",
    ]
)

# One prompt in, one batch of results out; each completion is printed under
# the banner.
response_text = model.generate([prompt])
print("\nresult:\n\n")
for resp in response_text:
    print(resp.generated_text)

In [None]:
# Name of the source file; it also appears as a quoted field in `sampleout`.
filename = """wos_hawb_20220513.dat"""

# One comma-delimited input record, used as the worked example's input.
samplein = """AB,1234,1234567,09-Apr-2023,HKG,CHN,ABCD123456,09-Apr-2023 12:34:56,AB,1234,09-Apr-2023,HKG,CHN,1,21.0,K,,,,,,,,ABCDEFG,ABCDEFG.,,,,,ABCDEFG,,330457,HK,,ABCDEFG,ABCDEFG,,,,,ABCDEFG,,1920,ID,,ABCDEFG,ABCDEFG,,,,,,,,,,,
"""

# Expected output for `samplein`: pipe-delimited, every field wrapped in double
# quotes, extra leading fields prepended, and dates rewritten from
# "09-Apr-2023" to "2023-04-09".
# Fix: the last (empty) field used to be a lone `"`; it is now `""` like every
# other empty field, so the few-shot example no longer shows a malformed column.
sampleout = """""|"WOS"|"2022-05-13"|"2022-05-13 15:47:48"|"af_ioda_cgo_wos_daily_wos_hawb.CGOTransformer_WOS_HAWB"|"ioda_cgo"|""|"D"|"0"|"wos_hawb_20220513.dat"|"AB"|"1234"|"1234567"|"2023-04-09"|"HKG"|"CHN"|"ABCD123456"|"2023-04-09 12:34:56"|"AB"|"1234"|"2023-04-09"|"HKG"|"CHN"|"1"|"21.0"|"K"|""|""|""|""|""|""|""|"ABCDEFG"|"ABCDEFG."|""|""|""|""|"ABCDEFG"|""|"330457"|"HK"|""|"ABCDEFG"|"ABCDEFG"|""|""|""|""|"ABCDEFG"|""|"1920"|"ID"|""|"ABCDEFG"|"ABCDEFG"|""|""|""|""|""|""|""|""|""|""|""
"""

# Full file content the model is asked to convert: a row of column names
# followed by two comma-delimited data rows that differ only in the second
# field (1234 vs 1223).
dat = """MAWB Issuing Carrier,Airline Prefix,Shipment Ref Number,MAWB Issue Date,MAWB Origin,MAWB Destination,HWB No,HWB Last Update DT,Carrier Code,Flight No,Flight Origin Date,HWB Origin,HWB Destination,HWB PCS,HAWB WT,HWB Weight Unit Code,SLAC,Other Prepaid or Collect,WTVAL Prepaid or Collect,Carriage Declared Value,Customs Declared Value,The currency for declare values of carriage/customs/insurance,Insurance Amount,Shipper Name,Shipper Address 1,Shipper Address 2,Shipper Address 3,Shipper Address 4,Shipper Address 5,Shipper Place,Shipper City,Shipper Postal Code,Shipper Country Code,Shipper Contact,Consignee Name,Consignee Address 1,Consignee Address 2,Consignee Address 3,Consignee Address 4,Consignee Address 5,Consignee Place,Consignee City,Consignee Postal Code,Consignee Country Code,Consignee Contact,Goods desc of the HWB-15 chars,Goods desc of the HWB manifest,Info for customs declaration,harmonized commodity code,FHL SHC1,FHL SHC2,FHL SHC3,FHL SHC4,FHL SHC5,FHL SHC6,FHL SHC7,FHL SHC8,FHL SHC9
AB,1234,1234567,09-Apr-2023,HKG,CHN,ABCD123456,09-Apr-2023 12:34:56,AB,1234,09-Apr-2023,HKG,CHN,1,21.0,K,,,,,,,,ABCDEFG,ABCDEFG.,,,,,ABCDEFG,,330457,HK,,ABCDEFG,ABCDEFG,,,,,ABCDEFG,,1920,ID,,ABCDEFG,ABCDEFG,,,,,,,,,,,
AB,1223,1234567,09-Apr-2023,HKG,CHN,ABCD123456,09-Apr-2023 12:34:56,AB,1234,09-Apr-2023,HKG,CHN,1,21.0,K,,,,,,,,ABCDEFG,ABCDEFG.,,,,,ABCDEFG,,330457,HK,,ABCDEFG,ABCDEFG,,,,,ABCDEFG,,1920,ID,,ABCDEFG,ABCDEFG,,,,,,,,,,,
"""

In [None]:
# Few-shot prompt, one section per line: the instruction, the file name, one
# worked INPUT/OUTPUT pair, then the whole .dat content as the INPUT to complete.
prompt = "\n".join(
    [
        "convert this dat to csv, show the result only, end with ###:",
        "FILENAME:",
        filename,
        "INPUT:",
        samplein,
        "OUTPUT:",
        sampleout,
        "INPUT:",
        dat,
        "OUTPUT:",
    ]
)

In [None]:
# Send the current prompt to the model as a one-element list, then print a
# "result:" banner followed by the generated_text of every returned item.
response_text = model.generate([prompt])
print("\nresult:\n\n")
for resp in response_text:
    print(resp.generated_text)

Generate Python code for the DAT conversion

In [None]:
# Ask for a Python function instead of converted data: the instruction, the
# file name and one worked input/output pair, ending on the "PYTHON:" cue.
prompt = "\n".join(
    [
        "generate a python function that convert input csv to output csv, show the result only, end with ###:",
        "FILENAME:",
        filename,
        "Data INPUT:",
        samplein,
        "Data OUTPUT:",
        sampleout,
        "PYTHON:",
    ]
)

# One prompt in, one batch of results out; each completion is printed under
# the banner.
response_text = model.generate([prompt])
print("\nresult:\n\n")
for resp in response_text:
    print(resp.generated_text)

In [None]:
# Name of the source file; it also appears as a quoted field in `sampleout`.
filename = """categorymstdata_20230928_ioda.xml"""

# One <Category> element (level_2) that nests its parent (level_1) inside
# <supercategories>; used as the worked example's input.
# NOTE(review): the "&" in "Camera & Photo" is not escaped as "&amp;", so a
# strict XML parser would reject this text — confirm how the real feed looks.
samplein = """<Category>
      <supercategories>
         <Category>
            <code>level_1</code>
            <name>Electronics</name>
            <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
            <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
            <businessPoint/>
         </Category>
      </supercategories>
      <code>level_2</code>
      <name>Camera & Photo</name>
      <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
      <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
      <businessPoint/>
   </Category>
   """

# Expected output for `samplein`: a single pipe-delimited, double-quoted row.
# After the leading fields come the level_2 values, then the level_1 values,
# then empty fields; timestamps lose the "T" separator and the milliseconds.
# The literal starts with a newline and ends with a blank line.
sampleout = """
""|"IRedeem"|"2023-09-28 00:00:00"|"2023-09-28 00:00:00"|"af_ioda_cus_iredeem_daily_transaction_log.CUSIRedeemDailyTransactionLogTransformer"|"airflow"|""|"D"|"0"|"categorymstdata_20230928_ioda.xml"|"level_2"|"Camera & Photo"|"2023-09-28 00:00:00"|"2023-09-28 00:00:00"|""|"level_1"|"Electronics"|"2023-09-28 00:00:00"|"2023-09-28 00:00:00"|""|""|""|""|""|""|""|""|""|""|""|""|""|""|""|""

"""

# Full document the model is asked to convert: a <Categories> root holding
# three <Category> entries (level_2, level_3, level_4). Each entry nests its
# chain of ancestors, down to level_1, inside <supercategories> elements.
# NOTE(review): the bare "&" characters in the <name> values are not escaped,
# so a strict XML parser would reject this text — confirm against the real feed.
xml = """<?xml version="1.0" encoding="UTF-8"?>
<Categories xmlns:xs="http://www.w3.org/2001/XMLSchema">
   <Category>
      <supercategories>
         <Category>
            <code>level_1</code>
            <name>Electronics</name>
            <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
            <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
            <businessPoint/>
         </Category>
      </supercategories>
      <code>level_2</code>
      <name>Camera & Photo</name>
      <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
      <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
      <businessPoint/>
   </Category>
   <Category>
      <supercategories>
         <Category>
            <supercategories>
               <Category>
                  <code>level_1</code>
                  <name>Electronics</name>
                  <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
                  <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
                  <businessPoint/>
               </Category>
            </supercategories>
            <code>level_2</code>
            <name>Camera & Photo</name>
            <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
            <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
            <businessPoint/>
         </Category>
      </supercategories>
      <code>level_3</code>
      <name>Camera Bags & Cases</name>
      <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
      <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
      <businessPoint/>
   </Category>
   <Category>
      <supercategories>
         <Category>
            <supercategories>
               <Category>
                  <supercategories>
                     <Category>
                        <code>level_1</code>
                        <name>Electronics</name>
                        <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
                        <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
                        <businessPoint/>
                     </Category>
                  </supercategories>
                  <code>level_2</code>
                  <name>Camera & Photo</name>
                  <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
                  <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
                  <businessPoint/>
               </Category>
            </supercategories>
            <code>level_3</code>
            <name>Camera Bags & Cases</name>
            <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
            <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
            <businessPoint/>
         </Category>
      </supercategories>
      <code>level_4</code>
      <name>Camera Cases</name>
      <creationDateTime>2023-09-28T00:00:00.000</creationDateTime>
      <modifiedDateTime>2023-09-28T00:00:00.000</modifiedDateTime>
      <businessPoint/>
   </Category>
</Categories>"""

In [None]:
# Few-shot prompt, one section per line: the instruction, the file name, one
# worked input/output pair, then the whole XML document as the INPUT to complete.
prompt = "\n".join(
    [
        "convert this xml to csv, show the result only, end with ###:",
        "FILENAME:",
        filename,
        "Data INPUT:",
        samplein,
        "Data OUTPUT:",
        sampleout,
        "INPUT:",
        xml,
        "OUTPUT:",
    ]
)

In [None]:
# Send the current prompt to the model as a one-element list, then print a
# "result:" banner followed by the generated_text of every returned item.
response_text = model.generate([prompt])
print("\nresult:\n\n")
for resp in response_text:
    print(resp.generated_text)

Generate Python code for the XML conversion

In [None]:
# Ask for a Python function that performs the XML -> CSV conversion: the
# instruction, the file name and one worked input/output pair, ending on the
# "PYTHON:" cue.
prompt = f'''generate a python function that convert input xml to output csv, show the result only, end with ###:
FILENAME:
{filename}
Data INPUT:
{samplein}
Data OUTPUT:
{sampleout}
PYTHON:'''

print("\nresult:\n\n")

# The prompt asks the model to finish with this marker.
STOP_MARKER = "###"
# Upper bound on generate() calls used to stitch one long answer together.
MAX_ROUNDS = 5

# Each call may return only part of the answer, so the text produced so far is
# appended to the prompt and the model is asked to continue from there.
previous_output = ""
for i in range(MAX_ROUNDS):
    prompt += previous_output
    response_text = model.generate([prompt])

    previous_output = ""
    for resp in response_text:
        previous_output += resp.generated_text
        print(resp.generated_text)

    # Stop as soon as the answer is complete (end marker seen) or the model has
    # nothing more to add. Without this check the loop kept prompting past the
    # end of the answer and appended unrelated text.
    if not previous_output or STOP_MARKER in previous_output:
        break