In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from torch.optim.optimizer import Optimizer
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import random
import torch
from transformers import pipeline
import warnings 
warnings.filterwarnings('ignore')
from pytorch_lightning import seed_everything
from torch.utils.data import DataLoader
import os
import gc
gc.collect()

def get_jaccard_sim(str1, str2): 
    a = set(str1.split()) 
    b = set(str2.split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))

In [2]:
def set_seed(seed = int):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    seed_everything(seed)
    return random_state
random_state = set_seed(42)

Global seed set to 42


In [3]:
from transformers import DistilBertTokenizerFast
from transformers import DistilBertForQuestionAnswering

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased")
model.load_state_dict(torch.load('Product_Data_SQuAD_model_2144.pt'))
model.eval()
nlp = pipeline('question-answering', model=model.to('cpu'), tokenizer=tokenizer)
gc.collect()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this mode

244

In [4]:
def model_predict(nlp,df):
    table = pd.DataFrame()
    idx_list = sorted(df.index.tolist())
    for i in tqdm(idx_list):
        sample = df.loc[[i]]
        string_X_train = sample['string_X_train'].values[0]
        QA_input = {
            'question': 'What is the product name?',
            'context': string_X_train
        }
        res = nlp(QA_input)
        predict = QA_input['context'][res['start']:res['end']]
        row = pd.DataFrame({'predict:':predict},index=[i])
        table = table.append(row)
    return table

In [5]:
import pandas as pd
from tqdm import tqdm_notebook as tqdm


df = pd.read_excel('台塑企業_ 產品寶典20210303.xlsx',engine='openpyxl')
display(df.head(3))
display(df[df['品名']=='MA'])

產品集合 = set(df['品名'].values)

'''
train_df = pd.read_csv('Train_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward','label_for_train'],axis=1)
train_df = train_df.dropna(axis=0)
display(train_df.head(3))

val_df = pd.read_csv('Val_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward'],axis=1)
val_df = val_df.dropna(axis=0)
display(val_df.head(3))
'''

Unnamed: 0,RIGID,分機,公司代號,公司事業部門,品名,新增
0,黃淑玲,7190,11,台塑塑膠部,CAUSTIC SODA LIQUID FLAKE PEARLS MICROPEARLS,
1,黃淑玲,7190,11,台塑塑膠部,CAUSTIC SODA LIQUID,
2,黃淑玲,7190,11,台塑塑膠部,COMMODITY FORMOSACN,


Unnamed: 0,RIGID,分機,公司代號,公司事業部門,品名,新增
531,,,2P,南亞塑四部化學品部,MA,


"\ntrain_df = pd.read_csv('Train_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward','label_for_train'],axis=1)\ntrain_df = train_df.dropna(axis=0)\ndisplay(train_df.head(3))\n\nval_df = pd.read_csv('Val_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward'],axis=1)\nval_df = val_df.dropna(axis=0)\ndisplay(val_df.head(3))\n"

In [7]:
品名2部門 = dict(zip(df['品名'],df['公司事業部門']))
品名2部門

{'CAUSTIC SODA LIQUID FLAKE PEARLS MICROPEARLS': '台塑塑膠部',
 'CAUSTIC SODA LIQUID': '台塑塑膠部',
 'COMMODITY FORMOSACN': '台塑塑膠部',
 'CHLOROFORM': '台塑塑膠部',
 'ETHYLENE DICHLORIDE': '台塑塑膠部',
 'EDC': '台塑塑膠部',
 'FM130 FM190 FM070 M41 S60 S65 FM090': '台塑塑膠部',
 'FORMOSACON': '台塑塑膠部',
 'FORMOLON': '台塑塑膠部',
 'HCFC': '台塑塑膠部',
 'MBS RESIN': '台塑塑膠部',
 'MBS RESIN M-61': '台塑塑膠部',
 'MBS': '台塑塑膠部',
 'MBS M-51 M-41': '台塑塑膠部',
 'MBS RESIN M-61 MBS  M-51 M-41': '台塑塑膠部',
 'METHYLENE CHLORIDE  CH3CL CH2CL2': '台塑塑膠部',
 'MODIFIER MPACT M-61': '台塑塑膠部',
 'METHYLENE CHLORIDE ': '台塑塑膠部',
 'PROCESSING AIDS PVC PROCESSING AID': '台塑塑膠部',
 'PRO PR-415       ': '台塑塑膠部',
 'POLYVINYL CHLORIDE PASTE RESIN': '台塑塑膠部',
 'PVC ADDITIVE': '台塑塑膠部',
 'PVC COPOLYMER': '南亞塑四部化學品部',
 'PVC EMULSION PASTES PR-F PR-450': '台塑塑膠部',
 'PVC IMPACT MODIFIER M-51 M-31 M-81': '台塑塑膠部',
 'PVC RESIN': '台塑塑膠部',
 'PVC HEAT STABILIZER TM-2080': '台塑塑膠部',
 'PVC HOMOPOLYMER RESINS S-60S': '台塑塑膠部',
 'PVC PASTE RESIN': '台塑塑膠部',
 'PVC RESIN PASTE': '台塑塑膠部',
 '

In [8]:
val_df = pd.read_csv('preprocess_for_SQUAD_wordninja.csv',index_col=0)[['45A','Y_label']]
val_df.columns = ['string_X_train','Y_label']
val_df

Unnamed: 0,string_X_train,Y_label
0,MASS PVC RESIN B-57 QUANTITY 175 MT AT 1300 US...,MASS PVC RESIN B-57
1,PHTHALIC ANHYDRIDE PA QUANTITY 306 MT UNIT PRI...,PHTHALIC ANHYDRIDE
2,COMMODITY LLDPE TAISOX 3470 QUANTITY 320 MT 2 ...,LLDPE TAISOX
5,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010
6,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010
...,...,...
3609,COMMODITY MONO ETHYLENE GLYCOL x 000 D QUANTIT...,MONO ETHYLENE GLYCOL
3610,CFR KOBE JAPAN x 000 D VISCOSE RAYON STAPLE FI...,VISCOSE RAYON STAPLE FIBER
3611,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN
3612,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN


In [9]:
train_df = val_df

# 如果品名是單詞的話 前後加個空白

In [10]:
for i in df.index:
    if ' ' not in df.loc[i,'品名']:
        name = df.loc[i,'品名']
        df.loc[i,'品名'] = f' {name} '
        assert df.loc[i,'品名'][0] == ' '
        assert df.loc[i,'品名'][-1] == ' '

In [11]:
display(df[df['品名']=='MA'])
display(df[df['品名']==' MA '])

Unnamed: 0,RIGID,分機,公司代號,公司事業部門,品名,新增


Unnamed: 0,RIGID,分機,公司代號,公司事業部門,品名,新增
531,,,2P,南亞塑四部化學品部,MA,


In [12]:
產品集合 = set(df['品名'].values)

# find_fail_sample and drop fail_sample

In [13]:
def find_fail_sample(df):
    fails = []
    for i in df.index:
        context = df.loc[i,'string_X_train']
        answer = df.loc[i,'Y_label']
        if str(answer) not in str(context):
            fails.append(i)
    return fails
train_fails = find_fail_sample(train_df)
val_fails = find_fail_sample(val_df)
print(train_fails,val_fails)
display(val_df.loc[val_fails])
print(val_df.shape)
val_df = val_df.drop(val_fails,axis=0)
print(val_df.shape)

[182, 521, 745, 748, 758, 759, 760, 829, 832, 833, 875, 876, 891, 908, 920, 923, 925, 957, 987, 998, 1003, 1004, 1007, 1009, 1046, 1048, 1083, 1132, 1135, 1136, 1154, 1203, 1209, 1266, 1301, 1315, 1317, 1326, 1368, 1398, 1481, 1501, 1502, 1512, 1552, 1553, 1589, 1602, 1643, 1650, 1677, 1679, 1680, 1713, 1714, 1716, 1718, 1721, 1754, 1755, 1780, 1831, 1871, 1872, 1962, 1964, 1965, 2067, 2208, 2210, 2218, 2229, 2233, 2246, 2282, 2353, 2387, 2389, 2393, 2478, 2485, 2502, 2541, 2553, 2573, 2574, 2596, 2600, 2608, 2680, 2709, 2720, 2768, 2826, 2828, 2833, 2838, 2839, 2840, 2841, 2844, 2847, 2912, 2962, 2985, 2986, 3000, 3024, 3026, 3083, 3084, 3133, 3162, 3166, 3194, 3215, 3216, 3225, 3235, 3248, 3295, 3296, 3339, 3393, 3394, 3399, 3554, 3584, 3587, 3590, 3591, 3595, 3605, 3608] [182, 521, 745, 748, 758, 759, 760, 829, 832, 833, 875, 876, 891, 908, 920, 923, 925, 957, 987, 998, 1003, 1004, 1007, 1009, 1046, 1048, 1083, 1132, 1135, 1136, 1154, 1203, 1209, 1266, 1301, 1315, 1317, 1326, 1368, 

Unnamed: 0,string_X_train,Y_label
182,PHTHALIC ANHYDRIDE 504 MT AT USD 1010 M Tx 000...,
521,PHTHALIC ANHYDRIDE 504 MT AT USD 980 M Tx 000 ...,
745,9000 MT PHTHALIC ANHYDRIDE PAx 000 DAT USD 825...,
748,9000 MT PHTHALIC ANHYDRIDE PAx 000 DAT USD 825...,
758,9000 MT PHTHALIC ANHYDRIDE PAx 000 DAT USD 825...,
...,...,...
3590,3600 MT 2 F CL x 000 D PHTHALIC ANHYDRIDE x 00...,
3591,PHTHALIC ANHYDRIDE PAx 000 D QT Y 54 M TSx 000...,
3595,108 MT 6 F CL OF PHTHALIC ANHYDRIDE AT 98000 P...,
3605,108 MT 6 F CL OF PHTHALIC ANHYDRIDE AT USD 960...,


(3198, 2)
(3064, 2)


In [14]:
def Collection_method(df,產品集合):
    labels = {}
    for i in tqdm(df.index):
        products = []
        for p in 產品集合:
            if p in df.loc[i,'string_X_train']:
                products.append(p)
        labels[i] = products
    predict = pd.DataFrame(index=labels.keys(),columns=['predict'])
    predict['predict'] = labels.values()
    return predict
predict = Collection_method(val_df,產品集合)
result = val_df.join(predict)

  0%|          | 0/3064 [00:00<?, ?it/s]

In [15]:
result

Unnamed: 0,string_X_train,Y_label,predict
0,MASS PVC RESIN B-57 QUANTITY 175 MT AT 1300 US...,MASS PVC RESIN B-57,"[PVC RESIN, RESIN , PVC RESIN B-57]"
1,PHTHALIC ANHYDRIDE PA QUANTITY 306 MT UNIT PRI...,PHTHALIC ANHYDRIDE,"[PHTHALIC ANHYDRIDE , PA , PHTHALIC ANHYDRIDE..."
2,COMMODITY LLDPE TAISOX 3470 QUANTITY 320 MT 2 ...,LLDPE TAISOX,"[LLDPE TAISOX, TAISOX ]"
5,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010,"[ TAISOX , HDPE , HDPE TAISOX, HDPE TAISOX 8010]"
6,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010,"[ TAISOX , HDPE , HDPE TAISOX, HDPE TAISOX 8010]"
...,...,...,...
3609,COMMODITY MONO ETHYLENE GLYCOL x 000 D QUANTIT...,MONO ETHYLENE GLYCOL,"[MONO ETHYLENE GLYCOL, ETHYLENE ]"
3610,CFR KOBE JAPAN x 000 D VISCOSE RAYON STAPLE FI...,VISCOSE RAYON STAPLE FIBER,"[ RAYON , VISCOSE RAYON STAPLE FIBER, RAYON ST..."
3611,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN,"[PC RESIN, RESIN , INA ]"
3612,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN,"[PC RESIN, RESIN , INA ]"


In [16]:
not_find = []
for j,i in enumerate(result.iloc[:,2].values):
    if len(i) == 0:
        not_find.append(j)
len(not_find)

328

In [17]:
not_find_df = result.iloc[not_find]
not_find_df

Unnamed: 0,string_X_train,Y_label,predict
51,PVC SUSPENSION S65 - 112 MT AT USD 133000 PER ...,EA,[]
143,EVA 7470 Mx 000 D QUANTITY 5000 M TSx 000 D UN...,EVA,[]
144,EVA 7470 Mx 000 D QUANTITY 5000 M TSx 000 D UN...,EVA,[]
145,EVA 7470 Mx 000 D QUANTITY 5000 M TSx 000 D UN...,EVA,[]
203,TERMS OF PRICE CFR AQABA PORT - J O R D A N x ...,DOP,[]
...,...,...,...
3566,ABS AG 15 AA - H x 000 D Q U A N T I T Y 25200...,ABS,[]
3589,COMMODITY MONOETHYLENE GLYCOL x 000 D QUANTITY...,ETHYLENE,[]
3596,EPICHLOROHYDRIN x 000 D QT Y 18400 x 000 DAS P...,EPICHLOROHYDRIN,[]
3603,ACETONITRILE 128 M Tx 000 DAT USD 3650 PER M T...,ACETONITRILE,[]


In [18]:
bert_predict = model_predict(nlp,not_find_df)
bert_predict

  0%|          | 0/328 [00:00<?, ?it/s]

Unnamed: 0,predict:
51,PVC SUSPENSION
143,EVA
144,EVA
145,EVA
203,DOP
...,...
3566,ABS
3589,MONOETHYLENE
3596,EPICHLOROHYDRIN
3603,ACETONITRILE


In [19]:
def get_jaccard_sim(str1, str2): 
    a = set(str1.split()) 
    b = set(str2.split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))

In [20]:
result.iloc[not_find,2] = [ [str(i)] for i in bert_predict['predict:'].values]
result.iloc[not_find]

Unnamed: 0,string_X_train,Y_label,predict
51,PVC SUSPENSION S65 - 112 MT AT USD 133000 PER ...,EA,[PVC SUSPENSION]
143,EVA 7470 Mx 000 D QUANTITY 5000 M TSx 000 D UN...,EVA,[EVA]
144,EVA 7470 Mx 000 D QUANTITY 5000 M TSx 000 D UN...,EVA,[EVA]
145,EVA 7470 Mx 000 D QUANTITY 5000 M TSx 000 D UN...,EVA,[EVA]
203,TERMS OF PRICE CFR AQABA PORT - J O R D A N x ...,DOP,[DOP]
...,...,...,...
3566,ABS AG 15 AA - H x 000 D Q U A N T I T Y 25200...,ABS,[ABS]
3589,COMMODITY MONOETHYLENE GLYCOL x 000 D QUANTITY...,ETHYLENE,[MONOETHYLENE]
3596,EPICHLOROHYDRIN x 000 D QT Y 18400 x 000 DAS P...,EPICHLOROHYDRIN,[EPICHLOROHYDRIN]
3603,ACETONITRILE 128 M Tx 000 DAT USD 3650 PER M T...,ACETONITRILE,[ACETONITRILE]


In [23]:
get_jaccard_sim('MASS PVC RESIN B-57','PVC RESIN B-57')

0.75

In [26]:
for j in result.loc[51,'predict']:
    print(j)

PVC SUSPENSION


In [27]:
def get_acc(df):
    correct = []
    correct_label = []
    for i in df.index:
        jacs = []
        for j in df.loc[i,'predict']:
            jacs.append(get_jaccard_sim(df.loc[i,'Y_label'],j))
        if max(jacs) >= 0.75:
            correct.append('yes')
        else:
            correct.append('no')
    return correct

In [33]:
def get_jac(df):
    all_jacs = []
    for i in df.index:
        jacs = []
        for j in df.loc[i,'predict']:
            jacs.append(get_jaccard_sim(df.loc[i,'Y_label'],j))
        all_jacs.append(max(jacs))
    return np.sum(all_jacs)/len(all_jacs)

In [29]:
correct = get_acc(result)
result['correct'] = correct
result

Unnamed: 0,string_X_train,Y_label,predict,correct
0,MASS PVC RESIN B-57 QUANTITY 175 MT AT 1300 US...,MASS PVC RESIN B-57,"[PVC RESIN, RESIN , PVC RESIN B-57]",yes
1,PHTHALIC ANHYDRIDE PA QUANTITY 306 MT UNIT PRI...,PHTHALIC ANHYDRIDE,"[PHTHALIC ANHYDRIDE , PA , PHTHALIC ANHYDRIDE...",yes
2,COMMODITY LLDPE TAISOX 3470 QUANTITY 320 MT 2 ...,LLDPE TAISOX,"[LLDPE TAISOX, TAISOX ]",yes
5,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010,"[ TAISOX , HDPE , HDPE TAISOX, HDPE TAISOX 8010]",yes
6,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010,"[ TAISOX , HDPE , HDPE TAISOX, HDPE TAISOX 8010]",yes
...,...,...,...,...
3609,COMMODITY MONO ETHYLENE GLYCOL x 000 D QUANTIT...,MONO ETHYLENE GLYCOL,"[MONO ETHYLENE GLYCOL, ETHYLENE ]",yes
3610,CFR KOBE JAPAN x 000 D VISCOSE RAYON STAPLE FI...,VISCOSE RAYON STAPLE FIBER,"[ RAYON , VISCOSE RAYON STAPLE FIBER, RAYON ST...",yes
3611,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN,"[PC RESIN, RESIN , INA ]",yes
3612,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN,"[PC RESIN, RESIN , INA ]",yes


In [30]:
result['correct'].value_counts()

yes    2650
no      414
Name: correct, dtype: int64

In [31]:
result['correct'].value_counts()['yes']/len(val_df)

0.8648825065274152

In [34]:
get_jac(result)

0.8863507086907869

In [38]:
品名2部門

{'CAUSTIC SODA LIQUID FLAKE PEARLS MICROPEARLS': '台塑塑膠部',
 'CAUSTIC SODA LIQUID': '台塑塑膠部',
 'COMMODITY FORMOSACN': '台塑塑膠部',
 'CHLOROFORM': '台塑塑膠部',
 'ETHYLENE DICHLORIDE': '台塑塑膠部',
 'EDC': '台塑塑膠部',
 'FM130 FM190 FM070 M41 S60 S65 FM090': '台塑塑膠部',
 'FORMOSACON': '台塑塑膠部',
 'FORMOLON': '台塑塑膠部',
 'HCFC': '台塑塑膠部',
 'MBS RESIN': '台塑塑膠部',
 'MBS RESIN M-61': '台塑塑膠部',
 'MBS': '台塑塑膠部',
 'MBS M-51 M-41': '台塑塑膠部',
 'MBS RESIN M-61 MBS  M-51 M-41': '台塑塑膠部',
 'METHYLENE CHLORIDE  CH3CL CH2CL2': '台塑塑膠部',
 'MODIFIER MPACT M-61': '台塑塑膠部',
 'METHYLENE CHLORIDE ': '台塑塑膠部',
 'PROCESSING AIDS PVC PROCESSING AID': '台塑塑膠部',
 'PRO PR-415       ': '台塑塑膠部',
 'POLYVINYL CHLORIDE PASTE RESIN': '台塑塑膠部',
 'PVC ADDITIVE': '台塑塑膠部',
 'PVC COPOLYMER': '南亞塑四部化學品部',
 'PVC EMULSION PASTES PR-F PR-450': '台塑塑膠部',
 'PVC IMPACT MODIFIER M-51 M-31 M-81': '台塑塑膠部',
 'PVC RESIN': '台塑塑膠部',
 'PVC HEAT STABILIZER TM-2080': '台塑塑膠部',
 'PVC HOMOPOLYMER RESINS S-60S': '台塑塑膠部',
 'PVC PASTE RESIN': '台塑塑膠部',
 'PVC RESIN PASTE': '台塑塑膠部',
 '

In [53]:
部門_lst = []
for p_lst in tqdm(result['predict'].values):
    p = max(p_lst,key=len)
    jac_dict = {}
    for i in 品名2部門.keys():
        jac_dict[i] = get_jaccard_sim(i,p)
    部門_lst.append(品名2部門[max(jac_dict, key=jac_dict.get)])

  0%|          | 0/3064 [00:00<?, ?it/s]

In [55]:
result['預測部門'] = 部門_lst

In [56]:
result

Unnamed: 0,string_X_train,Y_label,predict,correct,預測部門
0,MASS PVC RESIN B-57 QUANTITY 175 MT AT 1300 US...,MASS PVC RESIN B-57,"[PVC RESIN, RESIN , PVC RESIN B-57]",yes,台塑塑膠部
1,PHTHALIC ANHYDRIDE PA QUANTITY 306 MT UNIT PRI...,PHTHALIC ANHYDRIDE,"[PHTHALIC ANHYDRIDE , PA , PHTHALIC ANHYDRIDE...",yes,南亞化一部
2,COMMODITY LLDPE TAISOX 3470 QUANTITY 320 MT 2 ...,LLDPE TAISOX,"[LLDPE TAISOX, TAISOX ]",yes,台塑聚烯部
5,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010,"[ TAISOX , HDPE , HDPE TAISOX, HDPE TAISOX 8010]",yes,台塑聚烯部
6,ITEM 1 HDPE TAISOX 8010 200 MT USD 1100 MT CON...,HDPE TAISOX 8010,"[ TAISOX , HDPE , HDPE TAISOX, HDPE TAISOX 8010]",yes,台塑聚烯部
...,...,...,...,...,...
3609,COMMODITY MONO ETHYLENE GLYCOL x 000 D QUANTIT...,MONO ETHYLENE GLYCOL,"[MONO ETHYLENE GLYCOL, ETHYLENE ]",yes,化工第三事業部
3610,CFR KOBE JAPAN x 000 D VISCOSE RAYON STAPLE FI...,VISCOSE RAYON STAPLE FIBER,"[ RAYON , VISCOSE RAYON STAPLE FIBER, RAYON ST...",yes,台化化一部
3611,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN,"[PC RESIN, RESIN , INA ]",yes,台化塑膠事業部
3612,DESCRIPTION OF GOODS QUANTITY MT UNIT PRICE US...,PC RESIN,"[PC RESIN, RESIN , INA ]",yes,台化塑膠事業部


In [57]:
result.to_csv('submit_0726.csv')