In [40]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from torch.optim.optimizer import Optimizer
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import random
import torch
from transformers import pipeline
import warnings 
warnings.filterwarnings('ignore')
from pytorch_lightning import seed_everything
from torch.utils.data import DataLoader
import os
import gc
gc.collect()

1685

# set_seed(42)

In [41]:
def set_seed(seed = int):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    seed_everything(seed)
    return random_state
random_state = set_seed(42)

Global seed set to 42


# jaccard

In [42]:
def get_jaccard_sim(str1, str2): 
    a = set(str1.split()) 
    b = set(str2.split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))

# MADGRAD

In [43]:
import math
from typing import TYPE_CHECKING, Any, Callable, Optional

if TYPE_CHECKING:
    from torch.optim.optimizer import _params_t
else:
    _params_t = Any

class MADGRAD(Optimizer):

    def __init__(
        self, params: _params_t, lr: float = 1e-2, momentum: float = 0.9, weight_decay: float = 0, eps: float = 1e-6,
    ):
        if momentum < 0 or momentum >= 1:
            raise ValueError(f"Momentum {momentum} must be in the range [0,1]")
        if lr <= 0:
            raise ValueError(f"Learning rate {lr} must be positive")
        if weight_decay < 0:
            raise ValueError(f"Weight decay {weight_decay} must be non-negative")
        if eps < 0:
            raise ValueError(f"Eps must be non-negative")

        defaults = dict(lr=lr, eps=eps, momentum=momentum, weight_decay=weight_decay)
        super().__init__(params, defaults)

    @property
    def supports_memory_efficient_fp16(self) -> bool:
        return False

    @property
    def supports_flat_params(self) -> bool:
        return True

    def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]:

        loss = None
        if closure is not None:
            loss = closure()

        if 'k' not in self.state:
            self.state['k'] = torch.tensor([0], dtype=torch.long)
        k = self.state['k'].item()

        for group in self.param_groups:
            eps = group["eps"]
            lr = group["lr"] + eps
            decay = group["weight_decay"]
            momentum = group["momentum"]

            ck = 1 - momentum
            lamb = lr * math.pow(k + 1, 0.5)

            for p in group["params"]:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                if "grad_sum_sq" not in state:
                    state["grad_sum_sq"] = torch.zeros_like(p.data).detach()
                    state["s"] = torch.zeros_like(p.data).detach()
                    if momentum != 0:
                        state["x0"] = torch.clone(p.data).detach()

                if momentum != 0.0 and grad.is_sparse:
                    raise RuntimeError("momentum != 0 is not compatible with sparse gradients")

                grad_sum_sq = state["grad_sum_sq"]
                s = state["s"]

                # Apply weight decay
                if decay != 0:
                    if grad.is_sparse:
                        raise RuntimeError("weight_decay option is not compatible with sparse gradients")

                    grad.add_(p.data, alpha=decay)

                if grad.is_sparse:
                    grad = grad.coalesce()
                    grad_val = grad._values()

                    p_masked = p.sparse_mask(grad)
                    grad_sum_sq_masked = grad_sum_sq.sparse_mask(grad)
                    s_masked = s.sparse_mask(grad)

                    # Compute x_0 from other known quantities
                    rms_masked_vals = grad_sum_sq_masked._values().pow(1 / 3).add_(eps)
                    x0_masked_vals = p_masked._values().addcdiv(s_masked._values(), rms_masked_vals, value=1)

                    # Dense + sparse op
                    grad_sq = grad * grad
                    grad_sum_sq.add_(grad_sq, alpha=lamb)
                    grad_sum_sq_masked.add_(grad_sq, alpha=lamb)

                    rms_masked_vals = grad_sum_sq_masked._values().pow_(1 / 3).add_(eps)

                    s.add_(grad, alpha=lamb)
                    s_masked._values().add_(grad_val, alpha=lamb)

                    # update masked copy of p
                    p_kp1_masked_vals = x0_masked_vals.addcdiv(s_masked._values(), rms_masked_vals, value=-1)
                    # Copy updated masked p to dense p using an add operation
                    p_masked._values().add_(p_kp1_masked_vals, alpha=-1)
                    p.data.add_(p_masked, alpha=-1)
                else:
                    if momentum == 0:
                        # Compute x_0 from other known quantities
                        rms = grad_sum_sq.pow(1 / 3).add_(eps)
                        x0 = p.data.addcdiv(s, rms, value=1)
                    else:
                        x0 = state["x0"]

                    # Accumulate second moments
                    grad_sum_sq.addcmul_(grad, grad, value=lamb)
                    rms = grad_sum_sq.pow(1 / 3).add_(eps)

                    # Update s
                    s.data.add_(grad, alpha=lamb)

                    # Step
                    if momentum == 0:
                        p.data.copy_(x0.addcdiv(s, rms, value=-1))
                    else:
                        z = x0.addcdiv(s, rms, value=-1)

                        # p is a moving average of z
                        p.data.mul_(1 - ck).add_(z, alpha=ck)


        self.state['k'] += 1
        return loss

# LOAD DATA

In [44]:
train_df = pd.read_csv('Train_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward','label_for_train'],axis=1)
train_df = train_df.dropna(axis=0)
display(train_df.head(3))

val_df = pd.read_csv('Val_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward'],axis=1)
val_df = val_df.dropna(axis=0)
display(val_df.head(3))

Unnamed: 0,string_X_train,string_Y_1,string_Y_2,Y_label,row_id
586,"YUNGSOX 2100M 12MT USD1,015/MT USD12,180 YUNGS...",0,13,YUNGSOX 2100M,855
92,"TRIS 1,2-CYCLOHEXANE DICARBOXYLIC ACID,DI-ISON...",0,50,"TRIS 1,2-CYCLOHEXANE DICARBOXYLIC ACID,DI-ISON...",140
91,"TRIS . CIF PORT KLANG,MALAYSIA",0,4,TRIS,139


Unnamed: 0,string_X_train,string_Y_1,string_Y_2,Y_label,row_id
1,"COMMODITY: STYRENE MONOMER IN BULKQUANTITY: 3,...",11,34,STYRENE MONOMER IN BULK,1
2,"COMMODITY: STYRENE MONOMER IN BULKQUANTITY: 3,...",11,34,STYRENE MONOMER IN BULK,2
3,PP 3307UNC1 . TRADE TERMS: CFR ANY JAPANESE PORT,0,2,PP,10


# find_fail_sample and drop fail_sample

In [45]:
def find_fail_sample(df):
    fails = []
    for i in df.index:
        context = df.loc[i,'string_X_train']
        answer = df.loc[i,'Y_label']
        if answer not in context:
            fails.append(i)
    return fails
train_fails = find_fail_sample(train_df)
val_fails = find_fail_sample(val_df)
print(train_fails,val_fails)
display(val_df.loc[val_fails])
print(val_df.shape)
val_df = val_df.drop(val_fails,axis=0)
print(val_df.shape)

[] [342, 343, 344]


Unnamed: 0,string_X_train,string_Y_1,string_Y_2,Y_label,row_id
342,#NAME?,1,26,PURIFIED ISOPHTHALIC ACID,1238
343,#NAME?,1,26,PURIFIED ISOPHTHALIC ACID,1240
344,#NAME?,40,65,PURIFIED ISOPHTHALIC ACID,1241


(744, 5)
(741, 5)


# Initialization Model and Tokenizer 

In [46]:
from transformers import DistilBertTokenizerFast
from transformers import DistilBertForQuestionAnswering

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased")
gc.collect()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this mode

244

# Data preprocessing pipeline (df2DataLoader)

In [47]:
def preprocessing(df):
    contexts = df['string_X_train'].values.tolist()
    questions = [ 'What is the product name?' for i in range(len(df))]
    answers = []
    for idx in df.index:
        answers.append({
            'text':df.loc[idx,'Y_label'],
            'answer_start':df.loc[idx,'string_Y_1'],
            'answer_end':df.loc[idx,'string_Y_2'],
            })
    return contexts ,questions ,answers

def add_token_positions(encodings, answers):
    start_positions = []
    end_positions = []
    for i in range(len(answers)):
        start_positions.append(encodings.char_to_token(i, answers[i]['answer_start']))
        end_positions.append(encodings.char_to_token(i, answers[i]['answer_end'] - 1))

        # if start position is None, the answer passage has been truncated
        if start_positions[-1] is None:
            start_positions[-1] = tokenizer.model_max_length
        if end_positions[-1] is None:
            end_positions[-1] = tokenizer.model_max_length

    encodings.update({'start_positions': start_positions, 'end_positions': end_positions})
    return encodings

class SquadDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

    def __len__(self):
        return len(self.encodings.input_ids)

def df2DataLoader(df):
    contexts ,questions ,answers = preprocessing(df)
    encodings = tokenizer(contexts, questions, truncation=True, padding=True)
    encodings = add_token_positions(encodings, answers)
    dataset = SquadDataset(encodings)
    dataloader = DataLoader(dataset, batch_size = 32, shuffle=True ,num_workers=4)
    return dataloader

# train loop

In [48]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
model.to(device)
model.train()
train_loader = df2DataLoader(train_df)
val_loader = df2DataLoader(val_df)
optimizer = MADGRAD(model.parameters(),lr=5e-5)
gc.collect()

def train_step(model,batch,optimizer):
    model = model.to(device)
    model.train()
    # forward
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_positions = batch['start_positions'].to(device)
    end_positions = batch['end_positions'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
    loss = outputs[0]
    
    # update model
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    
    gc.collect()
    return loss.item()

def val_step(model,batch,optimizer):
    model = model.to(device)
    model.eval()
    # forward
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    start_positions = batch['start_positions'].to(device)
    end_positions = batch['end_positions'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
    loss = outputs[0]
    
    gc.collect()
    return loss.item()

def train_loop(model,train_loader,val_loader,optimizer,max_epochs=5,patience=3):
    history = {'train_loss':[],'val_loss':[]}
    best_loss = np.inf
    best_model = None
    not_improve_count = 0
    for epoch in tqdm(range(max_epochs)):    
        # reset this epoch loss equal to zero
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        # train one epoch and get train_loss
        for i,batch in enumerate(tqdm(train_loader)):
            epoch_train_loss += train_step(model,batch,optimizer)

        # val one epoch and get val_loss
        for j,batch in enumerate(tqdm(val_loader)):
            epoch_val_loss += val_step(model,batch,optimizer)

        # record loss history
        history['train_loss'].append(epoch_train_loss/i)
        history['val_loss'].append(epoch_val_loss/j)

        # print this epoch's infomation
        print(f'epoch:{epoch} train_loss:{epoch_train_loss/i} val_loss:{epoch_val_loss/j}')

        # save best_model (if current val_loss <= best_loss)
        if history['val_loss'][-1] <= best_loss: 
            best_model = deepcopy(model.eval())
            best_loss = history['val_loss'][-1]
            print(f'save best_model now_val_best_loss is:{best_loss}')

        if history['val_loss'][-1] > best_loss:
            not_improve_count += 1
            print(f'not_improve_count:{not_improve_count}')
            if not_improve_count > patience:
                print('early_stoping')
                break

    # GET best_model.eval()
    model = best_model.eval()
    return model,history

cuda


# test model method

In [49]:
def test_model(nlp,df,test_n=30):
    table = pd.DataFrame()
    persudo_val_df = pd.read_csv('Val_Product_Data_2021_0114.csv',index_col=0).drop(['Forward','Backward'],axis=1)
    if test_n != None:
        idx_list = df.sample(test_n).index.tolist()
    else:
        idx_list = df.index.tolist()
    for i in tqdm(idx_list):
        sample = df.loc[[i]]
        string_X_train = sample['string_X_train'].values[0]
        string_Y_1 = sample['string_Y_1'].values[0]
        string_Y_2 = sample['string_Y_2'].values[0]
        QA_input = {
            'question': 'What is the product name?',
            'context': string_X_train
        }
        res = nlp(QA_input)
        predict = QA_input['context'][res['start']:res['end']]
        row = pd.DataFrame({
            'label':string_X_train[string_Y_1:string_Y_2],
            'predict:':predict},index=[i])
        if string_X_train[string_Y_1:string_Y_2] == predict:
            row['是否全對'] = 'Yes'
        else:
            row['是否全對'] = 'No'
        table = table.append(row)
        
        # make persudo label by nlp output 
        persudo_val_df.loc[i]['string_Y_1'] = res['start']
        persudo_val_df.loc[i]['string_Y_2'] = res['end']
        persudo_val_df.loc[i]['Y_label'] = predict
    
    jaccard_avg_score = np.mean([get_jaccard_sim(table.label[i],table['predict:'][i]) for i in table.index])
    
    acc = table['是否全對'].value_counts()['Yes']/len(table)
    
    return table ,jaccard_avg_score ,acc ,persudo_val_df

# knowledge distillation

In [50]:
gc.collect()
def knowledge_distillation(n=3):
    # initialize tokenizer ,model and train_loader
    tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
    model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased")
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    train_df_extra_loader = train_loader
    
    #knowledge_distillation loop
    for i in range(n):
        
        # 1.training model by MADGRAD optimizer
        optimizer = MADGRAD(model.parameters(),lr=5e-5)
        model,history = train_loop(model,train_df_extra_loader,val_loader,optimizer,max_epochs=4,patience=2)
        nlp = pipeline('question-answering', model=model.to('cpu'), tokenizer=tokenizer)
        
        # 2.get persudo_label(by trained_model)
        table,jaccard_avg_score,acc,persudo_val_df = test_model(nlp,val_df,test_n=42)
        
        # 3.add persudo_label to trainset
        train_df_extra_loader = df2DataLoader(train_df.append(persudo_val_df).reset_index(drop=True))
        
        model = model.to(device)
        gc.collect()

    return model.eval()

In [51]:
knowledge_distillation_model = knowledge_distillation(n=3)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this mode

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:0 train_loss:1.7057869766873341 val_loss:0.29767756844344345
save best_model now_val_best_loss is:0.29767756844344345


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:1 train_loss:0.46890841948765294 val_loss:0.29715345475984656
save best_model now_val_best_loss is:0.29715345475984656


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:2 train_loss:0.36646683624497167 val_loss:0.24843451931424762
save best_model now_val_best_loss is:0.24843451931424762


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:3 train_loss:0.31320765476535867 val_loss:0.3032576976263005
not_improve_count:1


  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:0 train_loss:0.272211276768864 val_loss:0.19155878508868424
save best_model now_val_best_loss is:0.19155878508868424


  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:1 train_loss:0.21633928423965132 val_loss:0.1642644013399663
save best_model now_val_best_loss is:0.1642644013399663


  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:2 train_loss:0.18959422564351713 val_loss:0.11024203831734865
save best_model now_val_best_loss is:0.11024203831734865


  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:3 train_loss:0.17135022375006953 val_loss:0.10054190711968619
save best_model now_val_best_loss is:0.10054190711968619


  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:0 train_loss:0.13747899662461374 val_loss:0.09064356816689605
save best_model now_val_best_loss is:0.09064356816689605


  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:1 train_loss:0.14814101827222032 val_loss:0.06762145089147531
save best_model now_val_best_loss is:0.06762145089147531


  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:2 train_loss:0.12490494517827189 val_loss:0.06828537461874278
not_improve_count:1


  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

epoch:3 train_loss:0.11964972186591719 val_loss:0.07332090393680593
not_improve_count:2


  0%|          | 0/42 [00:00<?, ?it/s]

In [52]:
nlp = pipeline('question-answering', model=knowledge_distillation_model.to('cpu'), tokenizer=tokenizer)

In [56]:
table,jaccard_avg_score,acc,persudo_val_df = test_model(nlp,val_df,test_n=len(val_df))
print(f'jaccard_avg_score:{jaccard_avg_score}')
print(f'acc:{acc}')
display(table)

  0%|          | 0/741 [00:00<?, ?it/s]

jaccard_avg_score:0.9360998650472334
acc:0.8785425101214575


Unnamed: 0,label,predict:,是否全對
463,ETHYLENE-PROPYLENE COPOLYMERS,ETHYLENE-PROPYLENE COPOLYMERS,Yes
248,BISPHENOL-ACONTRACT,BISPHENOL-ACONTRACT,Yes
364,HIGH IMPACT POLYSTYRENE RESIN,HIGH IMPACT POLYSTYRENE RESIN,Yes
569,EVA TAISOX,EVA TAISOX,Yes
392,POLYPROPYLENE IMPACT COPOLYMER,POLYPROPYLENE IMPACT COPOLYMER,Yes
...,...,...,...
383,BUTYL ACRYLATE,BUTYL ACRYLATE,Yes
419,EPOXIDIZED SOYBEAN OIL,EPOXIDIZED SOYBEAN OIL,Yes
632,ETHYL ACRYLATE,ETHYL ACRYLATE,Yes
544,POLYESTER STAPLE FIBER,POLYESTER STAPLE FIBER,Yes


# check error prediction

In [122]:
pd.options.display.max_rows = 999
display(table[table['是否全對']!='Yes'].head(30))

Unnamed: 0,label,predict:,是否全對
159,PURIFIED ISOPHTHALIC ACID,PURIFIED ISOPHTHALIC ACID20MT,No
33,EVA TAISOX 7470M,EVA TAISOX,No
652,HDPE TAISOX 9001,HDPE TAISOX 9001QUANTITY,No
240,ETHYLENE-PROPYLENE COPOLYMERS PP COPOLYMER YUN...,ETHYLENE-PROPYLENE COPOLYMERS,No
123,MONO ETHYLENE GLYCOL,MONO ETHYLENE GLYCOLQUANTITY,No
146,GLASS FIBER YARN,GLASS FIBER YARNECG75,No
135,GLASS FIBER YARN,GLASS FIBER YARNQ,No
133,COPPER CLAD LAMINATE,COPPER CLAD LAMINATES,No
429,IMPACT MODIFIER,IMPACT MODIFIER M-31,No
470,HDPE TAISOX 8001,HDPE TAISOX 8001UNIT,No


由於發現錯誤有一些固定的模式 例如後面多了 QUANTITY QTY 和 前面多了小寫n

# 做個後處理看看

In [130]:
def Post_processing(predicts):
    def remove_QUANTITY(x):
        x = x.replace('QUANTITY','')
        x = x.replace('QTY','')
        x = x.replace('n','')
        return x
    return [ remove_QUANTITY(i) for i in predicts]

new_table = table.copy()[['label','predict:']]
new_table['predict:'] = Post_processing(new_table['predict:'].values)
for i in new_table.index:
    if new_table.loc[i,'label'] == new_table.loc[i,'predict:']:
        new_table.loc[i,'是否全對'] = 'Yes'
    else:
        new_table.loc[i,'是否全對'] = 'No'    
display(new_table[table['是否全對']!='Yes'].head(30))

jaccard_avg_score = np.mean([get_jaccard_sim(new_table.label[i],new_table['predict:'][i]) for i in new_table.index])
acc = new_table['是否全對'].value_counts()['Yes']/len(new_table)
print(f'jaccard_avg_score:{jaccard_avg_score}')
print(f'acc:{acc}')

Unnamed: 0,label,predict:,是否全對
159,PURIFIED ISOPHTHALIC ACID,PURIFIED ISOPHTHALIC ACID20MT,No
33,EVA TAISOX 7470M,EVA TAISOX,No
652,HDPE TAISOX 9001,HDPE TAISOX 9001,Yes
240,ETHYLENE-PROPYLENE COPOLYMERS PP COPOLYMER YUN...,ETHYLENE-PROPYLENE COPOLYMERS,No
123,MONO ETHYLENE GLYCOL,MONO ETHYLENE GLYCOL,Yes
146,GLASS FIBER YARN,GLASS FIBER YARNECG75,No
135,GLASS FIBER YARN,GLASS FIBER YARNQ,No
133,COPPER CLAD LAMINATE,COPPER CLAD LAMINATES,No
429,IMPACT MODIFIER,IMPACT MODIFIER M-31,No
470,HDPE TAISOX 8001,HDPE TAISOX 8001UNIT,No


jaccard_avg_score:0.9502249212775528
acc:0.9055330634278003


# save model

In [131]:
torch.save(knowledge_distillation_model.state_dict(),'Product_Data_SQuAD_model.pt')