In [5]:
import argparse
import json
from copy import deepcopy
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.style.use("ggplot")

from Utils.dataset import *
from Utils.utils import *
from Models.BertClf import *
from Models.LstmClf import *
from Models.ElectraClf import *
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:" + str(0)) if torch.cuda.is_available() else torch.device("cpu")
# torch.cuda.get_device_name() raises on a machine without CUDA, so only query it
# when the CUDA device was actually selected; on CPU report a plain label instead.
device_name = torch.cuda.get_device_name() if device.type == "cuda" else "CPU"
print(f'training device: {device, device_name}')

training device: (device(type='cuda', index=0), 'GeForce GTX 1660 Ti')


In [6]:
class Select_Dataset(Dataset):
    """Tokenized dataset built from a DataFrame of labelled sentences.

    The frame must expose a ``Sentence`` column (raw text) and a ``Category``
    column (integer labels). Sentences are tokenized once, at construction
    time, by ``preprocessing_for_bert``; items are served by position.
    """
    def __init__(self, opt,df):
        # Pull the raw text and labels out of the frame as arrays.
        sentences = df.Sentence.values
        categories = df.Category.values

        # Tokenize every sentence up front; the helper returns the id tensor
        # and the matching mask tensor.
        print('Tokenizing data...')
        token_ids, token_masks = preprocessing_for_bert(sentences, opt)
        print(f"valid_X_ids_tsr.shape: {token_ids.shape}\nvalid_X_masks_tsr.shape: {token_masks.shape}")

        self.ids_tsr = token_ids
        self.masks_tsr = token_masks
        self.labels = torch.LongTensor(categories)
        # Keep the source frame so callers can map items back to their rows.
        self.df = df

    def __getitem__(self, index):
        """Return ``[token_ids, mask, label]`` for the example at ``index``."""
        return [self.ids_tsr[index], self.masks_tsr[index], self.labels[index]]

    def __len__(self):
        """Number of tokenized examples."""
        return len(self.ids_tsr)

## Total code

In [7]:
# Selects which subset CSV the dataset-loading cell below reads:
#   1 -> ./Dataset/ELECTRA_CORRECT.csv
#   2 -> ./Dataset/BILSTM_CORRECT.csv
#   3 -> ./Dataset/BERT_ELECTRA_BILSTM_CORRECT.csv
option=2

In [8]:
# Saved-run signature; it is used below to build the '<signature>_opt.txt' path under ./Saved_models.
# NOTE(review): `signature` is assigned again in the following cells — when the notebook
# is run top to bottom the last assignment wins, so run only the cell for the model being analysed.
signature = "jh_BILSTM_6M_1D_15H_10M"

In [78]:
# Define signature
# NOTE(review): a later cell assigns `signature` again; when the notebook is run top to
# bottom this value is overwritten before the options file is loaded.
signature = "jh_BERT_6M_1D_20H_8M"

In [25]:
# Define signature
# Alternative ELECTRA run, currently disabled:
#signature = "jh_ELECTRA_5M_31D_17H_5M"
# Active run: this value matches the `signature` field of the options printed by the
# options-loading cell, and is used there to build the '<signature>_opt.txt' path.
signature = "sw_focal_ELECTRA_6M_1D_18H_48M"

In [26]:
# Restore the options that were saved alongside the selected run.
parser = argparse.ArgumentParser()
# A .py script would call parser.parse_args(); inside a notebook parse_known_args()
# is used instead so the kernel's own command-line arguments are tolerated.
opt, _ = parser.parse_known_args()
opt_file_path = './Saved_models/' + signature + '_opt.txt'
with open(opt_file_path, 'r') as f:
    saved_options = json.load(f)
# Swap the (empty) namespace contents for the saved key/value pairs.
opt.__dict__ = saved_options
print(opt)

Namespace(aug=0, author='sw_focal', batch_size=16, data_path='./Dataset', dropout=0.5, embedding_dim=256, eps=1e-08, freeze_pretrained=0, gpu=0, hidden_dim=768, kernel_depth=500, kernel_sizes=[3, 4, 5], lr_clf=0.0001, lr_pretrained=1e-05, max_epoch=30, max_len=50, model='ELECTRA', num_layer=2, save=1, save_model_path='./Saved_models', save_submission_path='./Submissions', sent_embedding=0, signature='sw_focal_ELECTRA_6M_1D_18H_48M', split_ratio=1, weight_decay=0.0005)


In [27]:
# Map each analysis option to the CSV holding the examples it covers:
#   1 -> examples only ELECTRA classified correctly
#   2 -> examples only the (Bi)LSTM classified correctly
#   3 -> examples all three models classified correctly
OPTION_TO_CSV = {
    1: './Dataset/ELECTRA_CORRECT.csv',
    2: './Dataset/BILSTM_CORRECT.csv',
    3: './Dataset/BERT_ELECTRA_BILSTM_CORRECT.csv',
}

# Fail loudly on an unsupported option instead of silently leaving `df` /
# `df_class` undefined (or stale from a previous run of this cell).
if option not in OPTION_TO_CSV:
    raise ValueError(
        f"Unsupported option {option!r}; expected one of {sorted(OPTION_TO_CSV)}"
    )

df = pd.read_csv(OPTION_TO_CSV[option])
# Tokenize the selected sentences with the tokenizer implied by `opt`.
df_class = Select_Dataset(opt, df)

Tokenizing data...
Apply the ElectraTokenizer...


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


valid_X_ids_tsr.shape: torch.Size([574, 50])
valid_X_masks_tsr.shape: torch.Size([574, 50])


In [28]:
# Load untrained model: instantiate the architecture named in the saved options.
# Second positional argument of the LSTM / Conv classifiers. 30522 equals the
# word-embedding row count printed for the pretrained encoder further down —
# presumably the tokenizer vocabulary size (TODO confirm against the model classes).
VOCAB_SIZE = 30522

if opt.model == "BERT":
    model = BertClassifier(opt).to(device)
elif opt.model == "ELECTRA":
    model = ElectraClassifier(opt).to(device)
elif opt.model == "BILSTM":
    model = LstmClassifier(opt, VOCAB_SIZE).to(device)
elif opt.model in ("CNN", "ConvClassifier"):
    # The attention and tokenizer cells identify the convolutional model as "CNN";
    # accept that spelling as well as the original 'ConvClassifier'.
    model = ConvClassifier(opt, VOCAB_SIZE).to(device)
else:
    # Without this, an unknown name would leave `model` undefined or stale.
    raise ValueError(f"Unsupported opt.model: {opt.model!r}")

In [29]:
# Load trained model
model_save_path = str(opt.save_model_path) + "/" + opt.signature +'.model'
# map_location remaps the stored tensors onto the active device, so a checkpoint
# written on a GPU machine can still be loaded when only the CPU is available.
model.load_state_dict(torch.load(model_save_path, map_location=device))
# The notebook only inspects a trained model: switch dropout (and similar layers)
# to inference behaviour so the extracted attention weights are deterministic.
model.eval()
# Last expression of the cell: moves the model to `device` and displays its structure.
model.to(device)

ElectraClassifier(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [30]:
# Load the DataLoaders
#train_dataloader, valid_dataloader, _ = data_load(opt)

In [31]:
# Batches the selected subset in its original row order (shuffle=False), so the
# positions of the collected attention scores line up with the rows of `df`.
# NOTE(review): despite the name, this wraps `df_class` (the analysis subset), not training data.
train_dataloader = DataLoader(df_class,batch_size=opt.batch_size,shuffle=False)

In [32]:
# Collect one attention weight per token position for every example in the subset.
attention_score = []       # per-example 1-D numpy arrays of attention weights
attention_score_idx = []   # BERT/ELECTRA only: token positions sorted by descending weight
b_ids_tsr_list = []        # per-example token-id tensors, in dataloader order

# Index of the per-layer attention tuple inside the value returned by
# model.extract_sent_feature(ids, masks) for each transformer model.
ATTENTION_OUTPUT_INDEX = {"BERT": 3, "ELECTRA": 2}

# Inference only: put dropout layers in eval mode so the attention weights are
# deterministic (harmless if extract_sent_feature already does this), and skip
# autograd bookkeeping to save memory.
model.eval()
with torch.no_grad():
    for batch in train_dataloader:  # NOTE: flagged "needs modification" by the original author
        # Only the token ids and masks are needed on the device; labels stay behind.
        b_ids_tsr, b_masks_tsr = (tsr.to(device) for tsr in batch[:2])
        # Keep the ids on the CPU so the list does not pin GPU memory.
        b_ids_tsr_list.extend(b_ids_tsr.cpu())

        if opt.model in ATTENTION_OUTPUT_INDEX:
            output = model.extract_sent_feature(b_ids_tsr, b_masks_tsr)
            # Last layer's attention; per the original shape notes each layer is
            # (batch, num_heads, sequence_length, sequence_length).
            last_layer_attention = output[ATTENTION_OUTPUT_INDEX[opt.model]][-1]
            # Attention row of the first token ([CLS]) over all positions,
            # averaged across heads -> (batch, sequence_length).
            cls_attention = last_layer_attention[:, :, 0, :].mean(dim=1).cpu().numpy()
            for scores in cls_attention:
                attention_score.append(scores)
                attention_score_idx.append(scores.argsort()[::-1])

        elif opt.model == "BILSTM":
            output, final_hidden_state = model.extract_sent_feature(b_ids_tsr)
            # One row of attention weights per example in the batch.
            attention_score.extend(model.attention_net(output, final_hidden_state).cpu().numpy())

        elif opt.model == 'CNN':
            # No attention scores are collected for the CNN; the features are
            # computed but not used further in this cell.
            output = model.extract_sent_feature(b_ids_tsr)

In [37]:
# For every example keep the 15 token positions with the highest attention weight.
# (An earlier variant, kept out of the code, selected the positions whose weight
# exceeded the mean of the example's positive weights instead of a fixed top 15.)
attention_score_idx_sel = []
if opt.model == 'BILSTM':
    for scores in attention_score:
        top_positions = scores.argsort()[::-1][:15]
        # BiLSTM: put the selected positions back into sentence order.
        attention_score_idx_sel.append(np.sort(top_positions))
elif opt.model in ("BERT", "ELECTRA"):
    for scores in attention_score:
        # Transformers: keep the positions ordered by descending attention weight.
        attention_score_idx_sel.append(scores.argsort()[::-1][:15])
        

In [None]:
# Pick the tokenizer whose vocabulary matches the model that produced the token ids.
if opt.model == "ELECTRA":
    print("Apply the ElectraTokenizer...")
    tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')
elif opt.model in ("BILSTM", "BERT", "CNN"):
    # The non-ELECTRA models all share the BERT word-piece tokenizer.
    print("Apply the BertTokenizer...")
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [38]:
# Decode, for each example, only the tokens at its selected attention positions.
sentence_word_token_e = [
    tokenizer.decode(b_ids_tsr_list[example_pos].detach().cpu().numpy()[selected_positions])
    for example_pos, selected_positions in enumerate(attention_score_idx_sel)
]

## Option 1
### bert

In [49]:
# NOTE(review): `sentence_word_token` (like `sentence_word_token_l` and
# `sentence_word_token_l_n` displayed further down) is never assigned in the visible
# cells — only `sentence_word_token_e` is. These displays rely on leftover kernel state,
# presumably from earlier runs of the decoding cell under a different target name.
sentence_word_token

['[CLS] so genial isce, those that you throughout, dearly hoping. [SEP]',
 '[CLS] does has some value - well like [SEP]',
 '[CLS] our, but it grows monotonous after a, as do s. [SEP]',
 '[CLS] one of but fails to to either in three of screen time.',
 '[CLS] a medcre exercise,. [SEP]',
 'that works',
 '[CLS] excessive tires, the, has its handful ofeeming features. [SEP]',
 '[CLS] pity is that responsible did t and retle it direct - video and send it its home. [SEP]',
 '[CLS] halfway, having,tates into a gross - out with that are more. [SEP]',
 '[CLS], but he feels like a. [SEP]',
 "it's better than",
 'is ok to a apparatus, and human [SEP]',
 '[CLS] an untable look, prism. [SEP]',
 '[CLS] hasolate, but a, manages to infuse with flashes warmth gentle humor. [SEP]',
 'another how is losing. [SEP]',
 '[CLS]uchi vividly captures, chafing against manic and. [SEP]',
 "[CLS] it's an interesting effort, and barry's cold - act makes worthwhile. [SEP]",
 '[CLS] an instance. [SEP]',
 "[CLS] would 

### Electra

In [26]:
sentence_word_token

['[CLS] soial isceit, is for throughout, the. [SEP]',
 '[CLS] does - how much on. [SEP]',
 "[CLS] there's violence of in hold attention butous as do's. [SEP]",
 '[CLS] one of those spannings fails to in time. [SEP]',
 '[CLS] a exercise in, unaware that s of. [SEP]',
 'that.',
 '[CLS] thoughome, the principle, as be long. [SEP]',
 "[CLS] the great is that didn't - retitle it video to. [SEP]",
 '[CLS] halfway through, however, dry, ii are. [SEP]',
 '[CLS] nohe has documentary he. [SEP]',
 's than menace.',
 '[CLS] it ok to be sitcom apparatus,. [SEP]',
 '[CLS] angettable at of that omnibus called marriage. [SEP]',
 '[CLS] skins has a desolate air, but eyre, bye the. [SEP]',
 '... of.',
 "[CLS]uchily captures now, chafing against'snessyingsse. [SEP]",
 "[CLS]'s effort -rb and's act worthwhile. [SEP]",
 '[CLS] an instance of dogventing. [SEP]',
 "[CLS] american as wouldn't exist without the precedent fun s sake to of. [SEP]",
 '[CLS], kissing jessica stein is movies. [SEP]',
 '[CLS] whatli

In [31]:
df[(df['e_pred']==4)&(df['l_pred']==3)]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred
29,10060,4,"Bittersweet comedy\/drama full of life , hand ...",3,4,3
58,2606,4,"A compelling , gut-clutching piece of advocacy...",4,4,3
67,8340,4,A conventional but heartwarming tale .,3,4,3
104,11213,4,"A classy , sprightly spin on film .",3,4,3
146,1388,4,A riveting documentary .,4,4,3
168,7286,4,"Great performances , stylish cinematography an...",4,4,3
172,148,4,"An unsettling , memorable cinematic experience...",4,4,3
233,2548,4,A compelling motion picture that illustrates a...,4,4,3
235,6719,4,classic cinema served up with heart and humor,4,4,3
276,5979,4,A truly wonderful tale combined with stunning ...,4,4,3


In [86]:
df['Sentence'].values[235]

'classic cinema served up with heart and humor'

In [87]:
sentence_word_token[235]

'[CLS] up with heart humor [SEP]'

In [85]:
sentence_word_token_e[235]

'up humor with heart [SEP] [CLS] served and cinema classic [PAD] [PAD] [PAD] [PAD] [PAD]'

In [88]:
sentence_word_token_l[235]

'[CLS] classic cinema served up with heart and [PAD] [PAD] [PAD] [PAD] [PAD] humor [PAD]'

### bilstm

In [75]:
sentence_word_token_l[151]

'[CLS] a compelling motion picture that illustrates [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [14]:
sentence_word_token

['[CLS] so genial is those of the one rare [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] the movie does has some entertainment - how value much depends on [PAD] [PAD]',
 "[CLS] there'[PAD] [PAD] s lots of and a [PAD] [SEP] to a bid",
 '[CLS] one of those to be decadess intimate strive and do to fails justice',
 '[CLS] a mediocre exercise [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] that works. [PAD] [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] though excessively tiresomely pre, verbalten [PAD] [PAD] may [PAD]',
 '[CLS] the great pity is that [PAD] [PAD] [PAD] [PAD] those [SEP] [PAD] the responsible',
 '[CLS] halfway through, however, sucked having dry the undead scary [PAD] [PAD].',
 "[CLS] nohe has made a decent ` intro'[PAD] [PAD] documentary [PAD] [PAD]",
 "[CLS] it's better than the [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] phantom",
 '[CLS] it is ok for be something to a of movie [PAD] [PAD] [PAD] [PAD]',
 '[CLS] an unforgettable look at throu

## Option 2
### bert

In [86]:
sentence_word_token

['[CLS] is still a [SEP]',
 '[CLS] more, is carried less than by its that holds after. [SEP]',
 'a winning. [SEP]',
 '[CLS] for all its down is a strong, character - oriented piece [SEP]',
 "[CLS] this is not movies'to watch a. [SEP]",
 'but in end, do n t gel [SEP]',
 '[CLS], but reccles the premise [SEP]',
 ', you can medcre cresting next wave [SEP]',
 '[CLS] it would have become a, those so bad it. [SEP]',
 '[CLS] even summer, most restless deserves dignity something. [SEP]',
 '[CLS], zero, too many and a make for a bad [SEP]',
 '##nis is perhaps too effective in. [SEP]',
 'about belongs nicholson [SEP]',
 "[CLS] what makes compelling, besides its terrific, is fontaine's willingness to. [SEP]",
 '[CLS] in captured. [SEP]',
 'is too savy a to let into. [SEP]',
 '[CLS] is, the should have been made for the tube. [SEP]',
 'this thing is virtually [SEP]',
 'has the died strung -. [SEP]',
 "do't seem much. [SEP]",
 '[CLS] it cutting her, but who else engaged today is so cot [SEP]',
 'abo

### Electra

In [38]:
sentence_word_token

['[CLS] years later, t.stone. [SEP]',
 'than, e t. up.',
 '[CLS] winning. [SEP]',
 '[CLS] for all twists, and on down blood work. [SEP]',
 "[CLS] is'watch if live. [SEP]",
 '[CLS] like about -, end, t. [SEP]',
 '[CLS] ` creates acles. [SEP]',
 'the is t but see.',
 "[CLS] if des up, of's to. [SEP]",
 '[CLS] even, the most audience by. [SEP]',
 '[CLS] noop, see, make for bad. [SEP]',
 '[CLS] caconis is perhaps effective creatingtility. [SEP]',
 'to.',
 "[CLS] what how i killed father, is fontaine's to intoing. [SEP]",
 '.',
 '[CLS] byler too toph into. [SEP]',
 '[CLS] given the fact that to for, for the tube. [SEP]',
 'isable.',
 '[CLS] the together. [SEP]',
 "[CLS] the ups ups don't characters. [SEP]",
 '[CLS] it may not be or weekend and two or things i know her, but who else moral [SEP]',
 'is red.',
 '... impact dayfoldsble.',
 '[CLS] cut through the find a scathing portrayal of a stling out of in. [SEP]',
 '... film that have baffled.',
 '[CLS] but silence, s. [SEP]',
 '..., does t

In [160]:
df[(df['e_pred']==3)&(df['l_pred']==4)][130:]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred
513,10291,4,It 's a hoot watching The Rock chomp on jumbo ...,4,3,4
519,10335,4,"If you can get past the taboo subject matter ,...",4,3,4
522,10360,4,Even better than the first one !,4,3,4
528,10558,4,The Pianist is Polanski 's best film .,4,3,4
529,10617,4,One regards Reign of Fire with awe .,4,3,4
530,10619,4,Amazing !,4,3,4
534,10652,4,Steven Spielberg brings us another masterpiece,4,3,4
535,10653,4,"Fairy-tale formula , serves as a paper skeleto...",4,3,4
537,10692,4,Rifkin 's references are ... impeccable throug...,4,3,4
538,10708,4,"As quiet , patient and tenacious as Mr. Lopez ...",4,3,4


In [161]:
df['Sentence'].values[570]

'The script by David Koepp is perfectly serviceable and because he gives the story some soul ... he elevates the experience to a more mythic level .'

In [162]:
sentence_word_token[570]

'[CLS] thepable and because...evates. [SEP]'

In [163]:
sentence_word_token_e[570]

'.able.eva because [SEP] [CLS]tes.p. the and service script'

In [164]:
sentence_word_token_l[570]

'[CLS] the script by david koepp is the experience to a [PAD] [PAD]'

### bilstm

In [25]:
sentence_word_token

['[CLS] twenty years is. t still later. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] more intimate than spectacular factors by wow less than carried is [PAD]. t',
 '[CLS] a winning and wildly [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] for all its plot twists, and of them some blood verge, on',
 "[CLS] this is not one of the movies d [PAD] you want '. [PAD]",
 '[CLS] there are to things like about murder by numbers - - but, in',
 '[CLS] ` men in black [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] the movie is nio. wave next the oning crestcre can',
 '[CLS] if de of those oneuce movies wilds had a notch, that',
 '[CLS] even in the the summer most,time restless young audience [PAD] [PAD] [PAD]',
 '[CLS] no who bigop nothing, new to see, zero thrill toos',
 '[CLS] cacoyannis is perhaps - dust cake of tood [PAD] [PAD]',
 '[CLS] about schmidt belongs to nicholson [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]. [PAD] [PAD]',
 '[CLS] what makes how i fa

In [165]:
sentence_word_token_l_n[570]

'[CLS] the script by david ko to experienceepp the is [PAD] [PAD] a'

## Before
## Option 1
### bert

In [41]:
sentence_word_token

['[CLS] so genial isce, those that you throughout, dearly hoping. [SEP]',
 '[CLS] does has some value - well like [SEP]',
 '[CLS] our, but it grows monotonous after a, as do s. [SEP]',
 '[CLS] one of but fails to to either in three of screen time.',
 '[CLS] a medcre exercise,. [SEP]',
 'that works',
 '[CLS] excessive tires, the, has its handful ofeeming features. [SEP]',
 '[CLS] pity is that responsible did t and retle it direct - video and send it its home. [SEP]',
 '[CLS] halfway, having,tates into a gross - out with that are more. [SEP]',
 '[CLS], but he feels like a. [SEP]',
 "it's better than",
 'is ok to a apparatus, and human [SEP]',
 '[CLS] an untable look, prism. [SEP]',
 '[CLS] hasolate, but a, manages to infuse with flashes warmth gentle humor. [SEP]',
 'another how is losing. [SEP]',
 '[CLS]uchi vividly captures, chafing against manic and. [SEP]',
 "[CLS] it's an interesting effort, and barry's cold - act makes worthwhile. [SEP]",
 '[CLS] an instance. [SEP]',
 "[CLS] would 

### Electra

In [50]:
sentence_word_token

['[CLS] so genial isce, those that you throughout, dearly hoping. [SEP]',
 '[CLS] does has some value - well like [SEP]',
 '[CLS] our, but it grows monotonous after a, as do s. [SEP]',
 '[CLS] one of but fails to to either in three of screen time.',
 '[CLS] a medcre exercise,. [SEP]',
 'that works',
 '[CLS] excessive tires, the, has its handful ofeeming features. [SEP]',
 '[CLS] pity is that responsible did t and retle it direct - video and send it its home. [SEP]',
 '[CLS] halfway, having,tates into a gross - out with that are more. [SEP]',
 '[CLS], but he feels like a. [SEP]',
 "it's better than",
 'is ok to a apparatus, and human [SEP]',
 '[CLS] an untable look, prism. [SEP]',
 '[CLS] hasolate, but a, manages to infuse with flashes warmth gentle humor. [SEP]',
 'another how is losing. [SEP]',
 '[CLS]uchi vividly captures, chafing against manic and. [SEP]',
 "[CLS] it's an interesting effort, and barry's cold - act makes worthwhile. [SEP]",
 '[CLS] an instance. [SEP]',
 "[CLS] would 

### bilstm

In [77]:
sentence_word_token

['[CLS] so genial is those of the one rare [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] the movie does has some entertainment - how value much depends on [PAD] [PAD]',
 "[CLS] there'[PAD] [PAD] s lots of and a [PAD] [SEP] to a bid",
 '[CLS] one of those to be decadess intimate strive and do to fails justice',
 '[CLS] a mediocre exercise [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] that works. [PAD] [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] though excessively tiresomely pre, verbalten [PAD] [PAD] may [PAD]',
 '[CLS] the great pity is that [PAD] [PAD] [PAD] [PAD] those [SEP] [PAD] the responsible',
 '[CLS] halfway through, however, sucked having dry the undead scary [PAD] [PAD].',
 "[CLS] nohe has made a decent ` intro'[PAD] [PAD] documentary [PAD] [PAD]",
 "[CLS] it's better than the [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] phantom",
 '[CLS] it is ok for be something to a of movie [PAD] [PAD] [PAD] [PAD]',
 '[CLS] an unforgettable look at throu

## Option 2
### bert

In [16]:
sentence_word_token

['[CLS] later, t.. [SEP]',
 '[CLS],. t. up after. [SEP]',
 '[CLS]. [SEP]',
 '[CLS] for, and on down, blood work. [SEP]',
 "[CLS] is'live. [SEP]",
 "[CLS] to -, the end, don't. [SEP]",
 '[CLS] `b,cles. [SEP]',
 "[CLS] isn't can see. [SEP]",
 "[CLS] if des up,'s good. [SEP]",
 '[CLS] even, most hero by. [SEP]',
 '[CLS] no bigop, see, make for. [SEP]',
 '[CLS] caconis is perhaps effective in. [SEP]',
 '[CLS]. [SEP]',
 "[CLS] what how i, is's toing. [SEP]",
 '[CLS]. [SEP]',
 '[CLS] byler too toph into. [SEP]',
 '[CLS] given the fact it,. [SEP]',
 '[CLS]. [SEP]',
 '[CLS] has the together. [SEP]',
 "[CLS] the don't. [SEP]",
 '[CLS] be and who else moral [SEP]',
 '[CLS] is reding. [SEP]',
 '[CLS]... oncefolds. [SEP]',
 '[CLS] cut through find of out in it. [SEP]',
 '[CLS]... [SEP]',
 "[CLS] but,'s. [SEP]",
 "[CLS]..., doesn't. [SEP]",
 '[CLS] the a is distinguishing in. [SEP]',
 "[CLS] out.., a of's. [SEP]",
 "[CLS] while serving sara does in,'s to match. [SEP]",
 "[CLS]'sing that,'s also, to

### Electra

In [28]:
sentence_word_token

['[CLS] later, t.. [SEP]',
 '[CLS],. t. up after. [SEP]',
 '[CLS]. [SEP]',
 '[CLS] for, and on down, blood work. [SEP]',
 "[CLS] is'live. [SEP]",
 "[CLS] to -, the end, don't. [SEP]",
 '[CLS] `b,cles. [SEP]',
 "[CLS] isn't can see. [SEP]",
 "[CLS] if des up,'s good. [SEP]",
 '[CLS] even, most hero by. [SEP]',
 '[CLS] no bigop, see, make for. [SEP]',
 '[CLS] caconis is perhaps effective in. [SEP]',
 '[CLS]. [SEP]',
 "[CLS] what how i, is's toing. [SEP]",
 '[CLS]. [SEP]',
 '[CLS] byler too toph into. [SEP]',
 '[CLS] given the fact it,. [SEP]',
 '[CLS]. [SEP]',
 '[CLS] has the together. [SEP]',
 "[CLS] the don't. [SEP]",
 '[CLS] be and who else moral [SEP]',
 '[CLS] is reding. [SEP]',
 '[CLS]... oncefolds. [SEP]',
 '[CLS] cut through find of out in it. [SEP]',
 '[CLS]... [SEP]',
 "[CLS] but,'s. [SEP]",
 "[CLS]..., doesn't. [SEP]",
 '[CLS] the a is distinguishing in. [SEP]',
 "[CLS] out.., a of's. [SEP]",
 "[CLS] while serving sara does in,'s to match. [SEP]",
 "[CLS]'sing that,'s also, to

### bilstm

In [123]:
sentence_word_token

['[CLS] twenty years is. t still later. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] more intimate than spectacular factors by wow less than carried is [PAD]. t',
 '[CLS] a winning and wildly [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] for all its plot twists, and of them some blood verge, on',
 "[CLS] this is not one of the movies d [PAD] you want '. [PAD]",
 '[CLS] there are to things like about murder by numbers - - but, in',
 '[CLS] ` men in black [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] the movie is nio. wave next the oning crestcre can',
 '[CLS] if de of those oneuce movies wilds had a notch, that',
 '[CLS] even in the the summer most,time restless young audience [PAD] [PAD] [PAD]',
 '[CLS] no who bigop nothing, new to see, zero thrill toos',
 '[CLS] cacoyannis is perhaps - dust cake of tood [PAD] [PAD]',
 '[CLS] about schmidt belongs to nicholson [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]. [PAD] [PAD]',
 '[CLS] what makes how i fa

In [92]:
df[df['Category']==0]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred
4,9390,0,This is not one of the movies you 'd want to w...,1,1,0
10,258,0,"No big whoop , nothing new to see , zero thril...",1,1,0
16,195,0,Given the fact that virtually no one is bound ...,1,1,0
17,194,0,This thing is virtually unwatchable .,1,1,0
18,2447,0,Has the disjointed feel of a bunch of strung-t...,1,1,0
21,6468,0,There is nothing redeeming about this movie .,1,1,0
27,6954,0,The only thing that distinguishes a Randall Wa...,1,1,0
33,4502,0,Despite the fact that this film was n't as bad...,1,1,0
34,11092,0,An artsploitation movie with too much exploita...,1,1,0


## Option 3
### bert

In [73]:
sentence_word_token

['[CLS]rous but carl adds enough and to make entertaining [SEP]',
 ', manages depressing as the phones in his. [SEP]',
 'the strength film lies in its two central performances sven and. [SEP]',
 'asty. [SEP]',
 '[CLS] angel presents partly, and infuses the film with thesibility [SEP]',
 'a terrific whatever. [SEP]',
 'too sap. [SEP]',
 '[CLS] while been, he has least one more : his own. [SEP]',
 "[CLS] despite, that deserves a huge amount the credit for the film's thoroughly winning tone. [SEP]",
 '[CLS] but sets itself apart by forming a that circle on a positive - [SEP]',
 '[CLS] one best looking and animated. [SEP]',
 'a assured and work of spare and [SEP]',
 '[CLS] will the funny, even touching story. [SEP]',
 '[CLS] the is like redone by someone ignored favor of old paperbacks. [SEP]',
 'has been written so well that evendam [SEP]',
 '##ous. [SEP]',
 's not [SEP]',
 '[CLS] it does, and uses as a. [SEP]',
 '[CLS] s as,, so he just slopped ` together here. [SEP]',
 '[CLS] royals hav

### Electra

In [60]:
sentence_word_token

['[CLS] lurous, butes. [SEP]',
 '[CLS]ting, manages as. [SEP]',
 '[CLS] the of in byter wife. [SEP]',
 '[CLS]. [SEP]',
 '[CLS] angel,es particularly nightmar. [SEP]',
 '[CLS]. [SEP]',
 '[CLS]... [SEP]',
 '[CLS] while the old evans been. [SEP]',
 "[CLS] despitesson's profile name beingabi's point, thatk of the. [SEP]",
 '[CLS] been. s in positive - lrb - [SEP]',
 '[CLS] while... [SEP]',
 '[CLS]s breath of. [SEP]',
 '[CLS] with even. [SEP]',
 "[CLS] the movie is by who of `'with like. [SEP]",
 "[CLS] ` `'' that!'' [SEP]",
 '[CLS]. [SEP]',
 "[CLS]'s. [SEP]",
 "[CLS] it doesn't. [SEP]",
 "[CLS]'s, couldn't out he ` em here. [SEP]",
 '[CLS] to. [SEP]',
 '[CLS] with,. [SEP]',
 '[CLS]. [SEP]',
 "[CLS] this doesn't. [SEP]",
 '[CLS] be are included [SEP]',
 "[CLS] itically, somehow under the that ` `'' is even. [SEP]",
 '[CLS] resurrection has distinction. [SEP]',
 '[CLS] two by too. [SEP]',
 '[CLS] the. [SEP]',
 "[CLS] while, is one, and you've it to for biting off the out. [SEP]",
 '[CLS] a. 

### bilstm

In [48]:
sentence_word_token

['[CLS] ludicrous, but [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] quitting, however, manages just performance [PAD] [PAD] [PAD] to. [PAD]',
 '[CLS] the strength of the film woll lies sventer in its two by',
 '[CLS] a tasty masala [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] angel presents events partly from theelle of perspective and christ aurelie',
 '[CLS] a terrific date movie [PAD] [PAD] [PAD], [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS]... too sappy [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]',
 '[CLS] while the now 72 - year old robert - evans [PAD] [PAD] [PAD] [PAD]',
 "[CLS] despite [PAD] be [PAD]sson's [PAD] high [SEP] - profile [PAD] name",
 '[CLS] there cast has ensemble romance [SEP] been ofs apart itself by a sets',
 '[CLS] one of the best looking a [PAD] [PAD] [PAD] [SEP]... while',
 '[CLS]s jeff has created atakingly assured breath and stylish work',
 '[CLS] kids five fast and the, up with delighted be will funny, [PAD]',
 '[

In [83]:
df[df['Category']==0]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred


In [154]:
# Per-model prediction files, in the order BERT, ELECTRA, BiLSTM.
prediction_csv_paths = (
    './Dataset/BERT_pred.csv',
    './Dataset/ELECTRA_pred.csv',
    './Dataset/BILSTM_pred.csv',
)
df_bert, df_electra, df_bilstm = map(pd.read_csv, prediction_csv_paths)

In [155]:
# Give each model's 'Pred' column a model-specific name so the frames can be merged
# side by side (b_ = BERT, e_ = ELECTRA, l_ = BiLSTM). Renaming happens in place.
for pred_frame, pred_column in (
    (df_bert, 'b_pred'),
    (df_electra, 'e_pred'),
    (df_bilstm, 'l_pred'),
):
    pred_frame.rename(columns={'Pred': pred_column}, inplace=True)

In [156]:
# Join the three prediction tables on the columns that identify an example,
# keeping only examples present in all of them (inner joins).
example_keys = ['Sentence','Category','Id']
merge_bert_electra = df_bert.merge(df_electra, how='inner', on=example_keys)
merge_total = merge_bert_electra.merge(df_bilstm, how='inner', on=example_keys)

In [157]:
## Examples that ELECTRA and the BiLSTM both classify correctly:
## their predictions agree with each other and with the gold Category.
electra_bilstm_agree = merge_total['e_pred'] == merge_total['l_pred']
bilstm_is_correct = merge_total['l_pred'] == merge_total['Category']
merge = merge_total[electra_bilstm_agree & bilstm_is_correct]

In [137]:
merge[merge['Category']==3]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred
0,275,3,"Ludicrous , but director Carl Franklin adds en...",3,3,3
3,1715,3,The strength of the film lies in its two centr...,3,3,3
5,789,3,A tasty masala .,3,3,3
10,384,3,A surprisingly charming and even witty match f...,4,3,3
17,10500,3,Angel presents events partly from the perspect...,3,3,3
...,...,...,...,...,...,...
1725,11237,3,A weird little movie that 's amusing enough wh...,3,3,3
1730,11291,3,"It 's a spectacular performance - ahem , we ho...",3,3,3
1736,11370,3,"Poetic , heartbreaking .",3,3,3
1739,11385,3,"High drama , Disney-style - a wing and a praye...",3,3,3


In [126]:
merge[merge['Category']==0]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred
181,5335,0,"The notion of deleting emotion from people , e...",0,1,0
298,612,0,The movie is so resolutely cobbled together ou...,0,1,0
340,4143,0,A 94-minute travesty of unparalleled proportio...,0,1,0
482,8888,0,It 's mired in a shabby script that piles laye...,0,1,0
490,2412,0,"A loud , ugly , irritating movie without any o...",0,1,0
...,...,...,...,...,...,...
1692,10934,0,But the movie that does n't really deliver for...,0,1,0
1699,10959,0,"So muddled , repetitive and ragged that it say...",0,1,0
1712,11095,0,The problem is that it is one that allows him ...,0,1,0
1714,11114,0,There 's only one way to kill Michael Myers fo...,0,1,0


In [158]:
merge_total[merge_total['Category']==0]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred
1,7037,0,It 's hard to understand why anyone in his rig...,0,1,3
28,5930,0,"When it comes out on video , then it 's the pe...",3,3,3
51,9231,0,Just plain bad .,1,1,1
53,1350,0,More of the same old garbage Hollywood has bee...,1,1,2
56,870,0,May cause you to bite your tongue to keep from...,1,1,3
...,...,...,...,...,...,...
1692,10934,0,But the movie that does n't really deliver for...,0,1,0
1699,10959,0,"So muddled , repetitive and ragged that it say...",0,1,0
1712,11095,0,The problem is that it is one that allows him ...,0,1,0
1714,11114,0,There 's only one way to kill Michael Myers fo...,0,1,0


In [159]:
# Rows whose gold Category equals the ELECTRA prediction.
electra_is_correct = merge_total['Category'].eq(merge_total['e_pred'])
check = merge_total[electra_is_correct]

In [160]:
check[check['Category']==0]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred


In [150]:
# Rows where the ELECTRA prediction matches the gold Category.
check = merge_total.loc[merge_total['e_pred'] == merge_total['Category']]

In [151]:
check[check['e_pred']==0]

Unnamed: 0,Id,Category,Sentence,b_pred,e_pred,l_pred


In [164]:
# Distribution of ELECTRA predictions over the rows whose gold Category is 0.
# NOTE(review): the captured output lists only the predicted values 1, 2 and 3 — no
# prediction equals 0 for these rows; worth checking class-0 handling in the ELECTRA run.
np.unique(merge_total[merge_total['Category']==0]['e_pred'].values,return_counts=True)

(array([1, 2, 3], dtype=int64), array([203,   6,   8], dtype=int64))