In [1]:



import transformers
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup


import torch
import torch.nn as nn
from tqdm import tqdm


from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics


import pandas as pd
import torchtext



In [25]:
import numpy as np

In [2]:


from torchtext.legacy.data import Field, TabularDataset, BucketIterator



In [3]:
#!pip freeze > requirement.txt

In [4]:
# inputs
PATH = './' #../data/'
TRAIN_PATH = 'train.csv'
TEST_PATH = 'test.csv'

In [5]:
tokenize = lambda x: x.split()
text = Field(sequential=True, use_vocab=True, tokenize=tokenize, lower=True)
label = Field(sequential=False, use_vocab=False)
fields = {'text':('t', text), 'label':('l', label)} # this is how we'll refer to data in batches (batch.t for text)


In [6]:
#train_data, test_data = TabularDataset.splits(path=PATH,
#                                              train=TRAIN_PATH,
#                                              test=TEST_PATH, # could also have validation=VALIDATION_PATH
#                                              format='csv',
#                                              fields=fields)

In [7]:
class BERTClassification(nn.Module):
    def __init__ (self):
        super(BERTClassification, self).__init__()
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased',return_dict=False)
        self.bert_drop = nn.Dropout(0.4)
        self.out = nn.Linear(768, 1)
        
    def forward(self, ids, mask, token_type_ids):
        #print("BERTClassification.forward----")
        #print("ids, mask, token_type_ids")
        #print(ids, mask, token_type_ids)
        #print("BERTClassification.forward----")
        _, pooledOut = self.bert(ids, attention_mask = mask,
                                token_type_ids=token_type_ids)
        #print("type(pooledOut)",type(pooledOut),pooledOut)
        bertOut = self.bert_drop(pooledOut)
        output = self.out(bertOut)
        
        return output

In [8]:
class DATALoader:
    def __init__(self, data, target, max_length):
        self.data = data
        self.target = target #make sure to convert the target into numerical values
        self.tokeniser = transformers.BertTokenizer.from_pretrained('bert-base-uncased')
        self.max_length = max_length
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, item):
        data = str(self.data[item])
        data = " ".join(data.split())
        
        inputs = self.tokeniser.encode_plus(
            data, 
            None,
            add_special_tokens=True,
            max_length = self.max_length,
            pad_to_max_length=True
            
        )
        
        ids = inputs["input_ids"]
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        padding_length = self.max_length - len(ids)
        ids = ids + ([0] * padding_length)
        mask = mask + ([0] * padding_length)
        token_type_ids = token_type_ids + ([0] * padding_length)

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.target[item], dtype=torch.long)
        }
    

In [9]:
def loss_fn(output, targets):
    return nn.BCEWithLogitsLoss()(output, targets.view(-1,1))

In [10]:
def train_func(data_loader, model, optimizer, device, scheduler):
    #print("here")
    model.to(device)
    print(model)
    model.train()
    
    for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):
        #print("train_func","bi",bi,"d",d)
        ids = d["ids"]
        token_type_ids = d["token_type_ids"]
        mask = d["mask"]
        targets = d["targets"]
        
        ids = ids.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets = targets.to(device, dtype=torch.float)
        
        optimizer.zero_grad()
        output = model(
            ids=ids,
            mask = mask,
            token_type_ids = token_type_ids
        )
        
        
        loss = loss_fn(output, targets)
        loss.backward()
        
        optimizer.step()
        scheduler.step()

In [37]:
def eval_func(data_loader, model, device):
    model.eval()
    
    fin_targets = []
    fin_output = []
    
    with torch.no_grad():
        for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):
            #print(("bi,d",bi,d))
            ids = d["ids"]
            token_type_ids = d["token_type_ids"]
            mask = d["mask"]
            targets = d["targets"]

            ids = ids.to(device, dtype=torch.long)
            token_type_ids = token_type_ids.to(device, dtype=torch.long)
            mask = mask.to(device, dtype=torch.long)
            targets = targets.to(device, dtype=torch.long)


            output = model(
                ids=ids,
                #masks = mask,
                mask = mask,
                token_type_ids = token_type_ids
            )
        
            fin_targets.extend(targets.cpu().detach().numpy())#.tolist())
            fin_output.extend(torch.sigmoid(output).cpu().detach().numpy())#.tolist())
            
        return fin_output, fin_targets

In [12]:
    #df = pd.read_csv('./cs7643-project-sankarbranch/data/train.csv')

In [13]:
#df['text'][0:3]
#df['label'][0:3]

In [14]:
 #   data = pd.DataFrame({
 #       'text' : df['text'] ,
 #       'label' : df['label']
 #   })


In [15]:
#data.info()

In [None]:
def run():
    df = pd.read_csv('./cs7643-project-sankarbranch/data/train.csv')
    data = pd.DataFrame({
        'text' : df['text'] ,
        'label' : df['label']
    })


    encoder = LabelEncoder()
    data['label'] = encoder.fit_transform(data['label'])

    df_train, df_valid = train_test_split(data, test_size = 0.1, random_state=23, stratify=data.label.values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = DATALoader(
        data=df_train.text.values,
        target=df_train.label.values,
        max_length=512
    )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, 
        batch_size=8,
        num_workers=4,
    )

    val_dataset = DATALoader(
        data=df_valid.text.values,
        target=df_valid.label.values,
        max_length=512
    )

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, 
        batch_size=4,
        num_workers=1,
    )

    device = torch.device("cuda")
    model = BERTClassification()

    param_optimizer = list(model.named_parameters())
    no_decay = [
        "bias", 
        "LayerNorm,bias",
        "LayerNorm.weight",
               ]
    optimizer_parameters = [
        {'params': [p for n,p in param_optimizer if not any(nd in n for nd in no_decay)],
                   'weight_decay':0.001},
        {'params': [p for n,p in param_optimizer if any(nd in n for nd in no_decay)],
                   'weight_decay':0.0}
    ]

    num_train_steps = int(len(df_train)/ 8*10)

    optimizers = AdamW(optimizer_parameters, lr=3e-5)

    scheduler = get_linear_schedule_with_warmup(
        optimizers,
        num_warmup_steps=0,
        num_training_steps=num_train_steps

    )

    best_accuracy = 0
    for epoch in range(5):
        train_func(data_loader=train_data_loader, model=model, optimizer=optimizers, device=device, scheduler=scheduler)
        outputs, targets = eval_func(data_loader=train_data_loader, model=model, device=device)
        #outputs = np.array(outputs) >= 0.5
        accuracy =  metrics.accuracy_score(np.round(outputs),np.round(np.array(targets)))
        print(f"Accuracy Score: {accuracy}")

        if accuracy>best_accuracy:
            torch.save(model.state_dict(), "model.bin")
            best_accuracy = accuracy
                
                
if __name__ == "__main__":
    run()

In [16]:
    df = pd.read_csv('./cs7643-project-sankarbranch/data/train.csv')
    data = pd.DataFrame({
        'text' : df['text'] ,
        'label' : df['label']
    })


    encoder = LabelEncoder()
    data['label'] = encoder.fit_transform(data['label'])

    df_train, df_valid = train_test_split(data, test_size = 0.1, random_state=23, stratify=data.label.values)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = DATALoader(
        data=df_train.text.values,
        target=df_train.label.values,
        max_length=512
    )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, 
        batch_size=8,
        num_workers=4,
    )

    val_dataset = DATALoader(
        data=df_valid.text.values,
        target=df_valid.label.values,
        max_length=512
    )

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, 
        batch_size=4,
        num_workers=1,
    )

    device = torch.device("cuda")
    model = BERTClassification()

    param_optimizer = list(model.named_parameters())
    no_decay = [
        "bias", 
        "LayerNorm,bias",
        "LayerNorm.weight",
               ]
    optimizer_parameters = [
        {'params': [p for n,p in param_optimizer if not any(nd in n for nd in no_decay)],
                   'weight_decay':0.001},
        {'params': [p for n,p in param_optimizer if any(nd in n for nd in no_decay)],
                   'weight_decay':0.0}
    ]

    num_train_steps = int(len(df_train)/ 8*10)

    optimizers = AdamW(optimizer_parameters, lr=3e-5)

    scheduler = get_linear_schedule_with_warmup(
        optimizers,
        num_warmup_steps=0,
        num_training_steps=num_train_steps

    )

    best_accuracy = 0



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:
epoch = 0
train_func(data_loader=train_data_loader, model=model, optimizer=optimizers, device=device, scheduler=scheduler)


BERTClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

In [38]:
outputs, targets = eval_func(data_loader=train_data_loader, model=model, device=device)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

In [46]:
#outputs = np.array(outputs) >= 0.5
accuracy = metrics.accuracy_score(np.round(outputs),np.round(np.array(targets)))
print(f"Accuracy Score: {accuracy}")

if accuracy>best_accuracy:
    torch.save(model.state_dict(), "model.bin")
    best_accuracy = accuracy

Accuracy Score: 0.9993194332735259


In [39]:
df_train.label.values

array([1, 0, 0, ..., 0, 0, 0])

In [42]:
np.round(outputs)

array([[1.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

In [45]:
np.round(np.array(targets))

array([1, 0, 0, ..., 0, 0, 0])

In [None]:
    for epoch in range(5):
        train_func(data_loader=train_data_loader, model=model, optimizer=optimizers, device=device, scheduler=scheduler)
        outputs, targets = eval_func(data_loader=train_data_loader, model=model, device=device)
        #outputs = np.array(outputs) >= 0.5
        accuracy =  metrics.accuracy_score(np.round(outputs),np.round(np.array(targets)))
        print(f"Accuracy Score: {accuracy}")

        if accuracy>best_accuracy:
            torch.save(model.state_dict(), "model.bin")
            best_accuracy = accuracy

BERTClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  0%|                                                  | 0/4041 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting 

Accuracy Score: 0.9998453257439832
BERTClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((7

  0%|                                                  | 0/4041 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting 

Accuracy Score: 0.9996287817855596
BERTClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((7


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-

Accuracy Score: 0.99990719544639
BERTClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  0%|                                                  | 0/4041 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting 

Accuracy Score: 0.9999690651487967
BERTClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((7

  0%|                                                  | 0/4041 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting 

In [None]:
print(transformers(.__version__))

In [None]:
val_outputs, val_targets = eval_func(data_loader=val_data_loader, model=model, device=device)
#outputs = np.array(outputs) >= 0.5
val_accuracy = metrics.accuracy_score(np.round(val_outputs),np.round(np.array(val_targets)))
print(f"Accuracy Score: {val_accuracy}")

In [None]:
from sklearn.metrics import confusion_matrix, f1_score
accuracy_l=[]
cm_l =[]
f1score_l=[] 
best_accuracy1 = 0
for epoch in range(5):
    train_func(data_loader=train_data_loader, model=model, optimizer=optimizers, device=device, scheduler=scheduler)
    val1_outputs, val_targets = eval_func(data_loader=train_data_loader, model=model, device=device)
    #outputs = np.array(outputs) >= 0.5
    val1_accuracy =  metrics.accuracy_score(np.round(val1_outputs),np.round(np.array(val1_targets)))
    tn1, fp1, fn1, tp1 =confusion_matrix(np.round(val1_outputs),np.round(np.array(val1_targets))
    f1score1 =f1_score(np.round(val1_outputs),np.round(np.array(val1_targets))
    print(f"Accuracy Score: {val1_accuracy}")
    accuracy_l.append(val1_accuracy)
    cm_l.append((tn1, fp1, fn1, tp1))  
    f1score_l.append(f1score1)                   

    if val1_accuracy>best_accuracy:
        torch.save(model.state_dict(), "model1.bin")
        best_accuracy1 = accuracy1