<a href="https://colab.research.google.com/github/jwengr/dacon/blob/main/%EC%86%8C%EC%84%A4%20%EC%9E%91%EA%B0%80%20%EB%B6%84%EB%A5%98%20AI%20%EA%B2%BD%EC%A7%84%EB%8C%80%ED%9A%8C/DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.externals import joblib
import matplotlib.pyplot as plt
% matplotlib inline

In [None]:
!pip install torchcontrib

In [None]:
import torch.nn
from torch.nn import CrossEntropyLoss
import torch.nn.functional
from torch.nn.functional import softmax
from torch.utils.data import Dataset, DataLoader
from torchcontrib.optim import SWA
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

In [None]:
import torch

# Choose the compute device once; later cells move the model and batches onto `device`.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda:0")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

In [None]:
!pip install transformers
!pip install sentencepiece

In [None]:
from transformers import AdamW, XLNetTokenizer, XLNetModel, XLNetConfig, XLNetForSequenceClassification
import sentencepiece as spm

In [None]:
# Root data folder (relative to the working directory). Later cells read train.csv
# from it and write the SentencePiece files and the model checkpoints beneath it.
defaultpath = 'drive/My Drive/dacon/sosul/dataset'

기본전처리

In [None]:
# Load the raw training table; the cells below rely on its 'text' and 'author' columns.
train_df = pd.read_csv(defaultpath+'/train.csv',encoding='utf-8')

# Drop every row whose text contains the literal "* *" (rows with missing text are
# dropped as well, as before). .copy() makes the result an independent frame so the
# column assignments below do not write into a slice of the original.
train_df = train_df[~train_df['text'].str.contains(r'\* \*', na=True)].copy()

# Hand-crafted length features.
train_df['sentencelen'] = train_df['text'].str.count(r'\.') + 1   # number of '.'-separated pieces
train_df['charlen'] = train_df['text'].str.len()
train_df['c/s'] = train_df['charlen']/(train_df['sentencelen']+1)  # +1 guards against division by zero
train_df['upperlen'] = train_df['text'].str.count('[A-Z]')
# NOTE(review): despite its name, 'u/s' holds uppercase letters per *character*.
# The original cell first assigned upperlen/(sentencelen+1) and immediately overwrote
# it with the line below; only the surviving assignment is kept. The column name is
# left untouched because the downstream feature layout depends on it.
train_df['u/s'] = train_df['upperlen']/(train_df['charlen']+1)  # +1 guards against division by zero

# Rows containing at least one accented/ligature character typical of French text.
# ('|' is not an alternation operator inside a character class, so it is omitted;
# otherwise any text containing a literal pipe would match.)
train_df_fr = train_df[train_df['text'].str.contains('[àäöîùâŒçêüñôÆœëæéÊèì]')].copy()

# Stratified 80/20 split with a fixed seed so every model sees the same partition.
train, valid = train_test_split(train_df, test_size=0.2, random_state=2021, stratify=train_df['author'])

tfidf all feature + mlp

In [None]:
# Fit TF-IDF on the texts of train_df. A token is either a run of ASCII letters or any
# single non-word character, so punctuation and whitespace become features too.
# The pattern is a raw string because "\W" is not a valid Python string escape.
# NOTE(review): train_df is the frame that is later split into train/valid, so the
# vocabulary and IDF weights are fitted on validation text as well — confirm intended.
tfidfv = TfidfVectorizer(token_pattern=r"[a-zA-Z]+|\W",lowercase=True,dtype=np.float32).fit(train_df['text'])

class TfidfDataset(Dataset):
    """Map-style dataset yielding (feature list, label) pairs built on the fly.

    Each item is the dense TF-IDF vector of the row's 'text' followed by every
    remaining numeric column of the row (all columns except the ones listed in
    ``_NON_FEATURE_COLS``).

    Args:
        tfidfv: fitted vectorizer exposing ``transform([text])`` whose result has
            ``toarray()``.
        df: DataFrame containing at least the columns in ``_NON_FEATURE_COLS``.
    """

    # Columns that are not appended to the TF-IDF vector as extra features.
    _NON_FEATURE_COLS = ['index', 'text', 'sentencelen', 'charlen', 'upperlen', 'author']

    def __init__(self,tfidfv=None,df=None):
        self.tfidfv = tfidfv
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(input_ids, labels)``: a list of floats and an int label."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.df.iloc[idx,:]

        # Dense float32 TF-IDF vector for this single text.
        tfidf_vec = self.tfidfv.transform([row['text']]).toarray().astype(np.float32)[0]
        # Remaining hand-crafted columns, in DataFrame column order.
        extra = row.drop(self._NON_FEATURE_COLS).values.astype(np.float32)

        # ndarray.tolist() already yields plain Python floats; no tensor round-trip needed.
        input_ids = tfidf_vec.tolist() + extra.tolist()
        # int() accepts both NumPy and Python scalars, unlike scalar.astype(...).
        labels = int(row['author'])

        return input_ids,labels

In [None]:
tfidf_train_dataset = TfidfDataset(tfidfv,train)
tfidf_valid_dataset = TfidfDataset(tfidfv,valid)

def collate_fn(batch):
    """Transpose a list of (features, label) items into [all_features, all_labels].

    The result is a two-element list of tuples; the training loop converts each
    element to a tensor itself.
    """
    return list(zip(*batch))

tfidf_train_dataloader = DataLoader(tfidf_train_dataset, batch_size=4, shuffle=True, num_workers=2,collate_fn=collate_fn)
# Validation only accumulates metrics, so shuffling is unnecessary; a fixed order keeps runs comparable.
tfidf_valid_dataloader = DataLoader(tfidf_valid_dataset, batch_size=4, shuffle=False, num_workers=2,collate_fn=collate_fn)

In [None]:
# Length of one feature vector (TF-IDF vocabulary plus the extra columns).
# The original cell read `i`, a variable no cell in the notebook defines, so it only
# worked with leftover kernel state; index the dataset explicitly instead.
len(tfidf_train_dataset[0][0])

33687

In [None]:
class TfidfMLPModel(torch.nn.Module):
    """Five stacked linear layers with dropout after the first three.

    Args:
        input_dim: width of the input feature vector. Defaults to 33687, the
            value the notebook hard-coded for its feature vectors.
        num_classes: number of output logits. Defaults to 5.

    NOTE(review): there is no activation function between the linear layers, so
    apart from dropout the stack composes to a single affine map. Layer names and
    shapes are kept as they were so existing checkpoints still load.
    """

    def __init__(self, input_dim=33687, num_classes=5):
        super(TfidfMLPModel, self).__init__()
        self.linear1 = torch.nn.Linear(input_dim,4096)
        self.linear2 = torch.nn.Linear(4096,512)
        self.linear3 = torch.nn.Linear(512,64)
        self.linear4 = torch.nn.Linear(64,8)
        self.linear5 = torch.nn.Linear(8,num_classes)

        # Default dropout probability (torch.nn.Dropout() with no arguments).
        self.drop1 = torch.nn.Dropout()
        self.drop2 = torch.nn.Dropout()
        self.drop3 = torch.nn.Dropout()

        # Re-initialise every weight matrix with Xavier-normal; biases keep their default init.
        for layer in (self.linear1, self.linear2, self.linear3, self.linear4, self.linear5):
            torch.nn.init.xavier_normal_(layer.weight)

    def forward(self, input_ids):
        """Return logits for `input_ids`, a float tensor whose last dimension is `input_dim`."""
        x = self.linear1(input_ids)
        x = self.drop1(x)
        x = self.linear2(x)
        x = self.drop2(x)
        x = self.linear3(x)
        x = self.drop3(x)
        x = self.linear4(x)
        logits = self.linear5(x)
        return logits

In [None]:
# Functions to save and load the model from a specific epoch.
def save_model(model, save_path, epochs, lowest_eval_loss, train_loss_hist, valid_loss_hist,train_acc_hist,valid_acc_hist):
    """Write a checkpoint for `model` into the directory `save_path`.

    The file is named ``MLP_e{epoch}_loss{loss}_acc{acc}.pth`` and stores the epoch
    number, the best validation loss, the model's state_dict and the four metric
    histories. If `model` has a ``module`` attribute (e.g. a parallel wrapper),
    the wrapped module's weights are saved.

    Args:
        model: module to serialise.
        save_path: target directory; created if it does not exist.
        epochs: index of the epoch that produced this checkpoint.
        lowest_eval_loss: best validation loss so far.
        train_loss_hist, valid_loss_hist, train_acc_hist, valid_acc_hist: per-epoch
            histories; ``valid_acc_hist[-1]`` is used in the file name.
    """
    import os

    model_to_save = model.module if hasattr(model, 'module') else model
    checkpoint = {'epochs': epochs,
                    'lowest_eval_loss': lowest_eval_loss,
                    'state_dict': model_to_save.state_dict(),
                    'train_loss_hist': train_loss_hist,
                    'valid_loss_hist': valid_loss_hist,
                    'train_acc_hist' : train_acc_hist,
                    'valid_acc_hist' : valid_acc_hist
                }
    # torch.save does not create missing parent directories.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    torch.save(checkpoint, save_path+'/MLP_e{0}_loss{1:04.4f}_acc{2:04.4f}.pth'.format(epochs,lowest_eval_loss,valid_acc_hist[-1]))
    print("Saving model at epoch {0} with validation loss of {1} vaildation acc of {2}".format(epochs,
                                                                        lowest_eval_loss,valid_acc_hist[-1]))
    return
  
def load_model(save_path):
    """Rebuild a TfidfMLPModel from the checkpoint file at `save_path`.

    Returns:
        (model, checkpoint): the model with restored weights and the full
        checkpoint dict (epoch, best loss and metric histories).
    """
    # Load tensors onto the CPU so a checkpoint written on a GPU runtime also opens
    # on a CPU-only one; the training function moves the model to its device later.
    checkpoint = torch.load(save_path, map_location='cpu')
    model = TfidfMLPModel()
    model.load_state_dict(checkpoint['state_dict'])
    return model, checkpoint

In [None]:
# Start from a freshly initialised MLP.
model = TfidfMLPModel()
# Alternative: resume from a saved checkpoint (this also yields `checkpoint`).
#model, checkpoint = load_model(defaultpath+'/model/MLP_e54_loss0.1661_acc0.7747.pth')

In [None]:
# Base optimizer: AdamW over all model parameters, with bias correction disabled.
adamOptimizer = AdamW(model.parameters(),lr = 1e-5, eps = 1e-8, correct_bias=False)
# Wrap it in torchcontrib's SWA (stochastic weight averaging) optimizer.
# NOTE(review): torchcontrib documents swa_start / swa_freq in optimizer steps,
# not epochs — confirm that 4 and 3 are the intended values.
optimizer = SWA(adamOptimizer, swa_start=4, swa_freq=3, swa_lr=1e-5)

In [None]:
def model_train(model, num_epochs,optimizer,
          train_dataloader, valid_dataloader,model_save_path,checkpoint,device="cpu"
          ):
    """Train `model`, validate after every epoch and checkpoint on improvement.

    Args:
        model: module called as ``model(input_ids=...)`` and returning logits.
        num_epochs: exclusive upper bound of the epoch index.
        optimizer: optimizer to step. If it exposes ``swap_swa_sgd`` (torchcontrib
            SWA), validation and checkpointing use the averaged weights.
        train_dataloader, valid_dataloader: yield ``(features, labels)`` batches of
            nested Python sequences, converted to tensors here.
        model_save_path: directory passed to ``save_model``.
        checkpoint: ``None`` to start at epoch 0, or a dict produced by
            ``save_model`` to resume after ``checkpoint["epochs"]``.
        device: device the model and batches are moved to.

    Returns:
        The trained model. When an SWA optimizer is used and at least one epoch
        ran, the model holds the averaged weights on return.

    NOTE(review): the recorded losses are the sum of per-batch mean losses divided
    by the number of samples, i.e. roughly the true mean divided by the batch size.
    This is left unchanged so values stay comparable with existing checkpoints.
    """
    if checkpoint is None:
        start_epoch=0
        lowest_eval_loss = float('inf')
        train_loss_hist = []
        valid_loss_hist = []
        train_acc_hist = []
        valid_acc_hist = []
    else:
        start_epoch = checkpoint["epochs"]+1
        lowest_eval_loss = checkpoint["lowest_eval_loss"]
        train_loss_hist = checkpoint["train_loss_hist"]
        valid_loss_hist = checkpoint["valid_loss_hist"]
        train_acc_hist = checkpoint["train_acc_hist"]
        valid_acc_hist = checkpoint["valid_acc_hist"]

    criterion = CrossEntropyLoss()
    # Only the SWA wrapper has swap_swa_sgd; a plain optimizer simply skips the swaps.
    swap_weights = getattr(optimizer, 'swap_swa_sgd', None)

    model.to(device)
    for actual_epoch in range(start_epoch,num_epochs):
        model.train()
        tr_acc = 0
        tr_loss = 0
        num_train_samples = 0
        train_bar = tqdm(train_dataloader,desc=f"Epoch {actual_epoch} Train ")
        for batch in train_bar:
            b_input_ids = torch.FloatTensor(batch[0]).to(device)
            b_labels = torch.LongTensor(batch[1]).to(device)
            num_train_samples += b_labels.size(0)

            optimizer.zero_grad()
            logits = model(input_ids=b_input_ids)
            loss = criterion(logits, b_labels)

            prediction = logits.detach().max(1)[1]
            tr_acc += prediction.eq(b_labels).sum().item()
            tr_loss += loss.item()
            train_bar.set_postfix({'train_acc': tr_acc/num_train_samples,'train_loss':tr_loss/num_train_samples})

            loss.backward()
            optimizer.step()

        # max(..., 1) avoids a ZeroDivisionError on an empty dataloader.
        train_loss_hist.append(tr_loss/max(num_train_samples, 1))
        train_acc_hist.append(tr_acc/max(num_train_samples, 1))

        # Put the averaged (SWA) weights into the model for validation and saving.
        if swap_weights is not None:
            swap_weights()

        model.eval()
        eval_loss = 0
        eval_acc = 0
        num_eval_samples = 0
        with torch.no_grad():
            valid_bar = tqdm(valid_dataloader,desc=f"Epoch {actual_epoch} Valid ")
            for batch in valid_bar:
                b_input_ids = torch.FloatTensor(batch[0]).to(device)
                b_labels = torch.LongTensor(batch[1]).to(device)

                logits = model(input_ids=b_input_ids)
                loss = criterion(logits, b_labels)
                prediction = logits.max(1)[1]

                eval_acc += prediction.eq(b_labels).sum().item()
                eval_loss += loss.item()
                num_eval_samples += b_labels.size(0)
                valid_bar.set_postfix({'valid_acc':eval_acc/num_eval_samples,'valid_loss':eval_loss/num_eval_samples})

        valid_loss_hist.append(eval_loss/max(num_eval_samples, 1))
        valid_acc_hist.append(eval_acc/max(num_eval_samples, 1))

        if valid_loss_hist[-1] < lowest_eval_loss:
            lowest_eval_loss = valid_loss_hist[-1]
            save_model(model, model_save_path, actual_epoch, lowest_eval_loss, train_loss_hist, valid_loss_hist,train_acc_hist,valid_acc_hist)

        # Swap back so the next epoch continues from the optimizer's own iterate
        # rather than from the average (swapping once per epoch without undoing it
        # would also overwrite the SWA buffer with the raw weights). After the final
        # epoch the averaged weights are deliberately left in the model.
        if swap_weights is not None and actual_epoch < num_epochs - 1:
            swap_weights()
    return model

In [None]:
# Train the TF-IDF MLP from scratch for up to 100 epochs.
model = model_train(
    model=model,
    num_epochs=100,
    optimizer=optimizer,
    train_dataloader=tfidf_train_dataloader,
    valid_dataloader=tfidf_valid_dataloader,
    model_save_path=defaultpath+'/model',
    checkpoint=None,
    device=device,
)

MLP_e27_loss0.1475_acc0.8033.pth saved

길이가 매우 긴 문장이 많기 때문에 XLNet을 이용하기로 결정하였습니다.

pretrained xlnet tokenizer + unpretrained xlnet

In [None]:
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

# One row of token ids per text; shorter rows are right-filled with 0 up to the longest
# sequence. np.int64 replaces the `np.int` alias, which NumPy removed.
input_ids = pd.DataFrame(train_df['text'].apply(lambda x: tokenizer.encode(x)).tolist()).fillna(0).astype(np.int64)

# Same seed and stratification as the label split below, so rows stay aligned.
train_input_ids, valid_input_ids = train_test_split(input_ids,test_size=0.2, random_state=2021, stratify=train_df['author'])

# Attention mask: 1 wherever the token id is greater than 0, else 0.
train_attention_masks = (train_input_ids>0).astype(np.int64).values
valid_attention_masks = (valid_input_ids>0).astype(np.int64).values

train_input_ids = torch.from_numpy(train_input_ids.values)
valid_input_ids = torch.from_numpy(valid_input_ids.values)
train_attention_masks = torch.from_numpy(train_attention_masks)
valid_attention_masks = torch.from_numpy(valid_attention_masks)

train_labels, valid_labels = train_test_split(train_df['author'].astype(np.int64),test_size=0.2, random_state=2021, stratify=train_df['author'])
train_labels = torch.from_numpy(train_labels.values)
valid_labels = torch.from_numpy(valid_labels.values)

train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
valid_dataset = TensorDataset(valid_input_ids, valid_attention_masks, valid_labels)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=798011.0, style=ProgressStyle(descripti…




In [None]:
batch_size = 8

# Training batches are sampled in random order; validation keeps dataset order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=RandomSampler(train_dataset),
)
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    sampler=SequentialSampler(valid_dataset),
)

In [None]:
# Configuration for a small XLNet built from scratch, sized to the tokenizer's vocabulary.
config = XLNetConfig(
    vocab_size=tokenizer.vocab_size,
    n_layer=8,
    n_head=16,
    d_model=32,
    d_inner=128,
)

In [None]:
class XLNetForMultiLabelSequenceClassification(torch.nn.Module):
    """XLNet encoder followed by mean pooling and one linear classification layer.

    Args:
        config: XLNetConfig used to build the (randomly initialised) encoder; its
            ``d_model`` sets the input width of the classification layer.
        num_labels: number of output logits. Defaults to 5.

    NOTE(review): pooling averages the encoder output over dimension 1 for every
    position, including positions the attention mask marks as 0. Left unchanged so
    that existing checkpoints keep their behaviour.
    """

    def __init__(self,config, num_labels=5):
        super(XLNetForMultiLabelSequenceClassification, self).__init__()
        self.xlnet = XLNetModel(config)
        # Take the width from the config instead of hard-coding 32, so the head
        # always matches the encoder.
        self.linear = torch.nn.Linear(config.d_model, num_labels)

        torch.nn.init.xavier_normal_(self.linear.weight)

    def forward(self, input_ids, token_type_ids=None,
                attention_mask=None):
        """Return one row of logits per input sequence."""
        encoder_outputs = self.xlnet(input_ids=input_ids,
                                     attention_mask=attention_mask,
                                     token_type_ids=token_type_ids
                                     )
        # First element of the encoder output is the last hidden state.
        mean_last_hidden_state = torch.mean(encoder_outputs[0],1)
        logits = self.linear(mean_last_hidden_state)
        return logits

In [None]:
# Functions to save and load the model from a specific epoch.
def save_model(model, save_path, epochs, lowest_eval_loss, train_loss_hist, valid_loss_hist,train_acc_hist,valid_acc_hist):
    """Write a checkpoint for `model` into the directory `save_path`.

    The file is named ``pretokenXLNET_e{epoch}_loss{loss}_acc{acc}.pth`` and stores
    the epoch number, the best validation loss, the model's state_dict and the four
    metric histories. If `model` has a ``module`` attribute (e.g. a parallel
    wrapper), the wrapped module's weights are saved.

    Args:
        model: module to serialise.
        save_path: target directory; created if it does not exist.
        epochs: index of the epoch that produced this checkpoint.
        lowest_eval_loss: best validation loss so far.
        train_loss_hist, valid_loss_hist, train_acc_hist, valid_acc_hist: per-epoch
            histories; ``valid_acc_hist[-1]`` is used in the file name.
    """
    import os

    model_to_save = model.module if hasattr(model, 'module') else model
    checkpoint = {'epochs': epochs,
                    'lowest_eval_loss': lowest_eval_loss,
                    'state_dict': model_to_save.state_dict(),
                    'train_loss_hist': train_loss_hist,
                    'valid_loss_hist': valid_loss_hist,
                    'train_acc_hist' : train_acc_hist,
                    'valid_acc_hist' : valid_acc_hist
                }
    # torch.save does not create missing parent directories.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    torch.save(checkpoint, save_path+'/pretokenXLNET_e{0}_loss{1:04.4f}_acc{2:04.4f}.pth'.format(epochs,lowest_eval_loss,valid_acc_hist[-1]))
    print("Saving model at epoch {0} with validation loss of {1} vaildation acc of {2}".format(epochs,
                                                                        lowest_eval_loss,valid_acc_hist[-1]))
    return
  
def load_model(save_path):
    """Rebuild the XLNet classifier from the checkpoint file at `save_path`.

    The model is constructed from the notebook-level `config`, so `config` must
    match the one the checkpoint was trained with.

    Returns:
        (model, checkpoint): the model with restored weights and the full
        checkpoint dict (epoch, best loss and metric histories).
    """
    # Load tensors onto the CPU so a checkpoint written on a GPU runtime also opens
    # on a CPU-only one; the training function moves the model to its device later.
    checkpoint = torch.load(save_path, map_location='cpu')
    model = XLNetForMultiLabelSequenceClassification(config=config)
    model.load_state_dict(checkpoint['state_dict'])
    return model, checkpoint

In [None]:
# Uncomment to train from scratch instead of resuming:
#model = XLNetForMultiLabelSequenceClassification(config=config)
# Resume from a saved checkpoint; `checkpoint` carries the epoch counter and metric histories.
model, checkpoint = load_model(defaultpath+'/model/pretokenXLNET_e44_loss0.0806_acc0.7938.pth')

Stochastic Weight Averaging

In [None]:
# Base optimizer: AdamW over all model parameters, with bias correction disabled.
adamOptimizer = AdamW(model.parameters(),lr = 1e-5, eps = 1e-8, correct_bias=False)
# Wrap it in torchcontrib's SWA (stochastic weight averaging) optimizer.
# NOTE(review): torchcontrib documents swa_start / swa_freq in optimizer steps,
# not epochs — confirm that 4 and 3 are the intended values.
optimizer = SWA(adamOptimizer, swa_start=4, swa_freq=3, swa_lr=1e-5)

In [None]:
def model_train(model, num_epochs,optimizer,
          train_dataloader, valid_dataloader,model_save_path,checkpoint,device="cpu"
          ):
    """Train `model`, validate after every epoch and checkpoint on improvement.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...)`` and
            returning logits.
        num_epochs: exclusive upper bound of the epoch index.
        optimizer: optimizer to step. If it exposes ``swap_swa_sgd`` (torchcontrib
            SWA), validation and checkpointing use the averaged weights.
        train_dataloader, valid_dataloader: yield ``(input_ids, attention_mask,
            labels)`` tensor batches.
        model_save_path: directory passed to ``save_model``.
        checkpoint: ``None`` to start at epoch 0, or a dict produced by
            ``save_model`` to resume after ``checkpoint["epochs"]``.
        device: device the model and batches are moved to.

    Returns:
        The trained model. When an SWA optimizer is used and at least one epoch
        ran, the model holds the averaged weights on return.

    NOTE(review): the recorded losses are the sum of per-batch mean losses divided
    by the number of samples, i.e. roughly the true mean divided by the batch size.
    This is left unchanged so values stay comparable with existing checkpoints.
    """
    if checkpoint is None:
        start_epoch=0
        lowest_eval_loss = float('inf')
        train_loss_hist = []
        valid_loss_hist = []
        train_acc_hist = []
        valid_acc_hist = []
    else:
        start_epoch = checkpoint["epochs"]+1
        lowest_eval_loss = checkpoint["lowest_eval_loss"]
        train_loss_hist = checkpoint["train_loss_hist"]
        valid_loss_hist = checkpoint["valid_loss_hist"]
        train_acc_hist = checkpoint["train_acc_hist"]
        valid_acc_hist = checkpoint["valid_acc_hist"]

    criterion = CrossEntropyLoss()
    # Only the SWA wrapper has swap_swa_sgd; a plain optimizer simply skips the swaps.
    swap_weights = getattr(optimizer, 'swap_swa_sgd', None)

    model.to(device)
    for actual_epoch in range(start_epoch,num_epochs):
        model.train()
        tr_acc = 0
        tr_loss = 0
        num_train_samples = 0
        train_bar = tqdm(train_dataloader,desc=f"Epoch {actual_epoch} Train ")
        for batch in train_bar:
            b_input_ids, b_attn_masks, b_labels = (b.long().to(device) for b in batch)
            num_train_samples += b_labels.size(0)

            optimizer.zero_grad()
            logits = model(input_ids=b_input_ids,attention_mask=b_attn_masks)
            loss = criterion(logits, b_labels)

            prediction = logits.detach().max(1)[1]
            tr_acc += prediction.eq(b_labels).sum().item()
            tr_loss += loss.item()
            train_bar.set_postfix({'train_acc': tr_acc/num_train_samples,'train_loss':tr_loss/num_train_samples})

            loss.backward()
            optimizer.step()

        # max(..., 1) avoids a ZeroDivisionError on an empty dataloader.
        train_loss_hist.append(tr_loss/max(num_train_samples, 1))
        train_acc_hist.append(tr_acc/max(num_train_samples, 1))

        # Put the averaged (SWA) weights into the model for validation and saving.
        if swap_weights is not None:
            swap_weights()

        model.eval()
        eval_loss = 0
        eval_acc = 0
        num_eval_samples = 0
        with torch.no_grad():
            valid_bar = tqdm(valid_dataloader,desc=f"Epoch {actual_epoch} Valid ")
            for batch in valid_bar:
                b_input_ids, b_attn_masks, b_labels = (b.long().to(device) for b in batch)

                logits = model(input_ids=b_input_ids,attention_mask=b_attn_masks)
                loss = criterion(logits, b_labels)
                prediction = logits.max(1)[1]

                eval_acc += prediction.eq(b_labels).sum().item()
                eval_loss += loss.item()
                num_eval_samples += b_labels.size(0)
                valid_bar.set_postfix({'valid_acc':eval_acc/num_eval_samples,'valid_loss':eval_loss/num_eval_samples})

        valid_loss_hist.append(eval_loss/max(num_eval_samples, 1))
        valid_acc_hist.append(eval_acc/max(num_eval_samples, 1))

        if valid_loss_hist[-1] < lowest_eval_loss:
            lowest_eval_loss = valid_loss_hist[-1]
            save_model(model, model_save_path, actual_epoch, lowest_eval_loss, train_loss_hist, valid_loss_hist,train_acc_hist,valid_acc_hist)

        # Swap back so the next epoch continues from the optimizer's own iterate
        # rather than from the average (swapping once per epoch without undoing it
        # would also overwrite the SWA buffer with the raw weights). After the final
        # epoch the averaged weights are deliberately left in the model.
        if swap_weights is not None and actual_epoch < num_epochs - 1:
            swap_weights()
    return model

In [None]:
# Continue training the XLNet classifier from the loaded checkpoint, up to epoch 100.
model = model_train(
    model=model,
    num_epochs=100,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    model_save_path=defaultpath+'/model',
    checkpoint=checkpoint,
    device=device,
)

pretokenXLNET_e47_loss0.0804_acc0.7946.pth saved

SentencePiece + unpretrained XLNET

In [None]:
# Dump all training texts, newline-separated, as the corpus for SentencePiece training.
with open(defaultpath+'/train.txt', mode='w', encoding='utf8') as f:
    corpus_text = '\n'.join(train_df['text'])
    f.write(corpus_text)

In [None]:
# Train a 32,000-piece BPE SentencePiece model on train.txt with full character
# coverage. The output files are written with the prefix <defaultpath>/spm32000.
spm.SentencePieceTrainer.train(input=defaultpath+'/train.txt',vocab_size=32000,
                               model_prefix=f'{defaultpath}/spm32000',character_coverage=1.0,
                               model_type='bpe', accept_language=['en','fr'])

In [None]:
# Load the trained SentencePiece model; the final expression displays the load result.
vocab_file = defaultpath+"/spm32000.model"
sp = spm.SentencePieceProcessor()
sp.load(vocab_file)

True

In [None]:
# Build an XLNetTokenizer on top of the SentencePiece model file at `vocab_file`.
# NOTE(review): do_lower_case=True lowercases input, while train.txt was written
# from the raw text with no visible lowercasing — confirm the mismatch is intended.
tokenizer = XLNetTokenizer(vocab_file,keep_accents=True,do_lower_case=True)

In [None]:
# One row of token ids per text; shorter rows are right-filled with 0 up to the longest
# sequence. np.int64 replaces the `np.int` alias, which NumPy removed.
input_ids = pd.DataFrame(train_df['text'].apply(lambda x: tokenizer.encode(x)).tolist()).fillna(0).astype(np.int64)

# Same seed and stratification as the label split below, so rows stay aligned.
train_input_ids, valid_input_ids = train_test_split(input_ids,test_size=0.2, random_state=2021, stratify=train_df['author'])

# Attention mask: 1 wherever the token id is greater than 0, else 0.
train_attention_masks = (train_input_ids>0).astype(np.int64).values
valid_attention_masks = (valid_input_ids>0).astype(np.int64).values

train_input_ids = torch.from_numpy(train_input_ids.values)
valid_input_ids = torch.from_numpy(valid_input_ids.values)
train_attention_masks = torch.from_numpy(train_attention_masks)
valid_attention_masks = torch.from_numpy(valid_attention_masks)

train_labels, valid_labels = train_test_split(train_df['author'].astype(np.int64),test_size=0.2, random_state=2021, stratify=train_df['author'])
train_labels = torch.from_numpy(train_labels.values)
valid_labels = torch.from_numpy(valid_labels.values)

train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
valid_dataset = TensorDataset(valid_input_ids, valid_attention_masks, valid_labels)

In [None]:
batch_size = 8

# Training batches are sampled in random order; validation keeps dataset order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=RandomSampler(train_dataset),
)
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    sampler=SequentialSampler(valid_dataset),
)

# Configuration for a small XLNet built from scratch, sized to the tokenizer's vocabulary.
config = XLNetConfig(
    vocab_size=tokenizer.vocab_size,
    n_layer=8,
    n_head=16,
    d_model=32,
    d_inner=128,
)

In [None]:
class XLNetForMultiLabelSequenceClassification(torch.nn.Module):
    """XLNet encoder followed by mean pooling and one linear classification layer.

    Args:
        config: XLNetConfig used to build the (randomly initialised) encoder; its
            ``d_model`` sets the input width of the classification layer.
        num_labels: number of output logits. Defaults to 5.

    NOTE(review): pooling averages the encoder output over dimension 1 for every
    position, including positions the attention mask marks as 0. Left unchanged so
    that existing checkpoints keep their behaviour.
    """

    def __init__(self,config, num_labels=5):
        super(XLNetForMultiLabelSequenceClassification, self).__init__()
        self.xlnet = XLNetModel(config)
        # Take the width from the config instead of hard-coding 32, so the head
        # always matches the encoder.
        self.linear = torch.nn.Linear(config.d_model, num_labels)

        torch.nn.init.xavier_normal_(self.linear.weight)

    def forward(self, input_ids, token_type_ids=None,
                attention_mask=None):
        """Return one row of logits per input sequence."""
        encoder_outputs = self.xlnet(input_ids=input_ids,
                                     attention_mask=attention_mask,
                                     token_type_ids=token_type_ids
                                     )
        # First element of the encoder output is the last hidden state.
        mean_last_hidden_state = torch.mean(encoder_outputs[0],1)
        logits = self.linear(mean_last_hidden_state)
        return logits

In [None]:
# Functions to save and load the model from a specific epoch.
def save_model(model, save_path, epochs, lowest_eval_loss, train_loss_hist, valid_loss_hist,train_acc_hist,valid_acc_hist):
    """Write a checkpoint for `model` into the directory `save_path`.

    The file is named ``vanillaXLNET_e{epoch}_loss{loss}_acc{acc}.pth`` and stores
    the epoch number, the best validation loss, the model's state_dict and the four
    metric histories. If `model` has a ``module`` attribute (e.g. a parallel
    wrapper), the wrapped module's weights are saved.

    Args:
        model: module to serialise.
        save_path: target directory; created if it does not exist.
        epochs: index of the epoch that produced this checkpoint.
        lowest_eval_loss: best validation loss so far.
        train_loss_hist, valid_loss_hist, train_acc_hist, valid_acc_hist: per-epoch
            histories; ``valid_acc_hist[-1]`` is used in the file name.
    """
    import os

    model_to_save = model.module if hasattr(model, 'module') else model
    checkpoint = {'epochs': epochs,
                    'lowest_eval_loss': lowest_eval_loss,
                    'state_dict': model_to_save.state_dict(),
                    'train_loss_hist': train_loss_hist,
                    'valid_loss_hist': valid_loss_hist,
                    'train_acc_hist' : train_acc_hist,
                    'valid_acc_hist' : valid_acc_hist
                }
    # torch.save does not create missing parent directories.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    torch.save(checkpoint, save_path+'/vanillaXLNET_e{0}_loss{1:04.4f}_acc{2:04.4f}.pth'.format(epochs,lowest_eval_loss,valid_acc_hist[-1]))
    print("Saving model at epoch {0} with validation loss of {1} vaildation acc of {2}".format(epochs,
                                                                        lowest_eval_loss,valid_acc_hist[-1]))
    return
  
def load_model(save_path):
    """Rebuild the XLNet classifier from the checkpoint file at `save_path`.

    The model is constructed from the notebook-level `config`, so `config` must
    match the one the checkpoint was trained with.

    Returns:
        (model, checkpoint): the model with restored weights and the full
        checkpoint dict (epoch, best loss and metric histories).
    """
    # Load tensors onto the CPU so a checkpoint written on a GPU runtime also opens
    # on a CPU-only one; the training function moves the model to its device later.
    checkpoint = torch.load(save_path, map_location='cpu')
    model = XLNetForMultiLabelSequenceClassification(config=config)
    model.load_state_dict(checkpoint['state_dict'])
    return model, checkpoint

In [None]:
# Uncomment to train from scratch instead of resuming:
#model, checkpoint = XLNetForMultiLabelSequenceClassification(config=config), None
# Resume from a saved checkpoint; `checkpoint` carries the epoch counter and metric histories.
model, checkpoint = load_model(defaultpath+'/model/vanillaXLNET_e42_loss0.0802_acc0.8026.pth')
# The optimizer is created fresh: load_model restores only the model's state_dict.
adamOptimizer = AdamW(model.parameters(),lr = 1e-5, eps = 1e-8, correct_bias=False)
# NOTE(review): torchcontrib documents swa_start / swa_freq in optimizer steps,
# not epochs — confirm that 4 and 3 are the intended values.
optimizer = SWA(adamOptimizer, swa_start=4, swa_freq=3, swa_lr=1e-5)

In [None]:
def model_train(model, num_epochs,optimizer,
          train_dataloader, valid_dataloader,model_save_path,checkpoint,device="cpu"
          ):
    """Train `model`, validate after every epoch and checkpoint on improvement.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...)`` and
            returning logits.
        num_epochs: exclusive upper bound of the epoch index.
        optimizer: optimizer to step. If it exposes ``swap_swa_sgd`` (torchcontrib
            SWA), validation and checkpointing use the averaged weights.
        train_dataloader, valid_dataloader: yield ``(input_ids, attention_mask,
            labels)`` tensor batches.
        model_save_path: directory passed to ``save_model``.
        checkpoint: ``None`` to start at epoch 0, or a dict produced by
            ``save_model`` to resume after ``checkpoint["epochs"]``.
        device: device the model and batches are moved to.

    Returns:
        The trained model. When an SWA optimizer is used and at least one epoch
        ran, the model holds the averaged weights on return.

    NOTE(review): the recorded losses are the sum of per-batch mean losses divided
    by the number of samples, i.e. roughly the true mean divided by the batch size.
    This is left unchanged so values stay comparable with existing checkpoints.
    """
    if checkpoint is None:
        start_epoch=0
        lowest_eval_loss = float('inf')
        train_loss_hist = []
        valid_loss_hist = []
        train_acc_hist = []
        valid_acc_hist = []
    else:
        start_epoch = checkpoint["epochs"]+1
        lowest_eval_loss = checkpoint["lowest_eval_loss"]
        train_loss_hist = checkpoint["train_loss_hist"]
        valid_loss_hist = checkpoint["valid_loss_hist"]
        train_acc_hist = checkpoint["train_acc_hist"]
        valid_acc_hist = checkpoint["valid_acc_hist"]

    criterion = CrossEntropyLoss()
    # Only the SWA wrapper has swap_swa_sgd; a plain optimizer simply skips the swaps.
    swap_weights = getattr(optimizer, 'swap_swa_sgd', None)

    model.to(device)
    for actual_epoch in range(start_epoch,num_epochs):
        model.train()
        tr_acc = 0
        tr_loss = 0
        num_train_samples = 0
        train_bar = tqdm(train_dataloader,desc=f"Epoch {actual_epoch} Train ")
        for batch in train_bar:
            b_input_ids, b_attn_masks, b_labels = (b.long().to(device) for b in batch)
            num_train_samples += b_labels.size(0)

            optimizer.zero_grad()
            logits = model(input_ids=b_input_ids,attention_mask=b_attn_masks)
            loss = criterion(logits, b_labels)

            prediction = logits.detach().max(1)[1]
            tr_acc += prediction.eq(b_labels).sum().item()
            tr_loss += loss.item()
            train_bar.set_postfix({'train_acc': tr_acc/num_train_samples,'train_loss':tr_loss/num_train_samples})

            loss.backward()
            optimizer.step()

        # max(..., 1) avoids a ZeroDivisionError on an empty dataloader.
        train_loss_hist.append(tr_loss/max(num_train_samples, 1))
        train_acc_hist.append(tr_acc/max(num_train_samples, 1))

        # Put the averaged (SWA) weights into the model for validation and saving.
        if swap_weights is not None:
            swap_weights()

        model.eval()
        eval_loss = 0
        eval_acc = 0
        num_eval_samples = 0
        with torch.no_grad():
            valid_bar = tqdm(valid_dataloader,desc=f"Epoch {actual_epoch} Valid ")
            for batch in valid_bar:
                b_input_ids, b_attn_masks, b_labels = (b.long().to(device) for b in batch)

                logits = model(input_ids=b_input_ids,attention_mask=b_attn_masks)
                loss = criterion(logits, b_labels)
                prediction = logits.max(1)[1]

                eval_acc += prediction.eq(b_labels).sum().item()
                eval_loss += loss.item()
                num_eval_samples += b_labels.size(0)
                valid_bar.set_postfix({'valid_acc':eval_acc/num_eval_samples,'valid_loss':eval_loss/num_eval_samples})

        valid_loss_hist.append(eval_loss/max(num_eval_samples, 1))
        valid_acc_hist.append(eval_acc/max(num_eval_samples, 1))

        if valid_loss_hist[-1] < lowest_eval_loss:
            lowest_eval_loss = valid_loss_hist[-1]
            save_model(model, model_save_path, actual_epoch, lowest_eval_loss, train_loss_hist, valid_loss_hist,train_acc_hist,valid_acc_hist)

        # Swap back so the next epoch continues from the optimizer's own iterate
        # rather than from the average (swapping once per epoch without undoing it
        # would also overwrite the SWA buffer with the raw weights). After the final
        # epoch the averaged weights are deliberately left in the model.
        if swap_weights is not None and actual_epoch < num_epochs - 1:
            swap_weights()
    return model

In [None]:
# Continue training the SentencePiece-tokenised XLNet from its checkpoint, up to epoch 100.
model = model_train(
    model=model,
    num_epochs=100,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    model_save_path=defaultpath+'/model',
    checkpoint=checkpoint,
    device=device,
)

vanillaXLNET_e44_loss0.0798_acc0.8052.pth saved