In [None]:
!pip install transformers

In [None]:
import pandas as pd
import torch
import numpy as np
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModelForMaskedLM, AdamW
#auto tokenizer and automodel are for bert-base-historical-german-cased from @redewiedergabe
from torch import nn
from torch.optim import Adam
from tqdm import tqdm
from sklearn.metrics import f1_score, recall_score, confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
import time
import pickle #to save the model into a file .pkl 
from google.colab import files
 

In [None]:
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int or Float)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)

    The bar is clamped to the range [0, length] and the percentage to
    [0, 100], so an ``iteration`` that overshoots ``total`` (which happens
    when ``total`` is fractional) still renders a full bar. A ``total`` of
    zero or less is drawn as already complete instead of dividing by zero.
    """
    if total > 0:
        fraction = min(max(iteration / float(total), 0.0), 1.0)
        filledLength = min(max(int(length * iteration // total), 0), length)
    else:
        # Nothing to iterate over: show a finished bar rather than raising.
        fraction = 1.0
        filledLength = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete (>= so a fractional total still terminates the line)
    if iteration >= total:
        print()

In [None]:
# Module-level tokenizer for the 'bert-base-german-cased' checkpoint.
# Dataset.__init__ below reads this global, so it must be defined before any Dataset is built.
tokenizer = BertTokenizer.from_pretrained('bert-base-german-cased')
# Alternative tokenizer for the historical-German checkpoint (disabled).
#tokenizer = AutoTokenizer.from_pretrained("redewiedergabe/bert-base-historical-german-rw-cased")

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing pre-tokenised texts with their labels.

    The frame passed to the constructor must provide a ``'text'`` and a
    ``'label'`` column. Every text is encoded once, at construction time,
    with the module-level ``tokenizer`` (padded/truncated to 512 tokens,
    returned as PyTorch tensors).
    """

    def __init__(self, df):
        # Labels first, then the (much slower) tokenisation pass.
        self.labels = df['label'].values

        encodings = []
        for sentence in df['text']:
            encodings.append(
                tokenizer(
                    sentence,
                    padding='max_length',
                    max_length=512,
                    truncation=True,
                    return_tensors="pt",
                )
            )
        self.texts = encodings

    def __len__(self):
        """Number of samples (one per label)."""
        return len(self.labels)

    def classes(self):
        """Return the raw label array."""
        return self.labels

    def get_batch_texts(self, idx):
        """Return the stored encoding(s) at ``idx``."""
        return self.texts[idx]

    def get_batch_labels(self, idx):
        """Return the label(s) at ``idx`` wrapped in a NumPy array."""
        return np.array(self.labels[idx])

    def __getitem__(self, idx):
        """Return the ``(encoding, label)`` pair for ``idx``."""
        encoding = self.get_batch_texts(idx)
        target = self.get_batch_labels(idx)
        return encoding, target

class BertClassifier(nn.Module):
    """Pretrained BERT encoder followed by dropout and a linear classification head.

    Args:
        dropout: Dropout probability applied to the pooled BERT output.
        num_classes: Number of output classes (size of the linear head).
        pretrained_name: Checkpoint name passed to ``BertModel.from_pretrained``.
        final_relu: When True (the default, matching the original behaviour)
            a ReLU is applied to the head output.
            NOTE(review): a ReLU on the output logits clamps every negative
            logit to 0, which blocks its gradient and can make several classes
            tie at 0; consider ``final_relu=False`` for newly trained models.
    """

    def __init__(self, dropout=0.5, num_classes=3, pretrained_name='bert-base-german-cased', final_relu=True):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained(pretrained_name)
        self.dropout = nn.Dropout(dropout)
        # Take the input width from the loaded encoder instead of hard-coding 768.
        self.linear = nn.Linear(self.bert.config.hidden_size, num_classes)
        self.relu = nn.ReLU()
        self.final_relu = final_relu

    def forward(self, input_id, mask):
        """Return the head output for a batch.

        Args:
            input_id: Token ids forwarded to BERT as ``input_ids``.
            mask: Attention mask forwarded to BERT as ``attention_mask``.

        Returns:
            The output of the linear head, passed through ReLU when
            ``final_relu`` is enabled.
        """
        # With return_dict=False BERT returns a tuple; only the pooled output is used.
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        logits = self.linear(self.dropout(pooled_output))
        # getattr keeps models pickled before `final_relu` existed working unchanged.
        if getattr(self, 'final_relu', True):
            logits = self.relu(logits)
        return logits

In [None]:
# Load the combined (original + back-translated) train/validation corpus.
df = pd.read_csv("original+back(train+val).csv")
#df=pd.read_csv("original(train+val).csv")

# Keep only the model inputs; .copy() gives an independent frame to modify.
df = df[['text', 'label']].copy()
# str.strip() returns a new Series, so the result has to be assigned back
# (the bare call previously discarded it and left the text unstripped).
df['text'] = df['text'].str.strip()

# Reproducible 80/20 shuffle split. Positional slicing replaces np.split on a
# DataFrame, which relies on the deprecated DataFrame.swapaxes.
df_shuffled = df.sample(frac=1, random_state=42)
split_at = int(.8 * len(df))
df_train, df_val = df_shuffled.iloc[:split_at], df_shuffled.iloc[split_at:]



In [None]:
# Build a fresh classifier with the default constructor arguments.
model = BertClassifier()

# File name of the checkpoint; the download cell further down reads `label` as well.
label = 'GBert9.pkl'

# Pickle the whole model object (not just a state dict) to that file.
with open(label, 'wb') as fid:
    pickle.dump(model,fid)  



In [None]:
import torch.nn.functional as F

#PyTorch
ALPHA = 0.8
GAMMA = 2

class FocalLoss(nn.Module):
    """Multi-class focal loss computed from raw logits.

    For each sample the cross-entropy ``ce`` is computed, ``pt = exp(-ce)`` is
    the probability assigned to the true class, and the sample loss is
    ``alpha * (1 - pt) ** gamma * ce``.

    Args:
        weight: Optional 1-D tensor of per-class weights multiplied into each
            sample's loss (indexed by the sample's target class).
        size_average: If True the per-sample losses are averaged, otherwise
            they are summed.
    """

    def __init__(self, weight=None, size_average=True):
        super(FocalLoss, self).__init__()
        # Registered as a buffer so .cuda()/.to() moves it with the module.
        self.register_buffer('weight', weight)
        self.size_average = size_average

    def forward(self, inputs, targets, alpha=ALPHA, gamma=GAMMA, smooth=1):
        """Return the focal loss of a batch.

        Args:
            inputs: Logits of shape ``(batch, num_classes)``; no softmax or
                sigmoid should have been applied.
            targets: Class indices of shape ``(batch,)``.
            alpha: Global scaling factor of the loss.
            gamma: Focusing exponent; larger values down-weight easy samples.
            smooth: Unused; kept so existing call sites stay valid.

        Returns:
            A scalar tensor.
        """
        targets = targets.long()

        # Per-sample cross-entropy over the class dimension. The logits must
        # stay 2-D: flattening them would make the softmax run across samples.
        ce = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce)
        focal_loss = alpha * (1 - pt) ** gamma * ce

        if self.weight is not None:
            focal_loss = focal_loss * self.weight.to(focal_loss.device)[targets]

        return focal_loss.mean() if self.size_average else focal_loss.sum()

In [None]:
def train(model, train_data, val_data, learning_rate, epochs, label, hist):
    """Train the classification head of ``model`` and checkpoint it every epoch.

    The BERT encoder (``model.bert``) is frozen, so only the remaining
    parameters are updated. After every epoch the whole model is pickled to
    ``label`` (overwriting the previous file) and a metrics line is printed.

    Args:
        model: Model called as ``model(input_ids, attention_mask)``; must
            expose a ``bert`` sub-module.
        train_data: Frame with ``'text'`` and ``'label'`` columns used for training.
        val_data: Frame with the same columns used for validation.
        learning_rate: Learning rate passed to Adam.
        epochs: Number of passes over ``train_data``.
        label: Path of the pickle file the model is written to.
        hist: Truthy when the model returns an object whose ``.logits``
            attribute holds the predictions instead of returning them directly.

    Returns:
        None.
    """
    # Freeze the encoder: only the classifier on top of it is trained.
    model.bert.requires_grad_(False)

    # Local names differ from the function name so `train` is not shadowed.
    train_set, val_set = Dataset(train_data), Dataset(val_data)

    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    #criterion = nn.CrossEntropyLoss()
    criterion = FocalLoss()

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    optimizer = Adam(model.parameters(), lr=learning_rate)
    #optimizer = AdamW(model.parameters(), lr = learning_rate, eps = 1e-8 )

    def _run_batch(batch_input, batch_label):
        """Forward one batch; return ``(loss tensor, number of correct predictions)``."""
        batch_label = batch_label.to(device).long()
        # The tokenizer output carries an extra dimension of size 1 per sample;
        # drop it for the mask as well as for the ids.
        mask = batch_input['attention_mask'].squeeze(1).to(device)
        input_id = batch_input['input_ids'].squeeze(1).to(device)

        output = model(input_id, mask)
        if hist:
            # Only for bert-historical from pretrained
            output = output.logits

        loss = criterion(output, batch_label)
        correct = (output.argmax(dim=1) == batch_label).sum().item()
        return loss, correct

    # Guard the denominators so empty inputs do not raise ZeroDivisionError.
    n_train, n_val = max(len(train_data), 1), max(len(val_data), 1)
    n_train_batches, n_val_batches = max(len(train_dataloader), 1), max(len(val_dataloader), 1)

    for epoch_num in range(epochs):

        total_acc_train = 0
        total_loss_train = 0.0

        model.train()
        for train_input, train_label in tqdm(train_dataloader):
            batch_loss, correct = _run_batch(train_input, train_label)
            total_loss_train += batch_loss.item()
            total_acc_train += correct

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

        total_acc_val = 0
        total_loss_val = 0.0

        # Evaluation mode switches dropout off; without it the validation
        # metrics are computed on randomly perturbed activations.
        model.eval()
        with torch.no_grad():
            for val_input, val_label in val_dataloader:
                batch_loss, correct = _run_batch(val_input, val_label)
                total_loss_val += batch_loss.item()
                total_acc_val += correct

        # Overwrites the previous checkpoint of the same name.
        with open(label, 'wb') as fid:
            pickle.dump(model, fid)

        # Each accumulated loss is one value per batch, so it is averaged over
        # the number of batches; accuracy counts samples, so it is averaged
        # over the number of samples.
        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / n_train_batches: .3f} | Train Accuracy: {total_acc_train / n_train: .3f} | Val Loss: {total_loss_val / n_val_batches: .3f} | Val Accuracy: {total_acc_val / n_val: .3f}')



In [None]:
# Reload a previously pickled model from 'GBert8.pkl'.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load checkpoints you created yourself.
with open('GBert8.pkl', 'rb') as fid:
     model=pickle.load(fid) 

# train() overwrites the file passed as `label` after every epoch, so pass a new
# file name if the existing checkpoint should be kept.
#train(model, df_train, df_val, 2e-5, 3, 'GBert7.pkl', hist=False)
#train(model, df_train, df_val, 1e-5, 2, 'GBert8.pkl', hist=False)



100%|██████████| 2748/2748 [48:00<00:00,  1.05s/it]


Epochs: 1 | Train Loss:  1.273 | Train Accuracy:  0.453 | Val Loss:  1.272 | Val Accuracy:  0.444


100%|██████████| 2748/2748 [48:56<00:00,  1.07s/it]


Epochs: 2 | Train Loss:  1.272 | Train Accuracy:  0.456 | Val Loss:  1.270 | Val Accuracy:  0.463


100%|██████████| 2748/2748 [50:32<00:00,  1.10s/it]


Epochs: 3 | Train Loss:  1.271 | Train Accuracy:  0.463 | Val Loss:  1.274 | Val Accuracy:  0.432


100%|██████████| 2748/2748 [50:38<00:00,  1.11s/it]


Epochs: 4 | Train Loss:  1.271 | Train Accuracy:  0.467 | Val Loss:  1.270 | Val Accuracy:  0.461


100%|██████████| 2748/2748 [50:47<00:00,  1.11s/it]


Epochs: 5 | Train Loss:  1.273 | Train Accuracy:  0.453 | Val Loss:  1.273 | Val Accuracy:  0.459


100%|██████████| 2748/2748 [50:16<00:00,  1.10s/it]


Epochs: 6 | Train Loss:  1.270 | Train Accuracy:  0.468 | Val Loss:  1.269 | Val Accuracy:  0.464


100%|██████████| 2748/2748 [50:20<00:00,  1.10s/it]


Epochs: 7 | Train Loss:  1.272 | Train Accuracy:  0.458 | Val Loss:  1.270 | Val Accuracy:  0.472


100%|██████████| 2748/2748 [50:52<00:00,  1.11s/it]


Epochs: 8 | Train Loss:  1.271 | Train Accuracy:  0.462 | Val Loss:  1.272 | Val Accuracy:  0.441


100%|██████████| 2748/2748 [50:57<00:00,  1.11s/it]


Epochs: 9 | Train Loss:  1.272 | Train Accuracy:  0.461 | Val Loss:  1.270 | Val Accuracy:  0.464


100%|██████████| 2748/2748 [49:41<00:00,  1.08s/it]


Epochs: 10 | Train Loss:  1.272 | Train Accuracy:  0.459 | Val Loss:  1.273 | Val Accuracy:  0.449


100%|██████████| 2748/2748 [49:28<00:00,  1.08s/it]


Epochs: 11 | Train Loss:  1.273 | Train Accuracy:  0.453 | Val Loss:  1.271 | Val Accuracy:  0.452


100%|██████████| 2748/2748 [50:54<00:00,  1.11s/it]


Epochs: 12 | Train Loss:  1.272 | Train Accuracy:  0.456 | Val Loss:  1.272 | Val Accuracy:  0.456


100%|██████████| 2748/2748 [50:52<00:00,  1.11s/it]


Epochs: 13 | Train Loss:  1.272 | Train Accuracy:  0.460 | Val Loss:  1.272 | Val Accuracy:  0.445


100%|██████████| 2748/2748 [50:57<00:00,  1.11s/it]


Epochs: 14 | Train Loss:  1.271 | Train Accuracy:  0.462 | Val Loss:  1.271 | Val Accuracy:  0.453


100%|██████████| 2748/2748 [50:01<00:00,  1.09s/it]


Epochs: 15 | Train Loss:  1.270 | Train Accuracy:  0.467 | Val Loss:  1.271 | Val Accuracy:  0.457


In [None]:
# Download the file named by `label` through the google.colab `files` helper.
files.download(label)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
def evaluate(model, test_data, hist):
    """Evaluate ``model`` on ``test_data`` and print a three-class report.

    Prints per-class and overall accuracy, per-class and macro F1, per-class
    and macro recall, and the confusion matrix, then shows the matrix as a
    heatmap.

    Args:
        model: Model called as ``model(input_ids, attention_mask)``.
        test_data: Frame with ``'text'`` and ``'label'`` columns; labels are
            expected to be 0, 1 or 2.
        hist: Truthy when the model returns an object whose ``.logits``
            attribute holds the predictions instead of returning them directly.
            Assumes those logits are ``(batch, num_classes)`` -- TODO confirm
            for the historical checkpoint.

    Returns:
        None.
    """
    class_ids = [0, 1, 2]

    counts = test_data['label'].value_counts()
    # .get() so a class that is missing from the data counts as 0 instead of raising.
    class_totals = [int(counts.get(c, 0)) for c in class_ids]
    test = Dataset(test_data)

    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:

        model = model.cuda()

    # Evaluation mode disables dropout; a pickled model keeps whatever mode it
    # was saved in, so it has to be set explicitly.
    model.eval()

    total_acc_test = 0
    total_acc_class = [0, 0, 0]
    y_true, y_pred = [], []

    # Number of batches (an integer, also when the sample count is odd).
    l = len(test_dataloader)
    if l:
        printProgressBar(0, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

    with torch.no_grad():

        for i, (test_input, test_label) in enumerate(test_dataloader):

            test_label = test_label.to(device)
            mask = test_input['attention_mask'].squeeze(1).to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)
            logits = output.logits if hist else output
            predictions = logits.argmax(dim=1)

            # Works for any batch size, including a final batch of size 1.
            for truth, guess in zip(test_label.tolist(), predictions.tolist()):
                truth = int(truth)
                y_true.append(truth)
                y_pred.append(guess)
                if guess == truth:
                    total_acc_class[truth] += 1
                    total_acc_test += 1

            # Update Progress Bar
            printProgressBar(i + 1, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

    def _ratio(hits, total):
        """Fraction ``hits / total``, or 0 when there are no samples."""
        return hits / total if total else 0

    print("all values: ", total_acc_class)
    print("Accuracy per Class: ")
    print(f"Class 0 Nein -> {_ratio(total_acc_class[0], class_totals[0]): .3f} on {class_totals[0]} samples")
    print(f"Class 1 Metapher ->  {_ratio(total_acc_class[1], class_totals[1]): .3f} on {class_totals[1]} samples")
    print(f"Class 2 Kandidat ->  {_ratio(total_acc_class[2], class_totals[2]): .3f} on {class_totals[2]} samples")
    print(f'Test Accuracy: {_ratio(total_acc_test, len(test_data)): .3f} \n\n')

    print("F1-score: ")
    # labels= guarantees one entry per class even if a class never occurs.
    score = f1_score(y_true, y_pred, labels=class_ids, average=None)
    print(score)
    print("Class 0 Nein score-> ", score[0])
    print("Class 1 Metapher score -> ", score[1])
    print("Class 2 Kandidat score -> ", score[2])

    print("F1 wih average Macro -> ",f1_score(y_true, y_pred, average='macro'),"\n" )

    rec = recall_score(y_true, y_pred, labels=class_ids, average=None)
    print("Class 0 Nein recall-> ", rec[0])
    print("Class 1 Metapher recall -> ", rec[1])
    print("Class 2 Kandidat recall -> ", rec[2])
    print("Recall score macro -> ", recall_score(y_true, y_pred, average='macro'),"\n")


    print("Confusion Matrix: \n")
    # labels= keeps the matrix 3x3 so it always fits the frame built below.
    cfm = confusion_matrix(y_true, y_pred, labels=class_ids)
    print(cfm)
    df_cm = pd.DataFrame(cfm, range(3), range(3))
    sn.set(font_scale=1.4) # for label size
    sn.heatmap(df_cm, annot=True) # font size
    plt.show()
    

In [None]:

# Load a pickled model and evaluate it on the held-out test set.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load checkpoints you created yourself.
#label='mybert_ft_2.pkl'  #lr 2e-5 with adamW
with open('mybert_ft_2linear.pkl', 'rb') as fid:
     model=pickle.load(fid) 
# Test data: read from 'original_test.csv' and passed to evaluate() below.
df_test=pd.read_csv('original_test.csv')
print("Evaluation: ")
# hist=False: the model output is used directly as the predictions.
evaluate(model, df_test, hist=False)

In [None]:
def evaluate_cross(model, test_data, hist, class_score, macro_score, class_accuracy, class_recall, recall_macro, j):
    """Evaluate ``model`` on one fold and record the metrics in the given containers.

    Args:
        model: Model called as ``model(input_ids, attention_mask)``.
        test_data: Fold frame with ``'text'`` and ``'label'`` columns; labels
            are expected to be 0, 1 or 2.
        hist: Truthy when the model returns an object whose ``.logits``
            attribute holds the predictions instead of returning them directly.
            Assumes those logits are ``(batch, num_classes)`` -- TODO confirm
            for the historical checkpoint.
        class_score: Indexable of rows; ``class_score[j][c]`` receives the F1
            score of class ``c``.
        macro_score: List; the macro F1 of this fold is appended.
        class_accuracy: Indexable of rows; ``class_accuracy[j][c]`` receives
            the fraction of class-``c`` samples predicted correctly.
        class_recall: Indexable of rows; ``class_recall[j][c]`` receives the
            recall of class ``c``.
        recall_macro: List; the macro recall of this fold is appended.
        j: Row (fold) index written to in the three row containers.

    Returns:
        None; results are written into the containers passed in.
    """
    class_ids = [0, 1, 2]

    counts = test_data['label'].value_counts()
    # .get() so a class that is missing from the fold counts as 0 instead of raising.
    class_totals = [int(counts.get(c, 0)) for c in class_ids]
    test = Dataset(test_data)

    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:

        model = model.cuda()

    # Evaluation mode disables dropout; a pickled model keeps whatever mode it
    # was saved in, so it has to be set explicitly.
    model.eval()

    total_acc_class = [0, 0, 0]
    y_true, y_pred = [], []

    # Number of batches (an integer, also when the sample count is odd).
    l = len(test_dataloader)
    if l:
        printProgressBar(0, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

    with torch.no_grad():

        for i, (test_input, test_label) in enumerate(test_dataloader):

            test_label = test_label.to(device)
            mask = test_input['attention_mask'].squeeze(1).to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)
            logits = output.logits if hist else output
            predictions = logits.argmax(dim=1)

            # Works for any batch size, including a final batch of size 1.
            for truth, guess in zip(test_label.tolist(), predictions.tolist()):
                truth = int(truth)
                y_true.append(truth)
                y_pred.append(guess)
                if guess == truth:
                    total_acc_class[truth] += 1

            # Update Progress Bar
            printProgressBar(i + 1, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

    # labels= guarantees one entry per class even if a class never occurs in the fold.
    score = f1_score(y_true, y_pred, labels=class_ids, average=None)
    rec = recall_score(y_true, y_pred, labels=class_ids, average=None)

    # Rows are filled element by element (not replaced) so a caller holding a
    # reference to the row objects sees the values.
    for c in class_ids:
        class_accuracy[j][c] = total_acc_class[c] / class_totals[c] if class_totals[c] else 0.0

    for c in class_ids:
        class_score[j][c] = score[c]

    macro_score.append(f1_score(y_true, y_pred, average='macro'))

    for c in class_ids:
        class_recall[j][c] = rec[c]

    recall_macro.append(recall_score(y_true, y_pred, average='macro'))
    
    

In [None]:
import os

# Names of the pickled checkpoints found in the Colab working directory.
# A comprehension keeps the iteration variable local: the previous loop
# variable was called `files`, which overwrote the `google.colab.files` module
# imported at the top of the notebook for every cell run afterwards.
models = [entry for entry in os.listdir("/content/") if entry.endswith(".pkl")]


In [None]:
df_test = pd.read_csv('original_test.csv')

N_FOLDS = 10


def _shuffled_folds(frame, n_folds=N_FOLDS, seed=42):
    """Shuffle ``frame`` reproducibly and cut it into ``n_folds`` consecutive chunks.

    The row positions are split with ``np.array_split`` and applied through
    ``iloc``; this yields the same chunk sizes as ``np.array_split(frame, n)``
    without calling it on a DataFrame (which relies on the deprecated
    ``DataFrame.swapaxes``).
    """
    shuffled_frame = frame.sample(frac=1, random_state=seed)
    index_chunks = np.array_split(np.arange(len(shuffled_frame)), n_folds)
    return [shuffled_frame.iloc[chunk] for chunk in index_chunks]


# Split every class separately so each fold contains a share of all three classes.
one_split = _shuffled_folds(df_test[df_test['label'] == 1])
two_split = _shuffled_folds(df_test[df_test['label'] == 2])
zero_split = _shuffled_folds(df_test[df_test['label'] == 0])

# Unstratified shuffle/split; not used below, kept in case other cells read these names.
shuffled = df_test.sample(frac=1, random_state=42)
result = _shuffled_folds(df_test)

results=pd.DataFrame( columns=['model', 'F1_score class 0','F1_score class 1','F1_score class 2', 'Macro F1', 'Accuracy class 0', 'Accuracy class 1', 'Accuracy class 2' ], index=range(len(models)))

#loop over all the models in the current directory
for k, mod in enumerate(sorted(models)):

    print(k, mod)

    # One independent row per fold. `[3*[None]]*10` would repeat a single row
    # object ten times, so every fold would overwrite the same three values.
    class_score = [[None] * 3 for _ in range(N_FOLDS)]
    class_accuracy = [[None] * 3 for _ in range(N_FOLDS)]
    class_recall = [[None] * 3 for _ in range(N_FOLDS)]
    macro_score, recall_macro = [], []

    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # load checkpoints you created yourself.
    with open(mod, 'rb') as fid:
        model = pickle.load(fid)

    for i in range(N_FOLDS):
        fold = pd.concat([zero_split[i], two_split[i], one_split[i]])
        fold = fold.sample(frac=1, random_state=42).reset_index(drop=True)
        evaluate_cross(model, fold, False, class_score, macro_score, class_accuracy, class_recall, recall_macro, i)

    results.loc[k, 'model'] = mod

    print("score: ", class_score)

    # Aggregate once, after all folds have been evaluated: rows are folds,
    # columns are classes.
    f1_by_fold = np.array(class_score, dtype=float)
    results.loc[k, 'F1_score class 0'] = f1_by_fold[:, 0].mean()
    results.loc[k, 'F1_score class 1'] = f1_by_fold[:, 1].mean()
    results.loc[k, 'F1_score class 2'] = f1_by_fold[:, 2].mean()
    results.loc[k, 'Macro F1'] = np.mean(macro_score)

    print("\n", class_accuracy)

    accuracy_by_fold = np.array(class_accuracy, dtype=float)
    results.loc[k, 'Accuracy class 0'] = accuracy_by_fold[:, 0].mean()
    results.loc[k, 'Accuracy class 1'] = accuracy_by_fold[:, 1].mean()
    results.loc[k, 'Accuracy class 2'] = accuracy_by_fold[:, 2].mean()


In [None]:
# Show the per-model summary table and write it to 'results1.csv'.
print(results)
results.to_csv("results1.csv")

In [None]:
# Imported under an alias because the name `files` is reused as a loop
# variable in the cell that lists the checkpoints.
from google.colab import files as gf
# Download the summary table: the notebook writes it with
# results.to_csv("results1.csv"), so that is the file name requested here
# (the previous 'results.csv' is not written anywhere in the visible cells).
gf.download('results1.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Download the 'GBert8.pkl' checkpoint; `gf` is the google.colab files helper imported in the previous cell.
gf.download('GBert8.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>