In [2]:
# Downloading required libraries for bert

!pip install transformers
!pip install bert-for-tf2
!pip install sentencepiece

Collecting transformers
  Using cached https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl
Collecting sacremoses
  Using cached https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz
Collecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 3.5MB/s 
Collecting tokenizers==0.8.1.rc1
[?25l  Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994eae9f4aa5250217/tokenizers-0.8.1rc1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 7.3MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.

In [3]:
import pandas as pd 

from transformers import BertTokenizer, BertForSequenceClassification

import tensorflow as tf
import tensorflow_hub as hub

import bert

import torch
from torch.utils.data import TensorDataset

from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [7]:
df = pd.read_csv('df_clean.csv')

df.head()

Unnamed: 0,data,section
0,hgs ihlal ge bilgi sorgula hizmet ihlal ge sor...,ekonomi
1,ankara sgtz bulun via twins isim kat bir kule ...,ekonomi
2,akla proje ortak rus enerji dev gazprom gel iz...,ekonomi
3,ermenistan polis erivan merkez dn gece itibare...,ekonomi
4,yldrm haziran tarih kararname bugn resmi gaze...,ekonomi


In [8]:
df.section.value_counts()

kultursanat    600
ekonomi        600
saglık         600
teknoloji      600
spor           600
siyaset        600
Name: section, dtype: int64

In [9]:
le = LabelEncoder()
df.section = le.fit_transform(df.section)

df.head()

Unnamed: 0,data,section
0,hgs ihlal ge bilgi sorgula hizmet ihlal ge sor...,0
1,ankara sgtz bulun via twins isim kat bir kule ...,0
2,akla proje ortak rus enerji dev gazprom gel iz...,0
3,ermenistan polis erivan merkez dn gece itibare...,0
4,yldrm haziran tarih kararname bugn resmi gaze...,0


In [10]:
# train-val split
X_train, X_val, y_train, y_val = train_test_split(df.index.values, 
                                                  df.section.values, 
                                                  test_size=0.15, 
                                                  random_state=17, 
                                                  stratify=df.section.values)

# with stratify parameter, We make sure that the train and val datasets contain an equal number of instances for each class.

In [11]:
# we add a new column to data that specifies whether the row is in train or val dataset
df['data_type'] = ['not_set'] * df.shape[0]
df.loc[X_train, 'data_type'] = 'train'
df.loc[X_val, 'data_type'] = 'val'
df.groupby(['section', 'data_type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,data
section,data_type,Unnamed: 2_level_1
0,train,510
0,val,90
1,train,510
1,val,90
2,train,510
2,val,90
3,train,510
3,val,90
4,train,510
4,val,90


In [12]:
# reading bert tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', 
                                          do_lower_case=True)

# we encode train and val datasets text data with BertTokenizer's batch_encode_plus method
encoded_data_train = tokenizer.batch_encode_plus(
    df[df.data_type=='train'].data.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt'
)

encoded_data_val = tokenizer.batch_encode_plus(
    df[df.data_type=='val'].data.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt'
)

# before creating TensorDatasets, we adapt the inputs to the BERT format
# the inputs should have 3 columns => 'input_ids', 'attention_masks' and 'labels'
# input_ids and attention_masks can be reachable from encoded data
input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(df[df.data_type=='train'].section.values)

input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(df[df.data_type=='val'].section.values)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…

Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.





Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [13]:
# we create TensorDatasets for train and val data in BERT format
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)


print(len(dataset_train))
print(len(dataset_val))

3060
540


In [14]:
# reading the BERT model
# num_labels parameter is the number of outputs that neural network has
# in other words it is the number of our label classes
# we can reach this value with label encoder object
model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                      num_labels=len(le.classes_),
                                                      output_attentions=False,
                                                      output_hidden_states=False)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [15]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# we use RandomSampler for getting random samples while training the model (it is for train dataset)
# while testing our model, it is not important whether the inputs are random or not, so we use SequentialSampler for val dataset

batch_size = 32

# by using TensorDataset, we create dataloader for train and val datasets
dataloader_train = DataLoader(dataset_train, 
                              sampler=RandomSampler(dataset_train), 
                              batch_size=batch_size)

dataloader_validation = DataLoader(dataset_val, 
                                   sampler=SequentialSampler(dataset_val), 
                                   batch_size=batch_size)

In [16]:
from transformers import AdamW, get_linear_schedule_with_warmup


# we use AdamW optimizer
optimizer = AdamW(model.parameters(),
                  lr=1e-5, 
                  eps=1e-8)

# set the epoch value
epochs = 5

scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=0,
                                            num_training_steps=len(dataloader_train)*epochs)

In [17]:
import numpy as np
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):

    """
    This method is for calculating one of the success metrics weighted f1 score.

    Args:
        preds (numpy.ndarray): predicted label values (class)
        labels (numpy.ndarray): true label value (class)

    Returns:
        f1_score (float): f1 score value
    """

    preds_flat = np.argmax(preds, axis=1).flatten() # with flatten(), we make a 2-D list into a 1-D list (list[list] ---flatten>>> list[])
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')

# we create a dictionary that keeps class name with its label encoded value
# example: {'ekonomi': 0, 'kultursanat': 1 ...}
label_dict = dict(zip(le.classes_, le.transform(le.classes_)))

def accuracy_per_class(preds, labels):
    
    """
    This method is for calculating one of the success metrics accuracy.
    It calculates accuracy value for each classes and also total accuracy.
    And it prints each values.

    Args:
        preds (numpy.ndarray): predicted label values (class)
        labels (numpy.ndarray): true label value (class)

    Returns: None
    """

    label_dict_inverse = {v: k for k, v in label_dict.items()}
    
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    # total accuracy = count of total true predictions / count of total predictions
    total_pred_positive = 0 # to calculate total accuracy, we keep the total count of prediction that is correct
    total = 0 # also, we keep the count of total predictions

    print('\n')

    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}\n')

        total_pred_positive += len(y_preds[y_preds==label])
        total += len(y_true)

    print('Total accuracy: ', total_pred_positive/total)  

In [18]:
import random

seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# we define our device as cuda because we use gpu 
# if you are using cpu instead, you need to change 'cuda' to 'cpu'
device = torch.device('cuda') 
model.to(device) # we send our model to device (cuda)

print(device)

cuda


In [19]:
def evaluate(dataloader_val):

    """
    This method is for testing the model which we trained with validation data.
    It produce predictions and calculates the loss value.

    Args:
        dataloader_val (torch.utils.data.dataloader.DataLoader): dataloader for validation dataset

    Returns:
        loss_val_avg (float): average loss value of validation dataset
        predictions (numpy.ndarray): predicted label values (class)
        true_vals (numpy.ndarray): true label value (class)
    """

    model.eval() # we set our model to eval mode. (this is the mode that we test our model with val dataset)
    
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in tqdm(dataloader_val):
        
        batch = tuple(b.to(device) for b in batch)
        
        # BERT's input format that I was mantioned above
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        with torch.no_grad():        
            outputs = model(**inputs)
            
        loss = outputs[0] # loss value
        logits = outputs[1]
        loss_val_total += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    loss_val_avg = loss_val_total/len(dataloader_val) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [20]:
for epoch in tqdm(range(1, epochs+1)):

    model.train() # we set our model to train mode. (this is the mode that we train our model with train dataset)
    
    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)

    for batch in progress_bar:

        model.zero_grad()
        
        batch = tuple(b.to(device) for b in batch)
        
        # BERT's input format that I was mantioned above
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }       

        outputs = model(**inputs)
        
        loss = outputs[0] # loss value
        loss_train_total += loss.item() 
        loss.backward() # backpropogation

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()
        
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
      
    # after this loop one epoch will be trained.
         
    # we save the model after each epoch, so we can use whichever we want
    torch.save(model.state_dict(), f'finetuned_BERT_epoch_{epoch}.model')
        
    tqdm.write(f'\nEpoch {epoch}')
    
    loss_train_avg = loss_train_total/len(dataloader_train)            
    tqdm.write(f'Training loss: {loss_train_avg}') # writing the training loss value after each epoch
    
    val_loss, predictions, true_vals = evaluate(dataloader_validation) # calculating the validation loss and predicting the labels
    val_f1 = f1_score_func(predictions, true_vals) # calculating the f1 score
    tqdm.write(f'Validation loss: {val_loss}')  # writing the validation loss value after each epoch
    tqdm.write(f'F1 Score (Weighted): {val_f1}') # writing the weighted f1 score value after each epoch

# training begins

  0%|          | 0/5 [00:00<?, ?it/s]
Epoch 1:   0%|          | 0/96 [00:00<?, ?it/s][A
Epoch 1:   0%|          | 0/96 [00:02<?, ?it/s, training_loss=0.638][A
Epoch 1:   1%|          | 1/96 [00:02<04:26,  2.80s/it, training_loss=0.638][A
Epoch 1:   1%|          | 1/96 [00:05<04:26,  2.80s/it, training_loss=0.579][A
Epoch 1:   2%|▏         | 2/96 [00:05<04:13,  2.69s/it, training_loss=0.579][A
Epoch 1:   2%|▏         | 2/96 [00:07<04:13,  2.69s/it, training_loss=0.598][A
Epoch 1:   3%|▎         | 3/96 [00:07<04:02,  2.61s/it, training_loss=0.598][A
Epoch 1:   3%|▎         | 3/96 [00:10<04:02,  2.61s/it, training_loss=0.597][A
Epoch 1:   4%|▍         | 4/96 [00:10<03:55,  2.56s/it, training_loss=0.597][A
Epoch 1:   4%|▍         | 4/96 [00:12<03:55,  2.56s/it, training_loss=0.575][A
Epoch 1:   5%|▌         | 5/96 [00:12<03:49,  2.52s/it, training_loss=0.575][A
Epoch 1:   5%|▌         | 5/96 [00:14<03:49,  2.52s/it, training_loss=0.588][A
Epoch 1:   6%|▋         | 6/96 [00:14<0


Epoch 1
Training loss: 1.4242615662515163



  6%|▌         | 1/17 [00:00<00:14,  1.11it/s][A
 12%|█▏        | 2/17 [00:01<00:13,  1.10it/s][A
 18%|█▊        | 3/17 [00:02<00:12,  1.09it/s][A
 24%|██▎       | 4/17 [00:03<00:11,  1.09it/s][A
 29%|██▉       | 5/17 [00:04<00:10,  1.09it/s][A
 35%|███▌      | 6/17 [00:05<00:10,  1.09it/s][A
 41%|████      | 7/17 [00:06<00:09,  1.09it/s][A
 47%|████▋     | 8/17 [00:07<00:08,  1.08it/s][A
 53%|█████▎    | 9/17 [00:08<00:07,  1.08it/s][A
 59%|█████▉    | 10/17 [00:09<00:06,  1.08it/s][A
 65%|██████▍   | 11/17 [00:10<00:05,  1.09it/s][A
 71%|███████   | 12/17 [00:11<00:04,  1.09it/s][A
 76%|███████▋  | 13/17 [00:11<00:03,  1.09it/s][A
 82%|████████▏ | 14/17 [00:12<00:02,  1.09it/s][A
 88%|████████▊ | 15/17 [00:13<00:01,  1.09it/s][A
 94%|█████████▍| 16/17 [00:14<00:00,  1.09it/s][A
100%|██████████| 17/17 [00:15<00:00,  1.09it/s]
 20%|██        | 1/5 [04:16<17:04, 256.13s/it]
Epoch 2:   0%|          | 0/96 [00:00<?, ?it/s][A

Validation loss: 1.0219465231194216
F1 Score (Weighted): 0.6556919134697533



Epoch 2:   0%|          | 0/96 [00:02<?, ?it/s, training_loss=0.360][A
Epoch 2:   1%|          | 1/96 [00:02<03:58,  2.51s/it, training_loss=0.360][A
Epoch 2:   1%|          | 1/96 [00:05<03:58,  2.51s/it, training_loss=0.333][A
Epoch 2:   2%|▏         | 2/96 [00:05<03:56,  2.51s/it, training_loss=0.333][A
Epoch 2:   2%|▏         | 2/96 [00:07<03:56,  2.51s/it, training_loss=0.366][A
Epoch 2:   3%|▎         | 3/96 [00:07<03:53,  2.52s/it, training_loss=0.366][A
Epoch 2:   3%|▎         | 3/96 [00:10<03:53,  2.52s/it, training_loss=0.354][A
Epoch 2:   4%|▍         | 4/96 [00:10<03:51,  2.52s/it, training_loss=0.354][A
Epoch 2:   4%|▍         | 4/96 [00:12<03:51,  2.52s/it, training_loss=0.349][A
Epoch 2:   5%|▌         | 5/96 [00:12<03:49,  2.52s/it, training_loss=0.349][A
Epoch 2:   5%|▌         | 5/96 [00:15<03:49,  2.52s/it, training_loss=0.319][A
Epoch 2:   6%|▋         | 6/96 [00:15<03:46,  2.52s/it, training_loss=0.319][A
Epoch 2:   6%|▋         | 6/96 [00:17<03:46,  2


Epoch 2
Training loss: 0.8287055486192306



  6%|▌         | 1/17 [00:00<00:14,  1.10it/s][A
 12%|█▏        | 2/17 [00:01<00:13,  1.10it/s][A
 18%|█▊        | 3/17 [00:02<00:12,  1.10it/s][A
 24%|██▎       | 4/17 [00:03<00:11,  1.09it/s][A
 29%|██▉       | 5/17 [00:04<00:10,  1.09it/s][A
 35%|███▌      | 6/17 [00:05<00:10,  1.09it/s][A
 41%|████      | 7/17 [00:06<00:09,  1.08it/s][A
 47%|████▋     | 8/17 [00:07<00:08,  1.08it/s][A
 53%|█████▎    | 9/17 [00:08<00:07,  1.08it/s][A
 59%|█████▉    | 10/17 [00:09<00:06,  1.09it/s][A
 65%|██████▍   | 11/17 [00:10<00:05,  1.08it/s][A
 71%|███████   | 12/17 [00:11<00:04,  1.08it/s][A
 76%|███████▋  | 13/17 [00:11<00:03,  1.09it/s][A
 82%|████████▏ | 14/17 [00:12<00:02,  1.08it/s][A
 88%|████████▊ | 15/17 [00:13<00:01,  1.08it/s][A
 94%|█████████▍| 16/17 [00:14<00:00,  1.08it/s][A
100%|██████████| 17/17 [00:15<00:00,  1.09it/s]
 40%|████      | 2/5 [08:33<12:49, 256.65s/it]
Epoch 3:   0%|          | 0/96 [00:00<?, ?it/s][A

Validation loss: 0.6644105078542933
F1 Score (Weighted): 0.8309847139858879



Epoch 3:   0%|          | 0/96 [00:02<?, ?it/s, training_loss=0.208][A
Epoch 3:   1%|          | 1/96 [00:02<03:59,  2.52s/it, training_loss=0.208][A
Epoch 3:   1%|          | 1/96 [00:05<03:59,  2.52s/it, training_loss=0.174][A
Epoch 3:   2%|▏         | 2/96 [00:05<03:56,  2.52s/it, training_loss=0.174][A
Epoch 3:   2%|▏         | 2/96 [00:07<03:56,  2.52s/it, training_loss=0.298][A
Epoch 3:   3%|▎         | 3/96 [00:07<03:54,  2.52s/it, training_loss=0.298][A
Epoch 3:   3%|▎         | 3/96 [00:10<03:54,  2.52s/it, training_loss=0.248][A
Epoch 3:   4%|▍         | 4/96 [00:10<03:51,  2.52s/it, training_loss=0.248][A
Epoch 3:   4%|▍         | 4/96 [00:12<03:51,  2.52s/it, training_loss=0.156][A
Epoch 3:   5%|▌         | 5/96 [00:12<03:49,  2.52s/it, training_loss=0.156][A
Epoch 3:   5%|▌         | 5/96 [00:15<03:49,  2.52s/it, training_loss=0.172][A
Epoch 3:   6%|▋         | 6/96 [00:15<03:46,  2.52s/it, training_loss=0.172][A
Epoch 3:   6%|▋         | 6/96 [00:17<03:46,  2


Epoch 3
Training loss: 0.5584449792901675



  6%|▌         | 1/17 [00:00<00:14,  1.10it/s][A
 12%|█▏        | 2/17 [00:01<00:13,  1.10it/s][A
 18%|█▊        | 3/17 [00:02<00:12,  1.09it/s][A
 24%|██▎       | 4/17 [00:03<00:11,  1.09it/s][A
 29%|██▉       | 5/17 [00:04<00:11,  1.09it/s][A
 35%|███▌      | 6/17 [00:05<00:10,  1.09it/s][A
 41%|████      | 7/17 [00:06<00:09,  1.09it/s][A
 47%|████▋     | 8/17 [00:07<00:08,  1.09it/s][A
 53%|█████▎    | 9/17 [00:08<00:07,  1.09it/s][A
 59%|█████▉    | 10/17 [00:09<00:06,  1.09it/s][A
 65%|██████▍   | 11/17 [00:10<00:05,  1.09it/s][A
 71%|███████   | 12/17 [00:11<00:04,  1.09it/s][A
 76%|███████▋  | 13/17 [00:11<00:03,  1.09it/s][A
 82%|████████▏ | 14/17 [00:12<00:02,  1.09it/s][A
 88%|████████▊ | 15/17 [00:13<00:01,  1.08it/s][A
 94%|█████████▍| 16/17 [00:14<00:00,  1.08it/s][A
100%|██████████| 17/17 [00:15<00:00,  1.09it/s]
 60%|██████    | 3/5 [12:51<08:34, 257.05s/it]
Epoch 4:   0%|          | 0/96 [00:00<?, ?it/s][A

Validation loss: 0.5136461108922958
F1 Score (Weighted): 0.873722961670262



Epoch 4:   0%|          | 0/96 [00:02<?, ?it/s, training_loss=0.118][A
Epoch 4:   1%|          | 1/96 [00:02<04:00,  2.53s/it, training_loss=0.118][A
Epoch 4:   1%|          | 1/96 [00:05<04:00,  2.53s/it, training_loss=0.117][A
Epoch 4:   2%|▏         | 2/96 [00:05<03:57,  2.53s/it, training_loss=0.117][A
Epoch 4:   2%|▏         | 2/96 [00:07<03:57,  2.53s/it, training_loss=0.178][A
Epoch 4:   3%|▎         | 3/96 [00:07<03:54,  2.52s/it, training_loss=0.178][A
Epoch 4:   3%|▎         | 3/96 [00:10<03:54,  2.52s/it, training_loss=0.124][A
Epoch 4:   4%|▍         | 4/96 [00:10<03:52,  2.53s/it, training_loss=0.124][A
Epoch 4:   4%|▍         | 4/96 [00:12<03:52,  2.53s/it, training_loss=0.136][A
Epoch 4:   5%|▌         | 5/96 [00:12<03:49,  2.53s/it, training_loss=0.136][A
Epoch 4:   5%|▌         | 5/96 [00:15<03:49,  2.53s/it, training_loss=0.135][A
Epoch 4:   6%|▋         | 6/96 [00:15<03:47,  2.53s/it, training_loss=0.135][A
Epoch 4:   6%|▋         | 6/96 [00:17<03:47,  2


Epoch 4
Training loss: 0.41105999595796067



  6%|▌         | 1/17 [00:00<00:14,  1.08it/s][A
 12%|█▏        | 2/17 [00:01<00:13,  1.08it/s][A
 18%|█▊        | 3/17 [00:02<00:12,  1.08it/s][A
 24%|██▎       | 4/17 [00:03<00:11,  1.09it/s][A
 29%|██▉       | 5/17 [00:04<00:11,  1.09it/s][A
 35%|███▌      | 6/17 [00:05<00:10,  1.09it/s][A
 41%|████      | 7/17 [00:06<00:09,  1.09it/s][A
 47%|████▋     | 8/17 [00:07<00:08,  1.09it/s][A
 53%|█████▎    | 9/17 [00:08<00:07,  1.09it/s][A
 59%|█████▉    | 10/17 [00:09<00:06,  1.09it/s][A
 65%|██████▍   | 11/17 [00:10<00:05,  1.09it/s][A
 71%|███████   | 12/17 [00:11<00:04,  1.09it/s][A
 76%|███████▋  | 13/17 [00:11<00:03,  1.08it/s][A
 82%|████████▏ | 14/17 [00:12<00:02,  1.08it/s][A
 88%|████████▊ | 15/17 [00:13<00:01,  1.08it/s][A
 94%|█████████▍| 16/17 [00:14<00:00,  1.08it/s][A
100%|██████████| 17/17 [00:15<00:00,  1.09it/s]
 80%|████████  | 4/5 [17:10<04:17, 257.47s/it]
Epoch 5:   0%|          | 0/96 [00:00<?, ?it/s][A

Validation loss: 0.47325433878337636
F1 Score (Weighted): 0.8812405608892372



Epoch 5:   0%|          | 0/96 [00:02<?, ?it/s, training_loss=0.131][A
Epoch 5:   1%|          | 1/96 [00:02<03:58,  2.51s/it, training_loss=0.131][A
Epoch 5:   1%|          | 1/96 [00:05<03:58,  2.51s/it, training_loss=0.122][A
Epoch 5:   2%|▏         | 2/96 [00:05<03:56,  2.51s/it, training_loss=0.122][A
Epoch 5:   2%|▏         | 2/96 [00:07<03:56,  2.51s/it, training_loss=0.060][A
Epoch 5:   3%|▎         | 3/96 [00:07<03:53,  2.51s/it, training_loss=0.060][A
Epoch 5:   3%|▎         | 3/96 [00:10<03:53,  2.51s/it, training_loss=0.087][A
Epoch 5:   4%|▍         | 4/96 [00:10<03:51,  2.52s/it, training_loss=0.087][A
Epoch 5:   4%|▍         | 4/96 [00:12<03:51,  2.52s/it, training_loss=0.108][A
Epoch 5:   5%|▌         | 5/96 [00:12<03:49,  2.52s/it, training_loss=0.108][A
Epoch 5:   5%|▌         | 5/96 [00:15<03:49,  2.52s/it, training_loss=0.108][A
Epoch 5:   6%|▋         | 6/96 [00:15<03:47,  2.52s/it, training_loss=0.108][A
Epoch 5:   6%|▋         | 6/96 [00:17<03:47,  2


Epoch 5
Training loss: 0.35155836461732787



  6%|▌         | 1/17 [00:00<00:14,  1.10it/s][A
 12%|█▏        | 2/17 [00:01<00:13,  1.09it/s][A
 18%|█▊        | 3/17 [00:02<00:12,  1.09it/s][A
 24%|██▎       | 4/17 [00:03<00:11,  1.09it/s][A
 29%|██▉       | 5/17 [00:04<00:11,  1.09it/s][A
 35%|███▌      | 6/17 [00:05<00:10,  1.09it/s][A
 41%|████      | 7/17 [00:06<00:09,  1.09it/s][A
 47%|████▋     | 8/17 [00:07<00:08,  1.08it/s][A
 53%|█████▎    | 9/17 [00:08<00:07,  1.08it/s][A
 59%|█████▉    | 10/17 [00:09<00:06,  1.08it/s][A
 65%|██████▍   | 11/17 [00:10<00:05,  1.09it/s][A
 71%|███████   | 12/17 [00:11<00:04,  1.09it/s][A
 76%|███████▋  | 13/17 [00:11<00:03,  1.09it/s][A
 82%|████████▏ | 14/17 [00:12<00:02,  1.09it/s][A
 88%|████████▊ | 15/17 [00:13<00:01,  1.09it/s][A
 94%|█████████▍| 16/17 [00:14<00:00,  1.09it/s][A
100%|██████████| 17/17 [00:15<00:00,  1.09it/s]
100%|██████████| 5/5 [21:28<00:00, 257.76s/it]

Validation loss: 0.4344688198145698
F1 Score (Weighted): 0.8830702613271352





In [22]:
# we reread our BERT model in case it has changed before
model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                      num_labels=len(label_dict),
                                                      output_attentions=False,
                                                      output_hidden_states=False)

model.to(device); # we send our model to device (cuda)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [23]:
model_name = 'finetuned_BERT_epoch_' + str(epochs) +'.model' # read the model that we saved while training, you can read any epoch
model.load_state_dict(torch.load(model_name, map_location=torch.device('cpu')))

_, predictions, true_vals = evaluate(dataloader_validation) # calculating predictions and getting true labels

accuracy_per_class(predictions, true_vals) # printing the accuracy of each class and the total accuracy

100%|██████████| 17/17 [00:15<00:00,  1.10it/s]



Class: ekonomi
Accuracy: 75/90

Class: kultursanat
Accuracy: 80/90

Class: saglık
Accuracy: 81/90

Class: siyaset
Accuracy: 83/90

Class: spor
Accuracy: 86/90

Class: teknoloji
Accuracy: 72/90

Total accuracy:  0.8833333333333333



