In [34]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import torch
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig, Adafactor
from transformers import get_linear_schedule_with_warmup
import datetime
import random
import seaborn as sns
import numpy as np
import time
import matplotlib.pyplot as plt


from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


In [35]:
tokenizer = BertTokenizer.from_pretrained('dbmdz/bert-base-turkish-128k-uncased', do_lower_case=True)


In [36]:
df = pd.read_csv('datasets/clean_data.csv')

df.head(10)
df.info()

```
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3388 entries, 0 to 3387
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   scraped_id   3388 non-null   int64 
 1   text         3388 non-null   object
 2   tagger       3375 non-null   object
 3   tagged_date  3388 non-null   object
 4   label        3388 non-null   object
 5   clean_data   3388 non-null   object
dtypes: int64(1), object(5)
memory usage: 158.9+ KB
```

In [37]:
# check GPU
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    device = torch.device("cuda")
    print('GPU:', torch.cuda.get_device_name(0))
else:
    raise SystemError('GPU device not found')

```
GPU: Tesla P100-PCIE-16GB
2022-08-16 20:37:12.159662: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:37:12.160856: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:37:12.161558: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:37:12.162267: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:37:12.162857: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:37:12.163383: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /device:GPU:0 with 15047 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
```


In [38]:
df.groupby('label').size()

```
label
Cinsiyetçilik     601
Irkçılık          490
Kızdırma          910
Nötr             1387
dtype: int64
```

In [39]:
df['scraped_id'].size

```
3388
```

In [40]:
df['label'] = LabelEncoder().fit_transform(df['label'])

In [None]:
training = df.groupby('label').apply(lambda x : x.sample(frac = 0.8))
test = pd.concat([df,training]).drop_duplicates(keep=False)
result = test.groupby('text')['label'].count().sum()
print(df['scraped_id'].size - result)

In [42]:
training

In [43]:
test

In [44]:
print("Training: ", len(training))
print("Test: ", len(test))

```
Training:  2372
Test:  1016
```

In [45]:
training_texts = training.clean_data.values
training_labels = training.label.values

In [46]:
training_labels

```
array([0, 0, 0, ..., 3, 3, 3])
```

In [47]:
input_ids = []
attention_masks = []
max_len = 100


for text in training_texts:
    encoded_dict = tokenizer.encode_plus(
                        str(text),                     
                        add_special_tokens = True,
                        max_length = max_len,      
                        pad_to_max_length = True,
                        return_attention_mask = True, 
                        return_tensors = 'pt',
                   )
    
    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(training_labels)

print('Original: ', training_texts[0])
print('Token IDs:', input_ids[0])

```
Original:  boyle pasta yapamayan da kendine kizim demesin
Token IDs: tensor([    2, 21181,  9692, 43783,  1972,  4852, 68907, 86075,     3,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0])
```

In [48]:
train_dataset = TensorDataset(input_ids, attention_masks, labels)

In [49]:
batch_size = 16

train_dataloader = DataLoader(
            train_dataset,  
            sampler = RandomSampler(train_dataset), 
            batch_size = batch_size 
        )

In [50]:
number_of_categories = len(df['label'].unique())

In [51]:
model = BertForSequenceClassification.from_pretrained(
    "dbmdz/bert-base-turkish-128k-uncased",
    num_labels = number_of_categories, 
    output_attentions = False,
    output_hidden_states = False,
)

model.cuda()

```
BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(128000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (1): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (2): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (3): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (4): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (5): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (6): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (7): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (8): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (9): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (10): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (11): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (pooler): BertPooler(
      (dense): Linear(in_features=768, out_features=768, bias=True)
      (activation): Tanh()
    )
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Linear(in_features=768, out_features=4, bias=True)
)
```

In [52]:
    epochs = 8 #denemelerim sonucu kayıp 0 a 8. epochta yaklaşıyor

    optimizer = AdamW(model.parameters(),
                      lr = 5e-5,
                      eps = 1e-8 
                    )
    #bu optimazer kaldırılacakmış yakında yeni versiyona uygun torch.optim.AdamW kullanalım.

    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps = 0,
                                                num_training_steps = total_steps)

In [53]:
def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [54]:
seed_val = 3000

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

training_stats = []
total_t0 = time.time()

for epoch_i in range(0, epochs):
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    t0 = time.time()
    total_train_loss = 0
    model.train()
    
    for step, batch in enumerate(train_dataloader):
        if step % 10 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model.zero_grad()        
        output = model(b_input_ids, 
                             token_type_ids=None, 
                             attention_mask=b_input_mask, 
                             labels=b_labels)
        loss = output['loss']
        logits = output['logits']
        total_train_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader)            
    training_time = format_time(time.time() - t0)

    print("Average training loss: {0:.2f}".format(avg_train_loss))
    print("Training epoch took: {:}".format(training_time))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Training Time': training_time,
        }
    )

print("Training completed in {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

```
======== Epoch 1 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:04.
Batch    50  of    149.    Elapsed: 0:00:04.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:10.
Batch   120  of    149.    Elapsed: 0:00:11.
Batch   130  of    149.    Elapsed: 0:00:11.
Batch   140  of    149.    Elapsed: 0:00:12.
Average training loss: 0.67
Training epoch took: 0:00:13
======== Epoch 2 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:04.
Batch    50  of    149.    Elapsed: 0:00:04.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:10.
Batch   120  of    149.    Elapsed: 0:00:10.
Batch   130  of    149.    Elapsed: 0:00:11.
Batch   140  of    149.    Elapsed: 0:00:12.
Average training loss: 0.32
Training epoch took: 0:00:13
======== Epoch 3 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:03.
Batch    50  of    149.    Elapsed: 0:00:04.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:09.
Batch   120  of    149.    Elapsed: 0:00:10.
Batch   130  of    149.    Elapsed: 0:00:11.
Batch   140  of    149.    Elapsed: 0:00:12.
Average training loss: 0.18
Training epoch took: 0:00:13
======== Epoch 4 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:04.
Batch    50  of    149.    Elapsed: 0:00:05.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:10.
Batch   120  of    149.    Elapsed: 0:00:11.
Batch   130  of    149.    Elapsed: 0:00:12.
Batch   140  of    149.    Elapsed: 0:00:12.
Average training loss: 0.09
Training epoch took: 0:00:13
======== Epoch 5 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:03.
Batch    50  of    149.    Elapsed: 0:00:04.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:10.
Batch   120  of    149.    Elapsed: 0:00:11.
Batch   130  of    149.    Elapsed: 0:00:12.
Batch   140  of    149.    Elapsed: 0:00:13.
Average training loss: 0.05
Training epoch took: 0:00:13
======== Epoch 6 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:03.
Batch    50  of    149.    Elapsed: 0:00:04.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:10.
Batch   120  of    149.    Elapsed: 0:00:11.
Batch   130  of    149.    Elapsed: 0:00:11.
Batch   140  of    149.    Elapsed: 0:00:12.
Average training loss: 0.01
Training epoch took: 0:00:13
======== Epoch 7 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:04.
Batch    50  of    149.    Elapsed: 0:00:05.
Batch    60  of    149.    Elapsed: 0:00:06.
Batch    70  of    149.    Elapsed: 0:00:07.
Batch    80  of    149.    Elapsed: 0:00:08.
Batch    90  of    149.    Elapsed: 0:00:09.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:10.
Batch   120  of    149.    Elapsed: 0:00:11.
Batch   130  of    149.    Elapsed: 0:00:12.
Batch   140  of    149.    Elapsed: 0:00:13.
Average training loss: 0.01
Training epoch took: 0:00:14
======== Epoch 8 / 8 ========
Batch    10  of    149.    Elapsed: 0:00:01.
Batch    20  of    149.    Elapsed: 0:00:02.
Batch    30  of    149.    Elapsed: 0:00:03.
Batch    40  of    149.    Elapsed: 0:00:03.
Batch    50  of    149.    Elapsed: 0:00:04.
Batch    60  of    149.    Elapsed: 0:00:05.
Batch    70  of    149.    Elapsed: 0:00:06.
Batch    80  of    149.    Elapsed: 0:00:07.
Batch    90  of    149.    Elapsed: 0:00:08.
Batch   100  of    149.    Elapsed: 0:00:09.
Batch   110  of    149.    Elapsed: 0:00:09.
Batch   120  of    149.    Elapsed: 0:00:10.
Batch   130  of    149.    Elapsed: 0:00:11.
Batch   140  of    149.    Elapsed: 0:00:12.
Average training loss: 0.00
Training epoch took: 0:00:13
Training completed in 0:01:45 (h:mm:ss)
```


* ## Burada model oluşmuş oluyor. model save işlemi yapmamız ve sonra bir text gönderip ne etiket bastıgına bakmamız gerekiyor.


In [55]:
tokenizer.save_pretrained("./bigscience_t0_tokenizer")
model.save_pretrained("./bigscience_t0_model")

In [56]:
df_stats = pd.DataFrame(data=training_stats)
plt.plot(df_stats['Training Loss'], label="Training")
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.xticks([1, 2, 3, 4])
plt.show()

In [57]:
test_texts = test.text.values
test_labels = test.label.values

input_ids = []
attention_masks = []

for text in test_texts:
    encoded_dict = tokenizer.encode_plus(
                        text,                     
                        add_special_tokens = True, 
                        max_length = max_len,          
                        pad_to_max_length = True,
                        return_attention_mask = True,  
                        return_tensors = 'pt',   
                   )
    
    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(test_labels)

batch_size = 32  

prediction_data = TensorDataset(input_ids, attention_masks, labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

In [58]:
print('Prediction started on test data')
model.eval()
predictions , true_labels = [], []

for batch in prediction_dataloader:
  batch = tuple(t.to(device) for t in batch)
  b_input_ids, b_input_mask, b_labels = batch

  with torch.no_grad():
      outputs = model(b_input_ids, token_type_ids=None, 
                      attention_mask=b_input_mask)

  logits = outputs[0]
  logits = logits.detach().cpu().numpy()
  label_ids = b_labels.to('cpu').numpy()
  
  predictions.append(logits)
  true_labels.append(label_ids)

print('Prediction completed')

```
Prediction started on test data
Prediction completed
```

In [59]:
prediction_set = []

for i in range(len(true_labels)):
  pred_labels_i = np.argmax(predictions[i], axis=1).flatten()
  prediction_set.append(pred_labels_i)

prediction_scores = [item for sublist in prediction_set for item in sublist]

In [60]:
f_score = f1_score(test_labels, prediction_scores, average='macro')
precision = precision_score(test_labels, prediction_scores, average='macro')
recall = recall_score(test_labels, prediction_scores, average='macro')
accr = accuracy_score(test_labels, prediction_scores)

In [61]:
print("F-Score: ", f_score)
print("Recall: ", recall)
print("Precision: ", precision)
print("Accuracy: ", accr)


```
F-Score:  0.8684970081779455
Recall:  0.866681111547183
Precision:  0.8704137206600524
Accuracy:  0.8710629921259843
```

In [62]:
report = pd.DataFrame(classification_report(test_labels, prediction_scores, output_dict=True))

In [63]:
report = report.rename(columns={
                                '0':'Cinsiyetçilik',
                                '1':'Irkçılık',
                                '2':'Kızdırma',
                                '3':'Nötr'})
report

## Bundan alt kısım veri çoğaltma çabalarım... 

In [64]:
"""df = pd.read_csv('../input/nane-limon-son/clean_data (2) - clean_data (2).csv')
df = df.sample(10)
df.head(10)
df.info()

!pip install nlpaug
import nlpaug.augmenter.word as naw


def augmentation(text):
    aug = naw.ContextualWordEmbsAug(
        model_path='dbmdz/bert-base-turkish-128k-uncased', action="insert", top_k=0)
    augmented_text = aug.augment(text)
    return augmented_text


df['augmentation'] = df.clean_data.apply(augmentation)
df.to_csv('augmentation.csv')"""

In [65]:
print(df.head(10))

```

   scraped_id                                               text  \
0           1       -185 altı kendine erkeğim demesin diyen kasa   
1           2  "feminen bir erkek neden olamıyorsun, neden be...   
2           3  "Kızlar böyle tırnağı olmayan da kendine kızım...   
3           4  "Şık olmalı kadın dediğin.Gelisi, gülüşü, bakı...   
4           5  “ Çokta …de “ demek için erkek olmak isterdim....   
5           6  “ Şöyle olmayan kendine kadınım demesin böyle ...   
6           7  “Ben öldüğüm zaman, beni gece gömün ki ! Namah...   
7           8  *Sakallı lavuk trans kız olduğunu iddia eden t...   
8           9  *Twittera girerim*\nEğer benim gibi değilseniz...   
9          10  #kücükköy Kadınlar  gibi  takdir  değil, her  ...   

                         tagger                 tagged_date  label  \
0  model_tarafından_etiketlendi  2022-07-31 22:11:42.000000      0   
1  model_tarafından_etiketlendi  2022-07-31 22:11:47.000000      0   
2  model_tarafından_etiketlendi  2022-07-31 22:11:49.000000      0   
3  model_tarafından_etiketlendi  2022-07-31 22:11:51.000000      0   
4  model_tarafından_etiketlendi  2022-07-31 22:11:53.000000      0   
5  model_tarafından_etiketlendi  2022-07-31 22:11:55.000000      0   
6  model_tarafından_etiketlendi  2022-07-31 22:11:57.000000      0   
7  model_tarafından_etiketlendi  2022-07-31 22:11:59.000000      0   
8  model_tarafından_etiketlendi  2022-07-31 22:12:01.000000      0   
9  model_tarafından_etiketlendi  2022-07-31 22:12:03.000000      0   

                                          clean_data  
0            alti kendine erkegim demesin diyen kasa  
1  feminen bir erkek neden olamiyorsun neden ben ...  
2  kizlar boyle tirnagi olmayan da kendine kizim ...  
3  sik olmali kadin dedigingelisi gulusu bakisi d...  
4   cokta de demek icin erkek olmak isterdim cok ...  
5   soyle olmayan kendine kadinim demesin boyle o...  
6  ben oldugum zaman beni gece gomun ki namahrem ...  
7  sakalli lavuk trans kiz oldugunu iddia eden tw...  
8  twittera girerim eger benim gibi degilseniz ba...  
9      kadinlar gibi takdir degil her deprenis anlar  
```

In [66]:

from transformers import BertTokenizer,BertTokenizerFast, TFBertForSequenceClassification, BertConfig, TFBertModel
config = BertConfig.from_json_file("./bigscience_t0_model/config.json")
model_path = "./bigscience_t0_model"
tokenizer_path = "./bigscience_t0_tokenizer"
#model = TFBertModel.from_pretrained(model_path, from_pt=True, config=config)
model = TFBertForSequenceClassification.from_pretrained(model_path, from_pt=True) # modify labels as needed.
"""
 id2label={'0':'Cinsiyetçilik',
                                                                                   '1':'Irkçılık',
                                                                                   '2':'Kızdırma',
                                                                                   '3':'Nötr'}
"""

```
2022-08-16 20:39:09.086630: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.087439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.087989: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.089037: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.089599: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.090225: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.090982: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.091566: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-16 20:39:09.092111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15047 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
2022-08-16 20:39:09.130184: I tensorflow/stream_executor/cuda/cuda_driver.cc:732] failed to allocate 14.69G (15778709504 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-08-16 20:39:09.132511: I tensorflow/stream_executor/cuda/cuda_driver.cc:732] failed to allocate 13.22G (14200838144 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-08-16 20:39:09.134833: I tensorflow/stream_executor/cuda/cuda_driver.cc:732] failed to allocate 11.90G (12780753920 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-08-16 20:39:09.137573: I tensorflow/stream_executor/cuda/cuda_driver.cc:732] failed to allocate 10.71G (11502678016 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.
```

In [67]:
from transformers import TextClassificationPipeline

text = ["Selam herkese bugün güzel bir gün",
        "Aptal zihniyetinizde bir Yunan yatıyor",
        "Akşam halısahaya giderken karısından izin alanda kendine erkeğim demesin!",
        "kör olası çöpçüler aşkımı süpürmüşler",
        "sınıfımdaki deve hörgüçleri",
       "bugün de ölmedik",
       'seninle iyi anlaştık',
       'seni sevmek ümitli şey ama artık ümit yetmiyor bana',
       'Selam sen hariç piç',
       'bana bak kadın']

pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer)



In [68]:
[print(f"{text[index]} - {i['label']}") for index, i in enumerate(pipe(text))]

```
Selam herkese bugün güzel bir gün - LABEL_3
Aptal zihniyetinizde bir Yunan yatıyor - LABEL_1
Akşam halısahaya giderken karısından izin alanda kendine erkeğim demesin! - LABEL_0
kör olası çöpçüler aşkımı süpürmüşler - LABEL_2
sınıfımdaki deve hörgüçleri - LABEL_2
bugün de ölmedik - LABEL_3
akilli erdogan! - LABEL_2
Sen çok adi piç bir insansın diyebilirdim lakin doğru olmazdı - LABEL_2
seninle iyi anlaştık - LABEL_3
seni sevmek ümitli şey ama artık ümit yetmiyor bana - LABEL_3
Selam sen hariç piç - LABEL_2
bana bak kadın - LABEL_3
```