In [1]:
# Mount the Google Drve for getting dataset

from google.colab import drive
drive.mount('/content/drive')

# Change directory to the project directory

import os
os.chdir('/content/drive/MyDrive/turkish-tsa/')

Mounted at /content/drive


In [2]:
#Install the Hugging Face Transformers library:

!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m75.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m86.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


In [3]:
# Import the necessary modules:

import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizerFast, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup

In [4]:
# Loading the pretrained BERTurk tokenizer:

tokenizer = BertTokenizerFast.from_pretrained("dbmdz/bert-base-turkish-cased")

Downloading (…)okenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/251k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

In [5]:
# Defining a custom dataset class for sentient analysis:

class SentimentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(text, add_special_tokens=False, max_length=self.max_length, truncation=True, padding='max_length')
        input_ids = encoding["input_ids"]
        attention_mask = encoding["attention_mask"]

        return {
            "input_ids": torch.tensor(input_ids, dtype=torch.long),
            "attention_mask": torch.tensor(attention_mask, dtype=torch.long),
            "label": torch.tensor(label, dtype=torch.long)
        }

In [6]:
# Create the train, validation, and test sets:
train_data = pd.read_csv("dataset/cls_dataset/cls_train_dataset.csv")
validation_data = pd.read_csv("dataset/cls_dataset/cls_validation_dataset.csv")
test_data = pd.read_csv("dataset/cls_dataset/cls_test_dataset.csv")

# Data whose sentence and targeted sentiment are different:
different_s_t_data = pd.read_csv("dataset/cls_dataset/cls_diff_s_t.csv")

same_s_t_data = pd.read_csv("dataset/cls_dataset/cls_same_s_t.csv")

positive_t_data = pd.read_csv("dataset/cls_dataset/cls_positive_t.csv")

neutral_t_data = pd.read_csv("dataset/cls_dataset/cls_neutral_t.csv")

negative_t_data = pd.read_csv("dataset/cls_dataset/cls_negative_t.csv")

positive_s_t_data = pd.read_csv("dataset/cls_dataset/cls_positive_s_t.csv")

neutral_s_t_data = pd.read_csv("dataset/cls_dataset/cls_neutral_s_t.csv")

negative_s_t_data = pd.read_csv("dataset/cls_dataset/cls_negative_s_t.csv")

train_data.head()

Unnamed: 0,Text,Tweet URL,Targeted Sentiment,Sentence Sentiment,Target
0,[CLS] tüm maaşı sana yatırıyorum bi hediye gön...,https://twitter.com/empalii/status/12341991646...,negative,negative,trendyol
1,[CLS] trendyol para iademi sonunda yapmış artı...,https://twitter.com/tosbaalice/status/12552710...,negative,positive,trendyol
2,[CLS] napıyorsun sen?? trendyol,https://twitter.com/aleynozhn/status/121720847...,negative,negative,trendyol
3,[CLS] kurumsal şirketlere hizmet verdiğimiz #t...,https://twitter.com/eitatli/status/12552572590...,neutral,neutral,turkcell
4,[CLS] trendyol’da külçe altına yapılan tavsiye...,https://twitter.com/eegns/status/1253457666669...,neutral,positive,trendyol


In [7]:
# For the Baseline model, training and validation will be done by using sentence
# sentiments, but test will be done by using targeted sentiments:

train_texts = train_data["Text"]
train_labels = train_data["Sentence Sentiment"]
val_texts = validation_data["Text"]
val_labels = validation_data["Sentence Sentiment"]

test_texts = test_data["Text"]
test_labels = test_data["Targeted Sentiment"]

different_s_t_texts = different_s_t_data["Text"]
different_s_t_labels = different_s_t_data["Targeted Sentiment"]

same_s_t_texts = same_s_t_data["Text"]
same_s_t_labels = same_s_t_data["Targeted Sentiment"]

positive_t_texts = positive_t_data["Text"]
positive_t_labels = positive_t_data["Targeted Sentiment"]

neutral_t_texts = neutral_t_data["Text"]
neutral_t_labels = neutral_t_data["Targeted Sentiment"]

negative_t_texts = negative_t_data["Text"]
negative_t_labels = negative_t_data["Targeted Sentiment"]

positive_s_t_texts = positive_s_t_data["Text"]
positive_s_t_labels = positive_s_t_data["Targeted Sentiment"]

neutral_s_t_texts = neutral_s_t_data["Text"]
neutral_s_t_labels = neutral_s_t_data["Targeted Sentiment"]

negative_s_t_texts = negative_s_t_data["Text"]
negative_s_t_labels = negative_s_t_data["Targeted Sentiment"]

In [8]:
# Convert string labels to numerical values
label_mapping = {"negative": 0, "neutral": 1, "positive": 2}
train_labels = [label_mapping[label] for label in train_labels]
val_labels = [label_mapping[label] for label in val_labels]
test_labels = [label_mapping[label] for label in test_labels]
different_s_t_labels = [label_mapping[label] for label in different_s_t_labels]
same_s_t_labels = [label_mapping[label] for label in same_s_t_labels]
positive_t_labels = [label_mapping[label] for label in positive_t_labels]
negative_t_labels = [label_mapping[label] for label in negative_t_labels]
neutral_t_labels = [label_mapping[label] for label in neutral_t_labels]
positive_s_t_labels = [label_mapping[label] for label in positive_s_t_labels]
negative_s_t_labels = [label_mapping[label] for label in negative_s_t_labels]
neutral_s_t_labels = [label_mapping[label] for label in neutral_s_t_labels]

# Create SentimentDataset instances
train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
val_dataset = SentimentDataset(val_texts, val_labels, tokenizer)
test_dataset = SentimentDataset(test_texts, test_labels, tokenizer)
different_s_t_dataset = SentimentDataset(different_s_t_texts, different_s_t_labels, tokenizer)
same_s_t_dataset = SentimentDataset(same_s_t_texts, same_s_t_labels, tokenizer)
positive_t_dataset = SentimentDataset(positive_t_texts, positive_t_labels, tokenizer)
neutral_t_dataset = SentimentDataset(neutral_t_texts, neutral_t_labels, tokenizer)
negative_t_dataset = SentimentDataset(negative_t_texts, negative_t_labels, tokenizer)
positive_s_t_dataset = SentimentDataset(positive_s_t_texts, positive_s_t_labels, tokenizer)
neutral_s_t_dataset = SentimentDataset(neutral_s_t_texts, neutral_s_t_labels, tokenizer)
negative_s_t_dataset = SentimentDataset(negative_s_t_texts, negative_s_t_labels, tokenizer)

# Create DataLoader instances
batch_size = 24
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
different_s_t_loader = DataLoader(different_s_t_dataset, batch_size=batch_size, shuffle=False)
same_s_t_loader = DataLoader(same_s_t_dataset, batch_size=batch_size, shuffle=False)
positive_t_loader = DataLoader(positive_t_dataset, batch_size=batch_size, shuffle=False)
neutral_t_loader = DataLoader(neutral_t_dataset, batch_size=batch_size, shuffle=False)
negative_t_loader = DataLoader(negative_t_dataset, batch_size=batch_size, shuffle=False)
positive_s_t_loader = DataLoader(positive_s_t_dataset, batch_size=batch_size, shuffle=False)
neutral_s_t_loader = DataLoader(neutral_s_t_dataset, batch_size=batch_size, shuffle=False)
negative_s_t_loader = DataLoader(negative_s_t_dataset, batch_size=batch_size, shuffle=False)

In [9]:
# Load the pre-trained  BERTurk model and prepare it for sequence classification:

num_labels = 3  # positive, negative, and neutral
model = BertForSequenceClassification.from_pretrained("dbmdz/bert-base-turkish-cased", num_labels=num_labels)

Downloading pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were 

In [10]:
# Set the device and move the model to the device:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

model = model.to(device)

Sun Apr 23 20:12:15 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    25W / 300W |      2MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
# Get class counts for training dataset:

class_count_0 = train_labels.count(0)
class_count_1 = train_labels.count(1)
class_count_2 = train_labels.count(2)

def getKey(dict, value):
  return list(filter(lambda x: dict[x] == value, dict))[0]

print("Class counts:")
print(f"{getKey(label_mapping,0)}: {class_count_0}")
print(f"{getKey(label_mapping,1)}: {class_count_1}")
print(f"{getKey(label_mapping,2)}: {class_count_2}")

Class counts:
negative: 1298
neutral: 503
positive: 399


In [12]:
# Set the class weights and define the loss function:

from torch import nn
import torch.nn.functional as F

class_weights = torch.tensor([1 / class_count_0, 1 / class_count_1, 1 / class_count_2]).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

In [13]:
# Prepare the optimizer and learning rate scheduler:

epochs = 18
num_training_steps = epochs * len(train_loader)
lr = 1e-5
weight_decay = 0.1
warmup_steps = 300

optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=num_training_steps)




In [14]:
# Define helper functions for training and evaluation:

def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler):
    model = model.train()
    total_train_loss = 0
    
    for batch in data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        total_train_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    return total_train_loss / len(data_loader)

def eval_epoch(model, data_loader, loss_fn, device):
    model = model.eval()
    total_eval_loss = 0

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

            total_eval_loss += loss.item()

    return total_eval_loss / len(data_loader)


In [15]:
# Calculate performance metrics:

from sklearn.metrics import precision_recall_fscore_support

def get_predictions(model, data_loader, device):
    model = model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs.logits, dim=1)

            predictions.extend(preds.cpu().numpy().tolist())
            true_labels.extend(labels.cpu().numpy().tolist())

    return predictions, true_labels

def get_f1_scores(model, device):
    print("F1 Scores:")

    test_preds, test_labels = get_predictions(model, test_loader, device)
    test_f1 = precision_recall_fscore_support(test_labels, test_preds, average='weighted', zero_division=0)[2]
    print(f"\tTest dataset against targeted sentiment labels: {test_f1:.4f}")

    diff_s_t_preds, diff_s_t_labels = get_predictions(model, different_s_t_loader, device)
    diff_s_t_f1 = precision_recall_fscore_support(diff_s_t_labels, diff_s_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted and sentence sentiment are different: {diff_s_t_f1:.4f}")

    same_s_t_preds, same_s_t_labels = get_predictions(model, same_s_t_loader, device)
    same_s_t_f1 = precision_recall_fscore_support(same_s_t_labels, same_s_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted and sentence sentiment are the same: {same_s_t_f1:.4f}")

    positive_t_preds, positive_t_labels = get_predictions(model, positive_t_loader, device)
    positive_t_f1 = precision_recall_fscore_support(positive_t_labels, positive_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted sentiment is 'positive': {positive_t_f1:.4f}")

    positive_s_t_preds, positive_t_labels = get_predictions(model, positive_s_t_loader, device)
    positive_s_t_f1 = precision_recall_fscore_support(positive_s_t_labels, positive_s_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted sentiment and sentence sentiments are both 'positive': {positive_s_t_f1:.4f}")

    neutral_t_preds, neutral_t_labels = get_predictions(model, neutral_t_loader, device)
    neutral_t_f1 = precision_recall_fscore_support(neutral_t_labels, neutral_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted sentiment is 'neutral': {neutral_t_f1:.4f}")

    neutral_s_t_preds, neutral_t_labels = get_predictions(model, neutral_s_t_loader, device)
    neutral_s_t_f1 = precision_recall_fscore_support(neutral_s_t_labels, neutral_s_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted sentiment and sentence sentiments are both 'neutral': {neutral_s_t_f1:.4f}")

    negative_t_preds, negative_t_labels = get_predictions(model, negative_t_loader, device)
    negative_t_f1 = precision_recall_fscore_support(negative_t_labels, negative_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted sentiment is 'negative': {negative_t_f1:.4f}")

    negative_s_t_preds, negative_t_labels = get_predictions(model, negative_s_t_loader, device)
    negative_s_t_f1 = precision_recall_fscore_support(negative_s_t_labels, negative_s_t_preds, average='weighted', zero_division=0)[2]
    print(f"\tData whose targeted sentiment and sentence sentiments are both 'negative': {negative_s_t_f1:.4f}")

    return test_f1, diff_s_t_f1, same_s_t_f1, positive_t_f1, positive_s_t_f1, neutral_t_f1, neutral_s_t_f1, negative_t_f1, negative_s_t_f1

In [16]:
import csv

f = open("./models/baseline/results.csv","w")
writer = csv.writer(f)
writer.writerow(["Epoch","Train loss", "Val. loss", "Test loss", "Test data F1",\
            "Diff. labels F1", "Same labels F1", "T=positive F1", "TS=positive F1",\
            "T=neutral F1", "TS=neutral F1", "T=negative F1", "TS=negative F1"])
# Train and evaluate the model:

for epoch in range(epochs):
    print("-" * 10)
    print(f"Epoch {epoch+1}/{epochs}")

    train_loss = train_epoch(model, train_loader, loss_fn, optimizer, device, scheduler)
    print(f"Train Loss: {train_loss:.4f}")

    val_loss = eval_epoch(model, val_loader, loss_fn, device)
    print(f"Validation Loss: {val_loss:.4f}")

    test_loss = "-"

    if (epoch+1) % 3 == 0:

        test_loss = eval_epoch(model, test_loader, loss_fn, device)
        print(f"Test Loss: {test_loss:.4f}")

        model.save_pretrained(f"./models/baseline/epoch{epoch+1}/model")
        tokenizer.save_pretrained(f"./models/baseline/epoch{epoch+1}/tokenizer")
        print("Saved.")

    test_f1, diff_s_t_f1, same_s_t_f1, positive_t_f1, positive_s_t_f1, \
    neutral_t_f1, neutral_s_t_f1, negative_t_f1, negative_s_t_f1 = get_f1_scores(model, device)

    writer.writerow([epoch+1,train_loss,val_loss,test_loss,test_f1, diff_s_t_f1, same_s_t_f1, positive_t_f1, \
               positive_s_t_f1, neutral_t_f1, neutral_s_t_f1, negative_t_f1, negative_s_t_f1])

f.close()

----------
Epoch 1/18
Train Loss: 1.0664
Validation Loss: 0.9248
F1 Scores:
	Test dataset against targeted sentiment labels: 0.3143
	Data whose targeted and sentence sentiment are different: 0.0411
	Data whose targeted and sentence sentiment are the same: 0.4394
	Data whose targeted sentiment is 'positive': 0.0055
	Data whose targeted sentiment and sentence sentiments are both 'positive': 0.0042
	Data whose targeted sentiment is 'neutral': 0.0202
	Data whose targeted sentiment and sentence sentiments are both 'neutral': 0.0228
	Data whose targeted sentiment is 'negative': 0.9975
	Data whose targeted sentiment and sentence sentiments are both 'negative': 0.9980
----------
Epoch 2/18
Train Loss: 0.9540
Validation Loss: 0.8949
F1 Scores:
	Test dataset against targeted sentiment labels: 0.3223
	Data whose targeted and sentence sentiment are different: 0.0648
	Data whose targeted and sentence sentiment are the same: 0.4542
	Data whose targeted sentiment is 'positive': 0.0110
	Data whose tar