In [14]:
# Mount Google Drive in the Colab runtime; the project files live under MyDrive.

from google.colab import drive
drive.mount('/content/drive')

# Change the working directory to the project directory so that relative
# paths in this notebook are resolved against the project root.

import os
os.chdir('/content/drive/MyDrive/turkish-tsa/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
#Install the Hugging Face Transformers library:

!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [16]:
# Import the necessary modules:

import torch
import numpy as np
from transformers import BertTokenizerFast, BertForSequenceClassification

In [17]:
# Epoch numbers of the saved checkpoints to compare: 6, 9, 12, 15, 18.
epochs = list(range(6, 19, 3))

In [18]:
# Registries filled by the loading cells: epoch number -> model / tokenizer,
# one pair of dicts per model family.
baseline_models, baseline_tokenizers = {}, {}
t_bert_models, t_bert_tokenizers = {}, {}
t_bert_marked_models, t_bert_marked_tokenizers = {}, {}

In [19]:
# Load the baseline classifier and its tokenizer for every checkpoint epoch
# and register both under the epoch number.
for epoch in epochs:
    baseline_model, baseline_tokenizer = (
        BertForSequenceClassification.from_pretrained(f"./models/baseline/epoch{epoch}/model"),
        BertTokenizerFast.from_pretrained(f"./models/baseline/epoch{epoch}/tokenizer"),
    )
    baseline_models[epoch], baseline_tokenizers[epoch] = baseline_model, baseline_tokenizer

In [20]:
# Load the T-BERT classifier and its tokenizer for every checkpoint epoch
# and register both under the epoch number.
for epoch in epochs:
    t_bert, t_bert_tokenizer = (
        BertForSequenceClassification.from_pretrained(f"./models/t-bert/epoch{epoch}/model"),
        BertTokenizerFast.from_pretrained(f"./models/t-bert/epoch{epoch}/tokenizer"),
    )
    t_bert_models[epoch], t_bert_tokenizers[epoch] = t_bert, t_bert_tokenizer

In [21]:
# Load the T-BERT_marked classifier and its tokenizer for every checkpoint epoch
# and register both under the epoch number.
for epoch in epochs:
    t_bert_marked, t_bert_marked_tokenizer = (
        BertForSequenceClassification.from_pretrained(f"./models/t-bert_marked/epoch{epoch}/model"),
        BertTokenizerFast.from_pretrained(f"./models/t-bert_marked/epoch{epoch}/tokenizer"),
    )
    t_bert_marked_models[epoch], t_bert_marked_tokenizers[epoch] = t_bert_marked, t_bert_marked_tokenizer

In [22]:
def predict_sentiment(text, model, tokenizer):
    """Return the index of the highest-scoring class that `model` assigns to `text`.

    Args:
        text: The text to classify. The result is extracted with `.item()`,
            so the call must produce exactly one prediction (a single text).
        model: A sequence-classification model; it is called with the input ids
            and attention mask and must return an object exposing `.logits`.
        tokenizer: Callable that turns `text` into PyTorch tensors and returns a
            mapping with the keys "input_ids" and "attention_mask".

    Returns:
        The predicted class index as a plain Python int.
    """
    # Inference only: put the model into evaluation mode.
    model.eval()

    # Inputs are truncated to at most 512 tokens.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # No gradients are needed for prediction.
    with torch.no_grad():
        logits = model(encoded["input_ids"], attention_mask=encoded["attention_mask"]).logits

    return logits.argmax(dim=1).item()

In [23]:
import re

def _turkish_lower(text):
    """Lowercase `text` using the Turkish dotted/dotless i rules."""
    # Plain str.lower() maps 'I' to 'i' and 'İ' to 'i' plus a combining dot,
    # so both capitals are replaced explicitly before lowercasing.
    return text.replace('I', 'ı').replace('İ', 'i').lower()


def process_sentence(sentence, target_word):
    """Normalize a tweet-like sentence and mark the target word with [TAR] tokens.

    Steps, in order:
      1. Lowercase the sentence and the target with Turkish casing rules.
      2. Strip the '#' from hashtags (the hashtag text is kept), remove
         @mentions, and remove URLs (anything starting with "http" or "www").
      3. Collapse every run of whitespace (including newlines) into one space.
         This is done after the removals so they cannot leave double or
         leading spaces behind.
      4. Wrap every whole-word occurrence of the target in "[TAR] ... [TAR]".
      5. Prefix the result with "[CLS] ".

    Args:
        sentence: Raw input text.
        target_word: Word or phrase whose occurrences are marked. Surrounding
            whitespace is ignored; if nothing remains, no markers are added.

    Returns:
        The processed sentence as a string starting with "[CLS]".
    """
    target_word = _turkish_lower(target_word).strip()
    sentence = _turkish_lower(sentence)

    # Hashtags: drop the '#' but keep the word itself.
    sentence = re.sub(r'#(\w+)', r'\1', sentence)

    # Mentions are removed entirely.
    sentence = re.sub(r'@\w+', '', sentence)

    # URLs are removed entirely.
    sentence = re.sub(r'http\S+|www\S+', '', sentence)

    # Normalize whitespace last so the removals above leave no gaps.
    sentence = re.sub(r'\s+', ' ', sentence).strip()

    # An empty pattern would match at every word boundary and scatter [TAR]
    # markers through the whole sentence, so only mark a non-empty target.
    if target_word:
        sentence = re.sub(r'\b({0})\b'.format(re.escape(target_word)), r'[TAR] \1 [TAR]', sentence)

    # NOTE(review): BERT tokenizers normally add [CLS] themselves; presumably this
    # literal prefix mirrors the preprocessing used at training time - confirm.
    sentence = '[CLS] ' + sentence

    return sentence.strip()

In [85]:
trial_sentence = "E-ticaret sitelerinin maalesef sorunları var. Ama Trendyol iyi çalışıyor."
target = "trendyol"
trial_sentence_tar = process_sentence(trial_sentence, target)

print(trial_sentence_tar)

# Maps the predicted class index to a readable label.
# NOTE(review): assumed to match the label encoding used during training - confirm.
sentiment_label_map = {0: "negative", 1: "neutral", 2: "positive"}

# Compare the three model families checkpoint by checkpoint.
for epoch in epochs:

    print(f"\nEpoch : {epoch}")

    # Pick the model/tokenizer pair of this epoch for each family.
    # (The tokenizer name was previously misspelled, so the baseline prediction
    # silently used a tokenizer left over from the loading cell.)
    baseline_model = baseline_models[epoch]
    baseline_tokenizer = baseline_tokenizers[epoch]
    t_bert_model = t_bert_models[epoch]
    t_bert_tokenizer = t_bert_tokenizers[epoch]
    t_bert_marked_model = t_bert_marked_models[epoch]
    t_bert_marked_tokenizer = t_bert_marked_tokenizers[epoch]

    # The baseline gets the raw sentence; both T-BERT variants get the
    # preprocessed sentence with the target wrapped in [TAR] markers.
    baseline_prediction = predict_sentiment(trial_sentence, baseline_model, baseline_tokenizer)
    t_bert_prediction = predict_sentiment(trial_sentence_tar, t_bert_model, t_bert_tokenizer)
    t_bert_marked_prediction = predict_sentiment(trial_sentence_tar, t_bert_marked_model, t_bert_marked_tokenizer)

    baseline_sentiment_label = sentiment_label_map[baseline_prediction]
    t_bert_sentiment_label = sentiment_label_map[t_bert_prediction]
    t_bert_marked_sentiment_label = sentiment_label_map[t_bert_marked_prediction]

    print(f"\tBaseline Model's predicted sentiment: {baseline_sentiment_label}")
    print(f"\tT-BERT's predicted sentiment: {t_bert_sentiment_label}")
    print(f"\tT-BERT_marked's predicted sentiment: {t_bert_marked_sentiment_label}")

[CLS] e-ticaret sitelerinin maalesef sorunları var. ama [TAR] trendyol [TAR] iyi çalışıyor.

Epoch : 6
	Baseline Model's predicted sentiment: neutral
	T-BERT's predicted sentiment: neutral
	T-BERT_marked's predicted sentiment: neutral

Epoch : 9
	Baseline Model's predicted sentiment: neutral
	T-BERT's predicted sentiment: neutral
	T-BERT_marked's predicted sentiment: neutral

Epoch : 12
	Baseline Model's predicted sentiment: neutral
	T-BERT's predicted sentiment: neutral
	T-BERT_marked's predicted sentiment: neutral

Epoch : 15
	Baseline Model's predicted sentiment: neutral
	T-BERT's predicted sentiment: neutral
	T-BERT_marked's predicted sentiment: neutral

Epoch : 18
	Baseline Model's predicted sentiment: neutral
	T-BERT's predicted sentiment: neutral
	T-BERT_marked's predicted sentiment: neutral


In [35]:
# Classify a single tweet with the epoch-9 T-BERT checkpoint.
# NOTE: `tweet` is empty here; fill in the text to classify before running.
tweet = ""
target = "whatsapp"

text = process_sentence(tweet, target)

t_bert = BertForSequenceClassification.from_pretrained(f"./models/t-bert/epoch9/model")
t_bert_tokenizer = BertTokenizerFast.from_pretrained(f"./models/t-bert/epoch9/tokenizer")

# Predict with the checkpoint loaded just above. The previous code passed
# `t_bert_model`, a leftover global from the comparison loop, so the epoch-9
# model loaded here was never actually used.
t_bert_prediction = predict_sentiment(text, t_bert, t_bert_tokenizer)
# `sentiment_label_map` is defined in the comparison cell above.
t_bert_sentiment_label = sentiment_label_map[t_bert_prediction]

print(t_bert_sentiment_label)

neutral
