### Sinan Parmar

In this notebook we will train a BERT model from scratch on Turkish Language and implement a fine-tuning task on sentiment analysis with this pretrained model.

# 1 - BERTAN: Pretraining of BERT on Turkish Corpora

## 1.1 - Data

### 1.1.1 - Loading the data

In [1]:
#We are using the OSCAR dataset which is a combination of web crawls. 
#This dataset is availabe for donwload using the dataset package

from datasets import load_dataset

In [2]:
#Download the dataset.

dataset = load_dataset('oscar', 'unshuffled_deduplicated_tr')

Found cached dataset oscar (/Users/mrp3anut/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_tr/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2)


  0%|          | 0/1 [00:00<?, ?it/s]

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'text'],
        num_rows: 11596446
    })
})

In [3]:
example_text = dataset["train"][0]["text"]

In [4]:
example_text[:200]

'Son yıllarda görülen ay tutulmalarına göre daha etkili olacağı söylenen Kanlı veya Kırmızı Ay Tutulmasına saatler kaldı. Bu akşam (27 Temmuz 2018) gerçekleşecek olan bu tutulmayı ülkemizin her yerinde'

In [5]:
#Importing tqdm for visuals.

from tqdm.auto import tqdm

text_data = []

#Filling text_data to a list by using new line seprator to seperate list elements.
n_samples = 0

for sample in tqdm(dataset['train']):
    sample = sample['text'].replace('\n', '')
    text_data.append(sample)
    if n_samples==len(dataset["train"])//100:
        break
    else:
        n_samples+=1

  0%|          | 0/11596446 [00:00<?, ?it/s]

### 1.1.2 - Preprocess

In [6]:
#Divide data into train, test and validation splits.

from sklearn.model_selection import train_test_split

text_data_train, text_data_testval = train_test_split(text_data, test_size=0.2, random_state=42, shuffle=True)
text_data_test, text_data_val = train_test_split(text_data_testval, test_size=0.1, random_state=10, shuffle=True) 

In [7]:
print("Train size: {}".format(len(text_data_train)))
print("Test size: {}".format(len(text_data_test)))
print("Val size: {}".format(len(text_data_val)))

Train size: 92772
Test size: 20873
Val size: 2320


In [8]:
#Write data to text files to save for later use and tokenizer training.

with open(f'./data/text_micro_train.txt', 'w', encoding='utf-8') as fp:
    fp.write('\n'.join(text_data_train))
    
with open(f'./data/text_micro_test.txt', 'w', encoding='utf-8') as fp:
    fp.write('\n'.join(text_data_test))
    
with open(f'./data/text_micro_val.txt', 'w', encoding='utf-8') as fp:
    fp.write('\n'.join(text_data_val))

In [9]:
#Train tokenizer for vocab-token mapping.

import tokenizers
 
bwpt = tokenizers.BertWordPieceTokenizer()

In [10]:
#import glob to get the filepaths of the text files.
from glob import glob

filepaths = glob("./data/*")

In [11]:
#Train a tokenizer to tokenize and words/word-pairs.

bwpt.train(
    files=filepaths,
    vocab_size=50000,
    min_frequency=3,
    limit_alphabet=1000
)






In [12]:
bwpt.save_model("BERTANmicro")

['BERTANmicro/vocab.txt']

In [13]:
#Import BertTokenizerFast and LineByLineTextDataset to read the data by using our trained tokenizer.
from transformers import BertTokenizerFast, LineByLineTextDataset

In [14]:
tokenizer = BertTokenizerFast.from_pretrained("./BERTANmicro")

In [15]:
#Create training and validation datasets using the trained tokenizer.

In [16]:
val_dataset = LineByLineTextDataset(
    tokenizer = tokenizer,
    file_path = './data/text_micro_val.txt',
    block_size = 512  # maximum sequence length
)

train_dataset = LineByLineTextDataset(
    tokenizer = tokenizer,
    file_path = './data/text_micro_train.txt',
    block_size = 512  # maximum sequence length
)



## 1.2 - Model config and training

In [17]:
#Create a BERT config model

In [18]:
from transformers import BertConfig, BertForMaskedLM, DataCollatorForLanguageModeling

config = BertConfig(
    vocab_size=50000,
    hidden_size=128, 
    num_hidden_layers=2, 
    num_attention_heads=4,
    max_position_embeddings=512
)
 
model = BertForMaskedLM(config)
print('No of parameters: ', model.num_parameters())


data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)

No of parameters:  8245200


In [19]:
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback

#Enter training arguments, trainer and define callback for early stopping.

training_args = TrainingArguments(
    output_dir='./BERTANmicro/',
    overwrite_output_dir=True,
    num_train_epochs=10,
    logging_steps =10000,
    save_steps=10000,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    prediction_loss_only=True,
    save_total_limit=3,
    evaluation_strategy="steps",
    metric_for_best_model = 'eval_loss',
    load_best_model_at_end = True,
    use_mps_device = True
)



trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]
)

In [None]:
#Train
trainer.train()

## 1.3 - Evaluation

In [13]:
from transformers import TrainingArguments, Trainer, BertForMaskedLM, EarlyStoppingCallback
import torch

#Empty GPU memmory.

torch.cuda.empty_cache()

#Load BERTAN

model = BertForMaskedLM.from_pretrained("./BERTAN/checkpoint-370000/")

In [26]:
test_dataset = LineByLineTextDataset(
    tokenizer = tokenizer,
    file_path = './data/text_micro_test.txt',
    block_size = 512  # maximum sequence length
)

Creating features from dataset file at ./data/text_micro_test.txt


In [27]:
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback

#Enter evaluation arguments, trainer and define callback for early stopping.

training_args = TrainingArguments(
    output_dir='./BERTANmicro/',
    overwrite_output_dir=True,
    per_device_eval_batch_size=8,
    use_mps_device = True
)


trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    eval_dataset=test_dataset
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
#Evaluate
model_eval = trainer.evaluate()

# 2 - Fine-tuning BERTAN for Turkish Sentiment Classification on Product Reviews

## 2.1 - Data

### 2.1.1 - Loading the data

In [3]:
# Again we use a dataset from HugginFace's datasets
from datasets import load_dataset

tsentiment = load_dataset("winvoker/turkish-sentiment-analysis-dataset")

Using custom data configuration winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48
Found cached dataset csv (/Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
tsentiment["train"][0]

{'text': 'ürünü hepsiburadadan alalı 3 hafta oldu. orjinal ve eksiksiz şekilde geldi. şarj konusunda 1 günü rahat çıkarıyor oyun oynamama rağmen. teslimat sürecide hızlı gerçekleşti. en uygun fiyata iphone kalitesi kaçırmayın..',
 'label': 'Positive',
 'dataset': 'urun_yorumlari'}

### 2.1.2 - Preprocess

In [5]:
#split our data into train,validation and test splits

train_val_dataset = tsentiment["train"].train_test_split(test_size=0.01, seed=54)


train_dataset = train_val_dataset["train"].shuffle(seed=2)
val_dataset = train_val_dataset["test"].shuffle(seed=2)
test_dataset = tsentiment["test"].shuffle(seed=2)

Loading cached split indices for dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-05a0471dcb2f7a30.arrow and /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-f1f9b112779e29e9.arrow
Loading cached shuffled indices for dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-d9c24b5afa7fbe0a.arrow
Loading cached shuffled indices for dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cac

In [6]:
#Turn labels to integers

train_label = []
for label in train_dataset["label"]:
    if label=="Negative":
        train_label.append(0)
        
    if label=="Notr":
        train_label.append(1)
        
    if label=="Positive":
        train_label.append(2)

test_label = []
for label in test_dataset["label"]:
    if label=="Negative":
        test_label.append(0)
        
    if label=="Notr":
        test_label.append(1)
        
    if label=="Positive":
        test_label.append(2)

val_label = []
for label in val_dataset["label"]:
    if label=="Negative":
        val_label.append(0)
        
    if label=="Notr":
        val_label.append(1)
        
    if label=="Positive":
        val_label.append(2)

In [7]:
#Replace text labels with text and remove some columns

train_dataset = train_dataset.remove_columns(["label","dataset"])
val_dataset = val_dataset.remove_columns(["label","dataset"])
test_dataset = test_dataset.remove_columns(["label","dataset"])

train_dataset = train_dataset.add_column("label",train_label)
val_dataset = val_dataset.add_column("label",val_label)
test_dataset = test_dataset.add_column("label",test_label)

Loading cached processed dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-1c9b7640a12338ff.arrow
Loading cached processed dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-2ab5c4a195f1699d.arrow
Loading cached processed dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-9ef29fb605cb25eb.arrow


In [8]:
#Import pre-trained BERTAN tokenizer.

from transformers import BertTokenizerFast
tokenizer = BertTokenizerFast.from_pretrained("./BERTAN/")

In [9]:
#Create as preprocess function to process our data.
#As our data is small we won't be using LineByLineText so we need to tokenize beforehand.

def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=512)

In [10]:
#Tokenize the datasets

tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_test = test_dataset.map(preprocess_function, batched=True)
tokenized_val = val_dataset.map(preprocess_function, batched=True)

#A little bit more cleaning...
tokenized_train = tokenized_train.remove_columns(["text"])
tokenized_test = tokenized_test.remove_columns(["text"])
tokenized_val = tokenized_val.remove_columns(["text"])

Loading cached processed dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-312c9f1e7770cfc2.arrow
Loading cached processed dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-3cb885b9f1c2beb7.arrow
Loading cached processed dataset at /Users/mrp3anut/.cache/huggingface/datasets/winvoker___csv/winvoker--turkish-sentiment-analysis-dataset-d9120c0cfbe0af48/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-5c2f01bad9d9733f.arrow


In [11]:
tokenized_train["input_ids"][0]

[2,
 1583,
 9168,
 32456,
 1585,
 57,
 1845,
 10831,
 1566,
 3119,
 13065,
 2018,
 18,
 18,
 18,
 9918,
 6954,
 8267,
 6792,
 1911,
 1710,
 8548,
 31453,
 46127,
 1821,
 1870,
 20423,
 16847,
 2982,
 2335,
 31,
 2561,
 1655,
 6853,
 4653,
 18,
 18,
 18,
 2335,
 5521,
 2345,
 17867,
 2569,
 1785,
 1974,
 18,
 10755,
 1566,
 6794,
 7860,
 27250,
 21792,
 12234,
 31,
 13,
 3]

## 2.2 - Model config and training

In [12]:
#Import the data loader for the model and define compute metrics for training.

from transformers import DataCollatorWithPadding
import numpy as np
import evaluate

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [15]:
from transformers import TrainingArguments, Trainer, BertForSequenceClassification, EarlyStoppingCallback
import torch

#Empty GPU memmory.

torch.cuda.empty_cache()

#Load BERTAN

model = BertForSequenceClassification.from_pretrained("./BERTAN/checkpoint-370000/", num_labels=3)

#Define training arguments and callbacks
 
training_args = TrainingArguments(
    output_dir="./tsentiment_BERTAN/",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    save_strategy="steps",
    save_steps=1000,
    eval_steps=500,
    evaluation_strategy="steps",
    logging_steps=500,
    save_total_limit=5,
    prediction_loss_only=False,
    metric_for_best_model = 'accuracy',
    load_best_model_at_end = True,
    use_mps_device=True
)

#Train

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

loading configuration file ./BERTAN/checkpoint-370000/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 50000
}

loading weights file ./BERTAN/checkpoint-370000/pytorch_model.bin
Some weights of the model checkpoint at ./BERTAN/checkpoint-370000/ were not used when initializing B

In [16]:
#Train
trainer.train()

***** Running training *****
  Num examples = 436272
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 109068
  Number of trainable parameters = 81916419


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

## 2.3 - Evalution

In [1]:
from transformers import TrainingArguments, Trainer, BertForSequenceClassification
import torch

#Empty GPU memmory.

torch.cuda.empty_cache()

#Load BERTAN
model = BertForSequenceClassification.from_pretrained("./tsentiment_BERTAN/checkpoint-11000/", num_labels=3)
    
#Enter evaluation arguments, trainer and define callback for early stopping.

training_args = TrainingArguments(
    output_dir="./tsentiment_BERTAN/",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    use_mps_device=True
)
 
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

NameError: name 'tokenized_test' is not defined

In [44]:
#Evaluate
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 48965
  Batch size = 8


{'eval_loss': 0.09338823705911636,
 'eval_accuracy': 0.9683651587868886,
 'eval_runtime': 859.4299,
 'eval_samples_per_second': 56.974,
 'eval_steps_per_second': 7.122}

In [45]:
pred = trainer.predict(tokenized_test)

***** Running Prediction *****
  Num examples = 48965
  Batch size = 8


In [59]:
f1_metric = evaluate.load("f1")
f1 = f1_metric.compute(predictions=pred[0].argmax(-1), references=tokenized_test["label"], average="weighted")

In [60]:
f1

{'f1': 0.9680109183650836}

In [61]:
roc_auc = evaluate.load("roc_auc","multilabel")


ref = np.zeros((len(tokenized_test["label"]), max(tokenized_test["label"]) + 1))
ref[np.arange(len(tokenized_test["label"])), tokenized_test["label"]] = 1

roc_auc = roc_auc.compute(prediction_scores=pred[0], references=ref)

In [62]:
roc_auc

{'roc_auc': 0.9922663277468269}

# 3 - Hands-on with both models: MLM with BERTAN and Sentiment Classification with duygusalBERTAN

# 3.1 Masked Language Modeling with BERTAN

In [1]:
from transformers import BertForMaskedLM, BertTokenizerFast, pipeline

#Load model, tokenizer and setup mask filling pipeline

model = BertForMaskedLM.from_pretrained("./BERTAN/checkpoint-370000/")

tokenizer = BertTokenizerFast.from_pretrained("./BERTAN/")

fill_mask = pipeline(
    "fill-mask",
    model=model,
    tokenizer=tokenizer
)

In [2]:
fill_mask("Bugün eve giderken yere düştüm ve canım [MASK].")

[{'score': 0.19067342579364777,
  'token': 25731,
  'token_str': 'yandı',
  'sequence': 'bugun eve giderken yere dustum ve canım yandı.'},
 {'score': 0.1750452071428299,
  'token': 6748,
  'token_str': 'cekti',
  'sequence': 'bugun eve giderken yere dustum ve canım cekti.'},
 {'score': 0.09005220234394073,
  'token': 26688,
  'token_str': 'yanıyor',
  'sequence': 'bugun eve giderken yere dustum ve canım yanıyor.'},
 {'score': 0.07144200801849365,
  'token': 42287,
  'token_str': 'sıkıldı',
  'sequence': 'bugun eve giderken yere dustum ve canım sıkıldı.'},
 {'score': 0.05144298076629639,
  'token': 21414,
  'token_str': 'sagolsun',
  'sequence': 'bugun eve giderken yere dustum ve canım sagolsun.'}]

In [5]:
fill_mask("Akşam namazı geç saatte Nafi Baba [MASK] kılınıdı.")

[{'score': 0.9608170390129089,
  'token': 42646,
  'token_str': 'camiinde',
  'sequence': 'aksam namazı gec saatte nafi baba camiinde kılınıdı.'},
 {'score': 0.01153489574790001,
  'token': 7348,
  'token_str': 'evinde',
  'sequence': 'aksam namazı gec saatte nafi baba evinde kılınıdı.'},
 {'score': 0.003500595223158598,
  'token': 2001,
  'token_str': 'tarafından',
  'sequence': 'aksam namazı gec saatte nafi baba tarafından kılınıdı.'},
 {'score': 0.002813145285472274,
  'token': 11090,
  'token_str': 'koyunde',
  'sequence': 'aksam namazı gec saatte nafi baba koyunde kılınıdı.'},
 {'score': 0.0022763388697057962,
  'token': 8669,
  'token_str': 'camii',
  'sequence': 'aksam namazı gec saatte nafi baba camii kılınıdı.'}]

In [3]:
fill_mask("Okuldaki en zor ders [MASK] dersi")

[{'score': 0.22464631497859955,
  'token': 6148,
  'token_str': 'matematik',
  'sequence': 'okuldaki en zor ders matematik dersi'},
 {'score': 0.0655607357621193,
  'token': 4126,
  'token_str': 'ingilizce',
  'sequence': 'okuldaki en zor ders ingilizce dersi'},
 {'score': 0.03280796855688095,
  'token': 24860,
  'token_str': 'geometri',
  'sequence': 'okuldaki en zor ders geometri dersi'},
 {'score': 0.023688795045018196,
  'token': 42131,
  'token_str': 'zili',
  'sequence': 'okuldaki en zor ders zili dersi'},
 {'score': 0.02036508359014988,
  'token': 4340,
  'token_str': 'fizik',
  'sequence': 'okuldaki en zor ders fizik dersi'}]

# 3.2 Sentiment Classification with duygusalBERTAN

In [4]:
from transformers import BertForSequenceClassification, BertTokenizerFast, pipeline

#Load model, tokenizer and setup mask filling pipeline

model = BertForSequenceClassification.from_pretrained("./tsentiment_BERTAN/checkpoint-13000/", num_labels=3)

tokenizer = BertTokenizerFast.from_pretrained("./BERTAN/")

text_classify = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer
)

In [5]:
text_classify("Ne kadar kötü bir telefon, Steve Jobs gelse yüzünüze tükürüdü.")

[{'label': 'NEGATIVE', 'score': 0.992276132106781}]

In [6]:
text_classify("Gençleştim resmen bu kadar mı farkeder?")

[{'label': 'POSITIVE', 'score': 0.9725152254104614}]

In [7]:
text_classify("Arjantin milli futbol takımının kaptanı Lionel Messi'dir.")

[{'label': 'NEUTRAL', 'score': 0.9999432563781738}]

In [8]:
text_classify("İnekler sulak ortamda otlanmayı sever")

[{'label': 'NEUTRAL', 'score': 0.9990272521972656}]