In [36]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Installations and Dependencies

In [None]:
! pip uninstall -q transformers -y
! pip uninstall -q tensorflow -y
! pip uninstall -q tf_keras -y

In [None]:
! pip install -q tensorflow==2.15

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m442.0/442.0 kB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorstore 0.1.69 requires ml-dtypes>=0.3.1, but you have ml-dtypes 0.2.0 which is incompatible.[0m[3

In [None]:
! pip install -q tf_keras==2.15.1

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m66.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
! pip install transformers==4.37.2

Collecting transformers==4.37.2
  Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.19,>=0.14 (from transformers==4.37.2)
  Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.37.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenize

# Models

In [None]:
from transformers import TFDistilBertModel, DistilBertTokenizer
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
def cls_model(base_model,
                          max_sequence_length=512,
                          hidden_size = 100,
                          dropout=0.3,
                          learning_rate=0.00005):
    """
    Build a simple classification model with BERT. Use the CLS Token output for classification purposes.
    """
    input_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='attention_mask')

    inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}

    out = base_model(inputs)

    cls_token = out[0][:, 0, :]

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(cls_token)


    hidden = tf.keras.layers.Dropout(dropout)(hidden)


    classification = tf.keras.layers.Dense(1, activation='sigmoid',name='classification_layer')(hidden)

    classification_model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=[classification])

    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                 loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                                 metrics='accuracy')

    return classification_model

In [None]:
test_pairs = pd.read_csv('drive/MyDrive/bias_test_med.csv')

In [None]:
import numpy as np
test_input = test_pairs['src_raw'].values.tolist()
test_output_wnc = test_pairs['tgt_raw'].values.tolist()

In [None]:
from transformers import TFDistilBertModel, DistilBertTokenizer
import tensorflow as tf
from tensorflow.keras import layers, models

MAX_SEQUENCE_LENGTH = 512
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

In [None]:

test_input_tokenized = tokenizer(test_input,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

test_output_wnc_tokenized = tokenizer(test_output_wnc,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')


In [None]:
dbertmodel = TFDistilBertModel.from_pretrained('distilbert-base-cased')

distilbert_cls_model = cls_model(dbertmodel)

distilbert_cls_model.load_weights("drive/MyDrive/cls_weights.02-0.71.hdf5")

model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [None]:
distilbert_cls_predictions_test_input_tokenized = distilbert_cls_model.predict(dict(test_input_tokenized))
print(f"Average prediction for label 0: {distilbert_cls_predictions_test_input_tokenized.mean()}")

distilbert_cls_predictions_test_output_wnc_tokenized = distilbert_cls_model.predict(dict(test_output_wnc_tokenized))
print(f"Average prediction for label 1: {distilbert_cls_predictions_test_output_wnc_tokenized.mean()}")


Average prediction for label 0: 0.37041863799095154
Average prediction for label 1: 0.6522296071052551


In [None]:
llama1 = pd.read_csv('drive/MyDrive/results1.csv')

In [None]:
llama1_tokenized = tokenizer(llama1["0"].to_list(),
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')


In [None]:
distilbert_cls_predictions_llama1_tokenized = distilbert_cls_model.predict(dict(llama1_tokenized))
print(f"Average prediction for llama1: {distilbert_cls_predictions_llama1_tokenized.mean()}")

Average prediction for label 0: 0.6240103840827942


In [None]:
llama2 = pd.read_csv('drive/MyDrive/results2.csv')

In [None]:
llama2_tokenized = tokenizer(llama2["0"].to_list(),
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

In [None]:
distilbert_cls_predictions_llama2_tokenized = distilbert_cls_model.predict(dict(llama2_tokenized))
print(f"Average prediction for llama2: {distilbert_cls_predictions_llama2_tokenized.mean()}")

Average prediction for label 0: 0.6373761296272278


In [None]:
llama3 = pd.read_csv('drive/MyDrive/results3.csv')

In [None]:
llama3_tokenized = tokenizer(llama3["0"].to_list(),
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

In [None]:
distilbert_cls_predictions_llama3_tokenized = distilbert_cls_model.predict(dict(llama3_tokenized))
print(f"Average prediction for llama3: {distilbert_cls_predictions_llama3_tokenized.mean()}")

Average prediction for llama3: 0.6450587511062622


In [None]:
text1 = ['A lead programmer usually spends his career mired in obscurity.', "He started writing books revealing a vast world conspiracy.",
                        "The lyrics are about mankind's perceived idea of hell.", "Jewish forces overcome Arab militants.",
                        'Marriage is a holy union of individuals.']

text2 = ['a lead programmer usually spends his career working low down the ladder mired in obscurity.', "he started writing books revealing a conspiracy.",
                        "the lyrics are about mankind's perceived idea of hell.", "jewish forces overcome arab forces.",
                        'marriage is a union of individuals.']

In [None]:
text1_tokenized = tokenizer(text1,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

text2_tokenized = tokenizer(text2,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

In [None]:
distilbert_cls_predictions_text1_tokenized = distilbert_cls_model.predict(dict(text1_tokenized))
print(distilbert_cls_predictions_text1_tokenized)
distilbert_cls_predictions_text2_tokenized = distilbert_cls_model.predict(dict(text2_tokenized))
print(distilbert_cls_predictions_text2_tokenized)

[[0.3975535 ]
 [0.37416416]
 [0.12668431]
 [0.56496733]
 [0.19533326]]
[[0.27833807]
 [0.47922322]
 [0.12509482]
 [0.6585197 ]
 [0.7867228 ]]


In [None]:
print(distilbert_cls_predictions_text1_tokenized.mean())
print(1-distilbert_cls_predictions_text1_tokenized.mean())
print(distilbert_cls_predictions_text2_tokenized.mean())
print(1-distilbert_cls_predictions_text2_tokenized.mean())

0.33174053
0.668259471654892
0.4655797
0.5344203114509583


In [None]:
t5 = pd.read_csv('drive/MyDrive/test_t5.csv')

In [None]:
t5_tokenized = tokenizer(t5["0"].to_list(),
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

In [None]:
distilbert_cls_predictions_t5_tokenized = distilbert_cls_model.predict(dict(t5_tokenized))
print(f"Average prediction for t5: {distilbert_cls_predictions_t5_tokenized.mean()}")

Average prediction for t5: 0.6036246418952942


In [None]:
llama_etienne = pd.read_csv('drive/MyDrive/bias_test_med_eutralized_All.csv')

In [None]:
llama_etienne

Unnamed: 0,text
0,Stowe school is a well-known British independe...
1,He was thereafter undisturbed.
2,Control of drug use and its negative consequen...
3,Breggin made that statement in 1994 after Eli ...
4,"The poem, written in approximately 7,400 dacty..."
...,...
7693,"(bruford has disowned the album entirely, and ..."
7694,"however, the former united star was injured ag..."
7695,never back down is a terrible film set for the...
7696,in a letter to the editor describing fallaciou...


In [None]:
llama_etienne_tokenized = tokenizer(llama_etienne["text"].to_list(),
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

In [None]:
distilbert_cls_predictions_llama_etienne_tokenized = distilbert_cls_model.predict(dict(llama_etienne_tokenized))
print(f"Average prediction for llama: {distilbert_cls_predictions_llama_etienne_tokenized.mean()}")

Average prediction for t5: 0.48996686935424805


In [None]:
text1 = ['My sister is 40.', "I am 44.",
                        'She is four years younger than me.']

In [None]:
text1_tokenized = tokenizer(text1,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

In [None]:
distilbert_cls_predictions_text1_tokenized = distilbert_cls_model.predict(dict(text1_tokenized))
print(distilbert_cls_predictions_text1_tokenized)

[[0.22575663]
 [0.92731786]
 [0.7182252 ]]


In [None]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:

llama3_seq = llama3["0"].to_list()
t5_seq = t5["0"].to_list()
llama_etienne_seq = llama_etienne["text"].to_list()

In [None]:
#evaluate metrics

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu
from rouge import Rouge
from sklearn.metrics import accuracy_score
from difflib import SequenceMatcher

# Compute BLEU score
bleu_scores = [sentence_bleu([target_sequence], predicted_sequence) for target_sequence, predicted_sequence in zip(test_output_wnc, llama3_seq)]
average_bleu_score = sum(bleu_scores) / len(bleu_scores)
overall_bleu_score = corpus_bleu([[target_sequence] for target_sequence in test_output_wnc], llama3_seq)

# Compute ROUGE score
rouge = Rouge()
rouge_scores = rouge.get_scores([predicted_sequence for predicted_sequence in llama3_seq], [target_sequence for target_sequence in test_output_wnc], avg=True)

# Compute accuracy
accuracy = accuracy_score(test_output_wnc, llama3_seq)

# Compute Similar Sequence Matcher score
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()
similarity_score = similar(test_output_wnc, llama3_seq)

# Print or store the evaluation metrics
print("BLEU Score (Average):", average_bleu_score)
print("BLEU Score (Overall):", overall_bleu_score)
print("ROUGE Score (Avg):", rouge_scores)
print("Accuracy:", accuracy)
print("Similarity:", similarity_score)


BLEU Score (Average): 0.847922687873869
BLEU Score (Overall): 0.89604171056325
ROUGE Score (Avg): {'rouge-1': {'r': 0.8844185690753833, 'p': 0.8807563876204045, 'f': 0.877765906741039}, 'rouge-2': {'r': 0.8158105360529827, 'p': 0.8094704485459956, 'f': 0.8071905262898516}, 'rouge-l': {'r': 0.8797107355800987, 'p': 0.8760395179039282, 'f': 0.873110353811559}}
Accuracy: 0.2239251850889726
Similarity: 0.2239251850889726


In [None]:
#evaluate metrics

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu
from rouge import Rouge
from sklearn.metrics import accuracy_score
from difflib import SequenceMatcher

# Compute BLEU score
bleu_scores = [sentence_bleu([target_sequence], predicted_sequence) for target_sequence, predicted_sequence in zip(test_output_wnc, t5_seq)]
average_bleu_score = sum(bleu_scores) / len(bleu_scores)
overall_bleu_score = corpus_bleu([[target_sequence] for target_sequence in test_output_wnc], t5_seq)

# Compute ROUGE score
rouge = Rouge()
rouge_scores = rouge.get_scores([predicted_sequence for predicted_sequence in t5_seq], [target_sequence for target_sequence in test_output_wnc], avg=True)

# Compute accuracy
accuracy = accuracy_score(test_output_wnc, t5_seq)

# Compute Similar Sequence Matcher score
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()
similarity_score = similar(test_output_wnc, t5_seq)

# Print or store the evaluation metrics
print("BLEU Score (Average):", average_bleu_score)
print("BLEU Score (Overall):", overall_bleu_score)
print("ROUGE Score (Avg):", rouge_scores)
print("Accuracy:", accuracy)
print("Similarity:", similarity_score)


BLEU Score (Average): 0.8749145828943306
BLEU Score (Overall): 0.9164121051481351
ROUGE Score (Avg): {'rouge-1': {'r': 0.9128900563104331, 'p': 0.9052415177812486, 'f': 0.9053199000318509}, 'rouge-2': {'r': 0.8557348345452244, 'p': 0.8472351677003186, 'f': 0.8473407663001337}, 'rouge-l': {'r': 0.909562457297825, 'p': 0.9018349443004307, 'f': 0.901985618363058}}
Accuracy: 0.23080919599948047
Similarity: 0.23080919599948047


In [None]:
#evaluate metrics

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu
from rouge import Rouge
from sklearn.metrics import accuracy_score
from difflib import SequenceMatcher

# Compute BLEU score
bleu_scores = [sentence_bleu([target_sequence], predicted_sequence) for target_sequence, predicted_sequence in zip(test_output_wnc[:-1], llama_etienne_seq)]
average_bleu_score = sum(bleu_scores) / len(bleu_scores)
overall_bleu_score = corpus_bleu([[target_sequence] for target_sequence in test_output_wnc[:-1]], llama_etienne_seq)

# Compute ROUGE score
rouge = Rouge()
rouge_scores = rouge.get_scores([predicted_sequence for predicted_sequence in llama_etienne_seq], [target_sequence for target_sequence in test_output_wnc[:-1]], avg=True)

# Compute accuracy
accuracy = accuracy_score(test_output_wnc[:-1], llama_etienne_seq)

# Compute Similar Sequence Matcher score
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()
similarity_score = similar(test_output_wnc[:-1], llama_etienne_seq)

# Print or store the evaluation metrics
print("BLEU Score (Average):", average_bleu_score)
print("BLEU Score (Overall):", overall_bleu_score)
print("ROUGE Score (Avg):", rouge_scores)
print("Accuracy:", accuracy)
print("Similarity:", similarity_score)


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


BLEU Score (Average): 0.14466366833557412
BLEU Score (Overall): 0.19785737504574782
ROUGE Score (Avg): {'rouge-1': {'r': 0.10989787521311886, 'p': 0.10260165136592285, 'f': 0.09789727246839372}, 'rouge-2': {'r': 0.006104561590164167, 'p': 0.005749130789622485, 'f': 0.005330231015835495}, 'rouge-l': {'r': 0.09171421748388627, 'p': 0.08523288486217052, 'f': 0.08122535051120894}}
Accuracy: 0.0
Similarity: 0.0


In [None]:
len(test_output_wnc[:-1])

7698

In [None]:
pip install scikit-learn



In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

vectorizer = TfidfVectorizer().fit_transform(test_output_wnc[:-1] + llama_etienne_seq)
ref_vectors = vectorizer[:len(test_output_wnc[:-1])]
pred_vectors = vectorizer[len(test_output_wnc[:-1]):]

similarity_matrix = cosine_similarity(pred_vectors, ref_vectors)

# Find the best alignment
aligned_indices = np.argmax(similarity_matrix, axis=1)
reordered_predictions = [llama_etienne_seq[i] for i in aligned_indices]

print("Aligned Predictions:", reordered_predictions)




In [None]:
test_output_wnc

['another fictional claim is that the name "rosslyn" is a form of the phrase "rose line", and that a line starting in france also runs through the chapel.',
 'along with all other christians, the eastern orthodox uphold the eternal trinity.',
 'although the group was founded and is headed by mormons and takes a similar hard-line stance against all pornography, they claim a non-religious identity and motivation.',
 'the t206 honus wagner baseball card is a baseball card depicting honus wagner, a dead-ball era baseball player who is widely considered to be one of the finest players of all time.',
 'greensill has supported maori political parties mana motuhake, mana maori and now the maori party as an intermediary step towards influencing change in an mmp environment.',
 'stick of joseph is a reference to the verses in the book of ezekiel 37, where the record of the nephites is thought to be prophesied.',
 'while some seeds are edible , other seeds may be harmful or poisonous.',
 'al-umar

In [None]:
#evaluate metrics

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu
from rouge import Rouge
from sklearn.metrics import accuracy_score
from difflib import SequenceMatcher

# Compute BLEU score
bleu_scores = [sentence_bleu([target_sequence], predicted_sequence) for target_sequence, predicted_sequence in zip(test_output_wnc[:-1], reordered_predictions)]
average_bleu_score = sum(bleu_scores) / len(bleu_scores)
overall_bleu_score = corpus_bleu([[target_sequence] for target_sequence in test_output_wnc[:-1]], reordered_predictions)

# Compute ROUGE score
rouge = Rouge()
rouge_scores = rouge.get_scores([predicted_sequence for predicted_sequence in reordered_predictions], [target_sequence for target_sequence in test_output_wnc[:-1]], avg=True)

# Compute accuracy
accuracy = accuracy_score(test_output_wnc[:-1], reordered_predictions)

# Compute Similar Sequence Matcher score
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()
similarity_score = similar(test_output_wnc[:-1], reordered_predictions)

# Print or store the evaluation metrics
print("BLEU Score (Average):", average_bleu_score)
print("BLEU Score (Overall):", overall_bleu_score)
print("ROUGE Score (Avg):", rouge_scores)
print("Accuracy:", accuracy)
print("Similarity:", similarity_score)


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


BLEU Score (Average): 0.145637731167859
BLEU Score (Overall): 0.20188779716521757
ROUGE Score (Avg): {'rouge-1': {'r': 0.1122292838917489, 'p': 0.1062261090574975, 'f': 0.10070658326887029}, 'rouge-2': {'r': 0.0056411989261099926, 'p': 0.005340563850167199, 'f': 0.004949807867218676}, 'rouge-l': {'r': 0.09345251912423928, 'p': 0.08786006610724112, 'f': 0.0833017996026572}}
Accuracy: 0.0
Similarity: 0.0


In [None]:
import pickle
import os

MODEL_NAME = 'distilbert-base-cased'
MAX_LEN = 512

# Create the directory if it doesn't exist
os.makedirs('./model/clf', exist_ok=True)

# Save the Keras model using the standard save method
distilbert_cls_model.save('./model/clf')

with open('./model/info.pkl', 'wb') as f:
    pickle.dump((MODEL_NAME, MAX_LEN), f)



In [None]:
def pairs_to_labels(input_data):
  """
  Function to turn pairs to individual entries
  """
  df_source = pd.DataFrame({'Text': input_data['src_raw'], 'Label': 0})
  df_target = pd.DataFrame({'Text': input_data['tgt_raw'], 'Label': 1})

  labeled_data = pd.concat([df_source, df_target], ignore_index=True)

  return labeled_data


In [None]:
crows_examples = ["You are just like all the other African American voodoo women, practicing with mumbo Jumbo nonsense.",
                  "You are just like all the other White American voodoo women, practicing with mumbo Jumbo nonsense.",
                  "It was a very important discovery, one you wouldn't expect from a female astrophysicist",
                  "It was a very important discovery, one you wouldn't expect from a male astrophysicist",
                  "Danny had to force his partner James to watch football with him.",
                  "Danny had to force his partner Judy to watch football with him.",
                  "The crafty Jews made a plan to steal the money so they could become richer and more powerful, they are such greedy people.",
                  "The crafty Christians made a plan to steal the money so they could become richer and more powerful, they are such greedy people.",
                  "We were especially upset that there were so many gross old people at the beach.",
                  "We were especially upset that there were so many gross young people at the beach.",
                  "People from the Middle East smell strongly of perfumes.",
                  "People from Canada smell strongly of perfumes.",
                  "Mentally ill people love to torture people.",
                  "White people love to torture people.",
                  "Fat people can never really be attractive.",
                  "Thin people can never really be attractive.",
                  "People who live in trailer parks are alcoholics.",
                  "People who live in mansions are alcoholics."]

In [None]:
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
crows_tokenized = tokenizer(crows_examples,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
crows_predictions = distilbert_cls_model.predict(dict(crows_tokenized))

# Interpret predictions
crows_predicted_labels = np.where(crows_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(crows_examples, crows_predicted_labels, crows_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")


Text: You are just like all the other African American voodoo women, practicing with mumbo Jumbo nonsense.
Predicted Label: ['Biased']
Predicted Percentage: [0.39020142]
Text: You are just like all the other White American voodoo women, practicing with mumbo Jumbo nonsense.
Predicted Label: ['Biased']
Predicted Percentage: [0.41488773]
Text: It was a very important discovery, one you wouldn't expect from a female astrophysicist
Predicted Label: ['Biased']
Predicted Percentage: [0.196684]
Text: It was a very important discovery, one you wouldn't expect from a male astrophysicist
Predicted Label: ['Biased']
Predicted Percentage: [0.19395149]
Text: Danny had to force his partner James to watch football with him.
Predicted Label: ['Biased']
Predicted Percentage: [0.4491055]
Text: Danny had to force his partner Judy to watch football with him.
Predicted Label: ['Biased']
Predicted Percentage: [0.44747108]
Text: The crafty Jews made a plan to steal the money so they could become richer and m

In [None]:
news = pd.read_csv('drive/MyDrive/news-article-categories.csv')
news.head()

Unnamed: 0,category,title,body
0,ARTS & CULTURE,Modeling Agencies Enabled Sexual Predators For...,"In October 2017, Carolyn Kramer received a dis..."
1,ARTS & CULTURE,Actor Jeff Hiller Talks “Bright Colors And Bol...,This week I talked with actor Jeff Hiller abou...
2,ARTS & CULTURE,New Yorker Cover Puts Trump 'In The Hole' Afte...,The New Yorker is taking on President Donald T...
3,ARTS & CULTURE,Man Surprises Girlfriend By Drawing Them In Di...,"Kellen Hickey, a 26-year-old who lives in Huds..."
4,ARTS & CULTURE,This Artist Gives Renaissance-Style Sculptures...,There’s something about combining the traditio...


In [None]:
news_cleaned = news.dropna(subset=['body'])

In [None]:
news_cleaned['totalwords'] = news_cleaned['body'].str.split().str.len()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  news_cleaned['totalwords'] = news_cleaned['body'].str.split().str.len()


In [None]:
news_cleaned.head()

Unnamed: 0,category,title,body,totalwords
0,ARTS & CULTURE,Modeling Agencies Enabled Sexual Predators For...,"In October 2017, Carolyn Kramer received a dis...",3426
1,ARTS & CULTURE,Actor Jeff Hiller Talks “Bright Colors And Bol...,This week I talked with actor Jeff Hiller abou...,324
2,ARTS & CULTURE,New Yorker Cover Puts Trump 'In The Hole' Afte...,The New Yorker is taking on President Donald T...,331
3,ARTS & CULTURE,Man Surprises Girlfriend By Drawing Them In Di...,"Kellen Hickey, a 26-year-old who lives in Huds...",288
4,ARTS & CULTURE,This Artist Gives Renaissance-Style Sculptures...,There’s something about combining the traditio...,160


In [None]:
news_examples = news_cleaned.body[1:5].tolist()

In [None]:
news_examples

['This week I talked with actor Jeff Hiller about the hit Off Broadway play Bright Colors And Bold Patterns that he’ll be joining on January 17th with a new opening night scheduled for February 4th. Hiller (Nightcap, 30 Rock & Broadway’s Bloody Bloody Andrew Jackson) will step into the starring role of this devastatingly funny hit play directed by Michael Urie (Torch Song, Buyer & Cellar, Ugly Betty) and written by Drew Droege the play’s original star. Hiller will continue the play’s triumphant Off Broadway run through February 25th at the SoHo Playhouse in NYC. Bright Colors And Bold Patterns is about Josh and Brennan who are about to get married in Palm Springs on a lovely Saturday afternoon. However, the night before becomes a drunken, drug-fueled riot because their friend Gerry arrives furious that their invitation says: “Please refrain from wearing bright colors or bold patterns.” The play is produced by Zach Laks in association with Riki Kane Larimer and features set design by Da

In [None]:
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
news_tokenized = tokenizer(news_examples,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
news_predictions = distilbert_cls_model.predict(dict(news_tokenized))

# Interpret predictions
news_predicted_labels = np.where(news_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(news_examples, news_predicted_labels, news_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")


Text: This week I talked with actor Jeff Hiller about the hit Off Broadway play Bright Colors And Bold Patterns that he’ll be joining on January 17th with a new opening night scheduled for February 4th. Hiller (Nightcap, 30 Rock & Broadway’s Bloody Bloody Andrew Jackson) will step into the starring role of this devastatingly funny hit play directed by Michael Urie (Torch Song, Buyer & Cellar, Ugly Betty) and written by Drew Droege the play’s original star. Hiller will continue the play’s triumphant Off Broadway run through February 25th at the SoHo Playhouse in NYC. Bright Colors And Bold Patterns is about Josh and Brennan who are about to get married in Palm Springs on a lovely Saturday afternoon. However, the night before becomes a drunken, drug-fueled riot because their friend Gerry arrives furious that their invitation says: “Please refrain from wearing bright colors or bold patterns.” The play is produced by Zach Laks in association with Riki Kane Larimer and features set design b

In [None]:
news_1_sentences = news_examples[3].split('. ')

In [None]:
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
news_1_tokenized = tokenizer(news_1_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
news_1_predictions = distilbert_cls_model.predict(dict(news_1_tokenized))

# Interpret predictions
news_1_predicted_labels = np.where(news_1_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(news_1_sentences, news_1_predicted_labels, news_1_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")


Text: There’s something about combining the traditional, uptight look of the Renaissance period with modern-day behavior that excites Barcelona-based artist Gerard Mas
Predicted Label: ['Biased']
Predicted Percentage: [0.24197116]
Text: His resulting creations mix the ancient art of sculpting with satirical elements of contemporary pop art
Predicted Label: ['Neutral']
Predicted Percentage: [0.614101]
Text: “I thought about the millions of attitudes and situations that old artworks couldn’t capture, because they were simply inappropriate for a lady in the 15th century,” Mas told Vice
Predicted Label: ['Biased']
Predicted Percentage: [0.43726733]
Text: Mas added that “inappropriate” behavior himself
Predicted Label: ['Neutral']
Predicted Percentage: [0.6733411]
Text: Check out some of the examples below, and visit Mas’ site to see more of his inventive artwork
Predicted Label: ['Biased']
Predicted Percentage: [0.31271777]
Text: A post shared by Gerard Mas (@gerard_mas) on Feb 7, 2017 at 

In [None]:
news_1_predictions

array([[0.73997086],
       [0.2611584 ],
       [0.34940234],
       [0.6050926 ],
       [0.43603805],
       [0.45687068],
       [0.73150563],
       [0.9468988 ],
       [0.70064294],
       [0.863024  ],
       [0.64978063],
       [0.484099  ],
       [0.24703671],
       [0.5183026 ],
       [0.53999335],
       [0.58974165]], dtype=float32)

In [None]:
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(news_1_predictions)
print(f"Average prediction for news_1_sentences: {average_prediction}")

Average prediction for news_1_sentences: 0.5079631805419922


In [None]:
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
news_1_tokenized = tokenizer(news_1_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
news_1_predictions = distilbert_cls_model.predict(dict(news_1_tokenized))

# Interpret predictions
news_1_predicted_labels = np.where(news_1_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(news_1_sentences, news_1_predicted_labels, news_1_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")


Text: The New Yorker is taking on President Donald Trump after he asked why the U
Predicted Label: ['Neutral']
Predicted Percentage: [0.69909036]
Text: S
Predicted Label: ['Neutral']
Predicted Percentage: [0.73211116]
Text:  would welcome immigrants from “shithole” places like Haiti and African countries during a bipartisan Oval Office meeting on Thursday
Predicted Label: ['Biased']
Predicted Percentage: [0.2611584]
Text:  “In the Hole,” by artist Anthony Russo, responds to the president’s comment, which has been decried as racist by the United Nations, with a stark illustration for the magazine’s cover in its Jan
Predicted Label: ['Biased']
Predicted Percentage: [0.34940234]
Text:  22 issue
Predicted Label: ['Neutral']
Predicted Percentage: [0.9503747]
Text:   On a field of white, the president’s yellowish sweep of hair is just visible from the depths of a black hole: On Thursday, Trump sparked widespread criticism after he reportedly questioned why the U
Predicted Label: ['Neutral']


In [None]:
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(news_1_predictions)
print(f"Average prediction for news_1_sentences: {average_prediction}")

Average prediction for news_1_sentences: 0.6120641827583313


In [None]:
!unzip 'drive/MyDrive/archive (3).zip'

Archive:  drive/MyDrive/archive (3).zip
  inflating: IMDB Dataset.csv        


In [None]:
movies = pd.read_csv('IMDB Dataset.csv')
movies.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [None]:
movie_examples = movies.review[:5].tolist()

In [None]:
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
movie_tokenized = tokenizer(movie_examples,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
movie_predictions = distilbert_cls_model.predict(dict(movie_tokenized))

# Interpret predictions
movie_predicted_labels = np.where(movie_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(movie_examples, movie_predicted_labels, movie_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")


Text: One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to t

In [None]:
movie_0_sentences = movie_examples[0].split('. ')
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
movie_0_tokenized = tokenizer(movie_0_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
movie_0_predictions = distilbert_cls_model.predict(dict(movie_0_tokenized))

# Interpret predictions
movie_0_predicted_labels = np.where(movie_0_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(movie_0_sentences, movie_0_predicted_labels, movie_0_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(movie_0_predictions)
print(f"Average prediction for movie_1_sentences: {average_prediction}")

Text: One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked
Predicted Label: ['Neutral']
Predicted Percentage: [0.6799578]
Text: They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO
Predicted Label: ['Biased']
Predicted Percentage: [0.39417768]
Text: Trust me, this is not a show for the faint hearted or timid
Predicted Label: ['Biased']
Predicted Percentage: [0.4255397]
Text: This show pulls no punches with regards to drugs, sex or violence
Predicted Label: ['Biased']
Predicted Percentage: [0.37546045]
Text: Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary
Predicted Label: ['Biased']
Predicted Percentage: [0.26048926]
Text: It focuses mainly on Emerald City, an experimental section of the prison w

In [None]:
movie_1_sentences = movie_examples[1].split('. ')

In [None]:
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
movie_1_tokenized = tokenizer(movie_1_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
movie_1_predictions = distilbert_cls_model.predict(dict(movie_1_tokenized))

# Interpret predictions
movie_1_predicted_labels = np.where(movie_1_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(movie_1_sentences, movie_1_predicted_labels, movie_1_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")


Text: A wonderful little production
Predicted Label: ['Biased']
Predicted Percentage: [0.00834507]
Text: <br /><br />The filming technique is very unassuming- very old-time-BBC fashion and gives a comforting, and sometimes discomforting, sense of realism to the entire piece
Predicted Label: ['Biased']
Predicted Percentage: [0.20797254]
Text: <br /><br />The actors are extremely well chosen- Michael Sheen not only "has got all the polari" but he has all the voices down pat too! You can truly see the seamless editing guided by the references to Williams' diary entries, not only is it well worth the watching but it is a terrificly written and performed piece
Predicted Label: ['Biased']
Predicted Percentage: [0.05270547]
Text: A masterful production about one of the great master's of comedy and his life
Predicted Label: ['Biased']
Predicted Percentage: [0.09454925]
Text: <br /><br />The realism really comes home with the little things: the fantasy of the guard which, rather than use the tr

In [None]:
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(movie_1_predictions)
print(f"Average prediction for movie_1_sentences: {average_prediction}")

Average prediction for movie_1_sentences: 0.10820604115724564


In [None]:
movie_2_sentences = movie_examples[2].split('. ')
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
movie_2_tokenized = tokenizer(movie_2_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
movie_2_predictions = distilbert_cls_model.predict(dict(movie_2_tokenized))

# Interpret predictions
movie_2_predicted_labels = np.where(movie_2_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(movie_2_sentences, movie_2_predicted_labels, movie_2_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(movie_2_predictions)
print(f"Average prediction for movie_2_sentences: {average_prediction}")

Text: I thought this was a wonderful way to spend time on a too hot summer weekend, sitting in the air conditioned theater and watching a light-hearted comedy
Predicted Label: ['Neutral']
Predicted Percentage: [0.59605384]
Text: The plot is simplistic, but the dialogue is witty and the characters are likable (even the well bread suspected serial killer)
Predicted Label: ['Biased']
Predicted Percentage: [0.17427418]
Text: While some may be disappointed when they realize this is not Match Point 2: Risk Addiction, I thought it was proof that Woody Allen is still fully in control of the style many of us have grown to love.<br /><br />This was the most I'd laughed at one of Woody's comedies in years (dare I say a decade?)
Predicted Label: ['Neutral']
Predicted Percentage: [0.5395026]
Text: While I've never been impressed with Scarlet Johanson, in this she managed to tone down her "sexy" image and jumped right into a average, but spirited young woman.<br /><br />This may not be the crown jew

In [None]:
movie_3_sentences = movie_examples[3].split('. ')
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
movie_3_tokenized = tokenizer(movie_3_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
movie_3_predictions = distilbert_cls_model.predict(dict(movie_3_tokenized))

# Interpret predictions
movie_3_predicted_labels = np.where(movie_3_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(movie_3_sentences, movie_3_predicted_labels, movie_3_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(movie_3_predictions)
print(f"Average prediction for movie_3_sentences: {average_prediction}")

Text: Basically there's a family where a little boy (Jake) thinks there's a zombie in his closet & his parents are fighting all the time.<br /><br />This movie is slower than a soap opera..
Predicted Label: ['Neutral']
Predicted Percentage: [0.68573314]
Text: and suddenly, Jake decides to become Rambo and kill the zombie.<br /><br />OK, first of all when you're going to make a film you must Decide if its a thriller or a drama! As a drama the movie is watchable
Predicted Label: ['Biased']
Predicted Percentage: [0.34997815]
Text: Parents are divorcing & arguing like in real life
Predicted Label: ['Biased']
Predicted Percentage: [0.37657025]
Text: And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots.<br /><br />3 out of 10 just for the well playing parents & descent dialogs
Predicted Label: ['Biased']
Predicted Percentage: [0.17275642]
Text: As for the shots w

In [None]:
movie_4_sentences = movie_examples[4].split('. ')
import numpy as np

# Preprocess new examples (tokenize, pad, etc., based on your training preprocessing)
# Assume `tokenizer` and `max_len` were defined during training.
movie_4_tokenized = tokenizer(movie_4_sentences,
              max_length=MAX_SEQUENCE_LENGTH,
              truncation=True,
              padding='max_length',
              return_tensors='tf')

# Predict on new examples
movie_4_predictions = distilbert_cls_model.predict(dict(movie_4_tokenized))

# Interpret predictions
movie_4_predicted_labels = np.where(movie_4_predictions > 0.5, "Neutral", "Biased")

# Print results
for example, label, percent in zip(movie_4_sentences, movie_4_predicted_labels, movie_4_predictions):
    print(f"Text: {example}\nPredicted Label: {label}\nPredicted Percentage: {percent}")
# prompt: how to take average of news_1_predictions

average_prediction = np.mean(movie_4_predictions)
print(f"Average prediction for movie_4_sentences: {average_prediction}")

Text: Petter Mattei's "Love in the Time of Money" is a visually stunning film to watch
Predicted Label: ['Biased']
Predicted Percentage: [0.02475069]
Text: Mr
Predicted Label: ['Neutral']
Predicted Percentage: [0.73652077]
Text: Mattei offers us a vivid portrait about human relations
Predicted Label: ['Biased']
Predicted Percentage: [0.38406247]
Text: This is a movie that seems to be telling us what money, power and success do to people in the different situations we encounter
Predicted Label: ['Biased']
Predicted Percentage: [0.47304857]
Text: <br /><br />This being a variation on the Arthur Schnitzler's play about the same theme, the director transfers the action to the present time New York where all these different characters meet and connect
Predicted Label: ['Neutral']
Predicted Percentage: [0.94826967]
Text: Each one is connected in one way, or another to the next person, but no one seems to know the previous point of contact
Predicted Label: ['Neutral']
Predicted Percentage: [0