# Text Subjectivity Classification - Using a fine-tuned model downloaded from Hugging Face

Hugging Face model:  
model="cffl/bert-base-styleclassification-subjective-neutral"
https://huggingface.co/cffl/bert-base-styleclassification-subjective-neutral

In [1]:
from transformers import pipeline

# Load the subjective-vs-neutral checkpoint into a text-classification
# pipeline that is asked to report a score for every label.
MODEL_NAME = "cffl/bert-base-styleclassification-subjective-neutral"
classify = pipeline("text-classification", model=MODEL_NAME, return_all_scores=True)

# Quick sanity check on a single sentence; as the last expression of the
# cell, the result is displayed by the notebook.
input_text = "Leonard Bernstein was an American conductor, composer, pianist, music educator, author, and humanitarian."
classify(input_text)

  from .autonotebook import tqdm as notebook_tqdm


[[{'label': 'SUBJECTIVE', 'score': 0.7003715634346008},
  {'label': 'NEUTRAL', 'score': 0.29962843656539917}]]

In [2]:
# Sum the element counts of every parameter of the model held by the pipeline.
model_pt = classify.model
total_params = 0
for param in model_pt.parameters():
    total_params += param.numel()
print("Number of parameters: ", total_params)

# total_params_trainable = sum(p.numel() for p in model_pt.parameters() if p.requires_grad)
# print("Number of trainable parameters: ", total_params_trainable)

Number of parameters:  109483778


In [3]:
def classify_subjectivity(input_text):
    """Return 1 if ``input_text`` is classified as subjective, otherwise 0.

    Runs the module-level ``classify`` pipeline on the text and compares the
    score reported for the ``SUBJECTIVE`` label with the score reported for
    the ``NEUTRAL`` label. A tie counts as neutral (0).

    Args:
        input_text: The sentence to classify.

    Returns:
        1 when the SUBJECTIVE score is strictly greater than the NEUTRAL
        score, else 0.

    Raises:
        KeyError: If the pipeline output lacks a ``SUBJECTIVE`` or
            ``NEUTRAL`` entry.
    """
    result = classify(input_text)
    # Look scores up by label name instead of by position, so the answer
    # does not silently flip if the pipeline returns the labels in a
    # different order.
    scores = {entry['label']: entry['score'] for entry in result[0]}
    return 1 if scores['SUBJECTIVE'] > scores['NEUTRAL'] else 0

In [4]:
def _safe_divide(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def compute_metrics(predictions, labels):
    """Print confusion-matrix counts and binary classification metrics.

    The value ``1`` is the positive class; any other value is treated as
    negative, for predictions and labels alike. Predictions and labels are
    consumed pairwise, in order.

    Output, one item per line: TP, FN, FP, TN, accuracy, precision, recall,
    F1, followed by a comma-separated summary line of accuracy, precision,
    recall and F1 formatted to four decimal places.

    A metric whose denominator is zero (empty input, no positive
    predictions, no positive labels, or precision + recall == 0) is reported
    as 0.0 instead of raising ZeroDivisionError.

    Args:
        predictions: Predicted class ids.
        labels: Ground-truth class ids, aligned with ``predictions``.
    """
    TP = TN = FP = FN = 0
    for predicted, actual in zip(predictions, labels):
        if actual == 1:
            if predicted == 1:
                TP += 1
            else:
                FN += 1
        elif predicted == 1:
            FP += 1
        else:
            TN += 1

    print('TP:', TP)
    print('FN:', FN)
    print('FP:', FP)
    print('TN:', TN)

    precision = _safe_divide(TP, TP + FP)
    recall = _safe_divide(TP, TP + FN)
    accuracy = _safe_divide(TP + TN, TP + TN + FP + FN)
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    F1 = _safe_divide(2 * precision * recall, precision + recall)

    print('accuracy:', accuracy)
    print('precision:', precision)
    print('recall:', recall)
    print('F1:', F1)
    print('{:10.4f},{:10.4f},{:10.4f},{:10.4f}'.format(accuracy, precision, recall, F1))

In [5]:
import pandas as pd

# Evaluate the classifier on every test set and print its metrics.
datasets = ["data/SUBJ/test.csv", "data/tasksource/test.csv", "data/bard.csv", "data/ai21.csv"]

for dataset in datasets:
    print()
    print(dataset)

    df = pd.read_csv(dataset)
    if dataset == "data/tasksource/test.csv":
        # tasksource only: drop the trailing column, rename the columns to
        # the "text"/"label" names used below, and convert the string
        # labels to 1 (SUBJ) / 0 (OBJ).
        label2id = {"SUBJ": 1, "OBJ": 0}
        df = df.iloc[:, :-1]
        df = df.rename(columns={"Sentence": "text", "Label": "label"})
        df['label'] = df['label'].apply(lambda x: label2id[x])

    # Read the columns directly instead of building a row Series via
    # df.iloc[i] twice per row.
    labels = df['label'].tolist()
    predictions = [classify_subjectivity(text) for text in df['text']]
    compute_metrics(predictions, labels)


data/SUBJ/test.csv
TP: 421
FN: 79
FP: 213
TN: 287
accuracy: 0.708
precision: 0.6640378548895899
recall: 0.842
F1: 0.7425044091710759
    0.7080,    0.6640,    0.8420,    0.7425

data/tasksource/test.csv
TP: 102
FN: 11
FP: 66
TN: 40
accuracy: 0.6484018264840182
precision: 0.6071428571428571
recall: 0.9026548672566371
F1: 0.7259786476868326
    0.6484,    0.6071,    0.9027,    0.7260

data/bard.csv
TP: 37
FN: 13
FP: 19
TN: 31
accuracy: 0.68
precision: 0.6607142857142857
recall: 0.74
F1: 0.6981132075471698
    0.6800,    0.6607,    0.7400,    0.6981

data/ai21.csv
TP: 40
FN: 10
FP: 4
TN: 46
accuracy: 0.86
precision: 0.9090909090909091
recall: 0.8
F1: 0.8510638297872342
    0.8600,    0.9091,    0.8000,    0.8511
