In [3]:
model_name = 'article-bias-2'
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained(model_name)
pipe = pipeline("text-classification", model=model_name, tokenizer=tokenizer)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3).to("cuda")

In [4]:
def truncate_text(text, tokenizer, max_length=512):
    tokens = tokenizer.tokenize(text)
    truncated_tokens = tokens[:max_length - 2]  # Reserve space for special tokens [CLS] and [SEP]
    truncated_text = tokenizer.convert_tokens_to_string(truncated_tokens)
    return truncated_text

In [132]:
model.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [14]:
import json
def load_article_json(name):
    with open(f'./data/jsons/{name}.json', 'r') as f:
        data = json.load(f)
        return data

In [187]:
article1 = load_article_json('I3B0cfqnYOw0R0Rp')
article1['bias'], article1['bias_text'], article1['title'], article1['source']

(2,
 'right',
 "PA Lawmaker's Bill Would Ban Teachers From Talking Politics in the Classroom",
 'Fox Online News')

In [175]:
text = article1['content']
text

"A Pennsylvania state lawmaker is pushing a bill that would ban schoolteachers from talking about modern-day civics , politics and public affairs in the classroom .\nState Rep. Will Tallman ( R ) , who is retiring at the end of the year , sent a memo to his colleagues in the state House last Friday seeking support for what he has dubbed the “ Teacher Code of Ethics . ”\nAccording to the memo , the bill would prohibit public elementary or secondary schoolteachers , while operating within the scope of their employment , from endorsing , supporting , or opposing any the following :\n- Candidate or nominee for public office or any local , state , or federal official , regardless of whether such official is elected or appointed ;\n- Local , state , or federal legislation or regulation , regardless of whether such legislation or regulation is pending , proposed , or enacted ;\n- Local , state , or federal court case or judicial action , regardless of whether such court case or judicial actio

In [216]:
text = truncate_text(article1['content'], tokenizer, max_length=500)
text

'a pennsylvania state lawmaker is pushing a bill that would ban schoolteachers from talking about modern - day civics, politics and public affairs in the classroom. state rep. will tallman ( r ), who is retiring at the end of the year, sent a memo to his colleagues in the state house last friday seeking support for what he has dubbed the “ teacher code of ethics. ” according to the memo, the bill would prohibit public elementary or secondary schoolteachers, while operating within the scope of their employment, from endorsing, supporting, or opposing any the following : - candidate or nominee for public office or any local, state, or federal official, regardless of whether such official is elected or appointed ; - local, state, or federal legislation or regulation, regardless of whether such legislation or regulation is pending, proposed, or enacted ; - local, state, or federal court case or judicial action, regardless of whether such court case or judicial action is pending, proposed, 

In [219]:
text += " pro choice pro choice pro choice "

In [166]:
pipe(text)

[{'label': 'LABEL_2', 'score': 0.9992018342018127}]

In [220]:
pipe(text)

[{'label': 'LABEL_2', 'score': 0.9992978572845459}]

In [174]:
pipe(truncate_text(article1['content'], tokenizer, max_length=400))

[{'label': 'LABEL_1', 'score': 0.996159553527832}]

In [184]:
pipe("immigration bad")

[{'label': 'LABEL_0', 'score': 0.7014515399932861}]

In [133]:
pipe("immigration is bad.")

[{'label': 'LABEL_2', 'score': 0.7656301259994507}]

In [134]:
pipe("immigration is great.")

[{'label': 'LABEL_2', 'score': 0.5035411715507507}]

In [135]:
pipe("immigration is great for diversity")

[{'label': 'LABEL_0', 'score': 0.4904172718524933}]

In [136]:
pipe("immigration is great for diversity.")

[{'label': 'LABEL_1', 'score': 0.42974522709846497}]

In [188]:
pipe("healthcare")

[{'label': 'LABEL_0', 'score': 0.6667281985282898}]

In [196]:
pipe(' '.join(["healthcare"] * 50))

[{'label': 'LABEL_0', 'score': 0.9122157096862793}]

In [197]:
pipe(' '.join(["healthcare"] * 500))

[{'label': 'LABEL_1', 'score': 0.6254068613052368}]

In [192]:
pipe("god")

[{'label': 'LABEL_2', 'score': 0.49897608160972595}]

In [198]:
pipe(' '.join(["god"] * 50))

[{'label': 'LABEL_0', 'score': 0.948601245880127}]

In [199]:
pipe(' '.join(["god"] * 500))

[{'label': 'LABEL_0', 'score': 0.6542900800704956}]

In [194]:
pipe("free speech")

[{'label': 'LABEL_2', 'score': 0.6059794425964355}]

In [201]:
pipe(' '.join(["free"] * 50))

[{'label': 'LABEL_0', 'score': 0.8325839638710022}]

In [207]:
pipe("pro choice")

[{'label': 'LABEL_0', 'score': 0.9248437285423279}]

In [210]:
pipe("pro is choice")

[{'label': 'LABEL_0', 'score': 0.8720803260803223}]

In [225]:
pipe(' '.join(["pro-choice is" * 50]))

[{'label': 'LABEL_0', 'score': 0.7543892860412598}]

In [226]:
pipe(' '.join(["pro-life is" * 50]))

[{'label': 'LABEL_0', 'score': 0.4981709420681}]