# Apply Model

In [None]:
# Import packages
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import pandas as pd

In [None]:
# Checkpoint directory of the fine-tuned classifier. The model weights and the
# tokenizer are both loaded from this same location.
model_name = "./shelfens/camembert_multiple_classifier"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Empty accumulator for the predictions: one row per scored text, holding the
# text itself, the three class probabilities, the thresholded decision for the
# combined class, and the source link.
results_df = pd.DataFrame(
    columns=[
        "text",
        "Death",
        "Police",
        "All (lethal police violence)",
        "Predicted as 'All'",
        "link",
    ]
)

In [None]:
def predict_prob(text, link):
    """Score one text with the fine-tuned model and record the result.

    The text is converted to ``str``, tokenized (truncated and padded to 512
    tokens) and passed through the module-level ``model`` with gradients
    disabled. An element-wise sigmoid turns the logits into one score per
    output, and a new row is appended to the module-level ``results_df``.

    Args:
        text: Text to classify; any value is accepted and converted with ``str``.
        link: Value stored unchanged in the ``link`` column of the new row.

    Returns:
        None. The only effect is the row appended to ``results_df``.
    """
    text = str(text)
    encoded = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    # Inference only: no gradient bookkeeping is needed.
    with torch.no_grad():
        scores = torch.sigmoid(model(**encoded).logits).squeeze()

    # Output order: 0 = Death, 1 = Police, 2 = All (death in the context of a
    # police intervention).
    all_prob = scores[2].item()
    death_prob = scores[0].item()
    police_prob = scores[1].item()

    # A score of at least 0.5 classifies the text as 'All'.
    predicted_all = all_prob >= 0.5

    next_row = len(results_df)
    results_df.loc[next_row] = [
        text,
        death_prob,
        police_prob,
        all_prob,
        predicted_all,
        link,
    ]

In [None]:
# Load the items to classify.
# For Pipeline 1, read Pipeline1_locations_CH.xlsx instead.
df = pd.read_excel("../databases/Pipeline2_toapply.xlsx")

# Build the model input from the Title and Post_Lead columns. Missing cells
# become empty strings, and every cell is cast to str so that a non-text value
# cannot break the concatenation.
df['text'] = df['Title'].fillna('').astype(str) + " " + df['Post_Lead'].fillna('').astype(str)

# The concatenation above never yields NaN, so dropna() could not remove
# anything. Filter on whitespace-only text instead, so rows with neither a
# title nor a lead are not sent to the model.
df = df[df['text'].str.strip() != '']

In [None]:
# Score every row of df; predict_prob appends one row to results_df per call.
for _, row in df.iterrows():
    # Series.get returns '' when the sheet has no 'Link' column, where plain
    # indexing would raise KeyError.
    predict_prob(row['text'], row.get('Link', ''))

In [None]:
# Write the accumulated predictions to an Excel file, omitting the row index.
results_df.to_excel(excel_writer="shelfens/Pipeline2_Outcome.xlsx", index=False)