In [1]:
from transformers import pipeline
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Zero-shot classification pipeline built on the BART-large MNLI checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

Device set to use mps:0


In [3]:
# One example post used to sanity-check the zero-shot classifier.
text = ["The fire west of yellowstone has grown to be 50 square acres"]

# Candidate labels handed to the classifier for the example above.
labels = [
    "grateful",
    "political",
    "frustration",
    "sarcasm",
    "solution-focused",
    "informative or news-focused",
    "fearful or panicked",
    "blaming",
    "seeking help or advice",
    "fire-management-related",
]

# multi_label=True: more than one label may apply to the same text.
results = classifier(text, candidate_labels=labels, multi_label=True)
print(results)

[{'sequence': 'The fire west of yellowstone has grown to be 50 square acres', 'labels': ['informative or news-focused', 'fire-management-related', 'fearful or panicked', 'seeking help or advice', 'frustration', 'blaming', 'sarcasm', 'grateful', 'solution-focused', 'political'], 'scores': [0.9404984712600708, 0.9354274868965149, 0.836033284664154, 0.7428258061408997, 0.23450025916099548, 0.1476375013589859, 0.07033845037221909, 0.029747506603598595, 0.0037829922512173653, 0.002808493794873357]}]


In [18]:
model_path = "facebook/bart-large-mnli"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Class index -> label name for the classification head that will be fine-tuned.
# "fire-management-related" (present in the zero-shot label list) is not part of
# this label set.
id2label = {
    0: "grateful",
    1: "political",
    2: "frustration",
    3: "sarcasm",
    4: "solution-focused",
    5: "informative or news-focused",
    6: "fearful or panicked",
    7: "blaming",
    8: "seeking help or advice",
}

# Derived from id2label instead of being typed out a second time, so the two
# mappings cannot drift apart when a label is added, removed or reordered.
label2id = {label: idx for idx, label in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    # Sized from the mapping so the head always matches the label set.
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
    # The Reddit annotations are multi-hot (one post can carry several labels),
    # so declare the task as multi-label explicitly instead of relying on the
    # loss being inferred from the dtype of the labels at training time.
    problem_type="multi_label_classification",
    # The checkpoint ships a 3-class head; let it be replaced by a newly
    # initialised head of the size requested above.
    ignore_mismatched_sizes=True,
)
                                                           

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-large-mnli and are newly initialized because the shapes did not match:
- classification_head.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([9]) in the model instantiated
- classification_head.out_proj.weight: found shape torch.Size([3, 1024]) in the checkpoint and torch.Size([9, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Freeze the base model, leaving trainable only those of its parameters whose
# name contains "pooler". Done in one pass: a parameter ends up trainable
# exactly when "pooler" appears in its name.
#
# NOTE(review): "pooler" is a BERT-style parameter name; the BART base model
# presumably has no such parameters, in which case every base-model weight is
# frozen here and only parameters outside model.base_model (e.g. the
# classification head) remain trainable. TODO confirm this is the intent.

for name, param in model.base_model.named_parameters():
    param.requires_grad = "pooler" in name

In [24]:
# read in reddit data, drop the 'humorous' label, and rename the remaining
# label columns so they match the label names used by the model

reddit_data = (
    pd.read_csv("reddit_csv_file.csv")
    .drop(columns=['humorous'])
    .rename(columns={
        'speaks of a solution': 'solution-focused',
        'spreads news': 'informative or news-focused',
        'fear or panic': 'fearful or panicked',
        'blame': 'blaming',
        'sarcastic': 'sarcasm',
        'seeking help or answer': 'seeking help or advice',
    })
)

# separate text and label data into their own variables

reddit_text_data = reddit_data['text']

# Pick the label columns by name, ordered by class id, so that column i of the
# label frame is the class the model calls id2label[i]. The CSV stores the
# columns in a different order (frustration, political, ...), so simply
# dropping 'text' and 'date' would leave the labels misaligned with the model
# head. Selecting by name also raises a KeyError if an expected label column
# is missing, and leaves out any non-label column.
label_columns = sorted(label2id, key=label2id.get)
reddit_label_data = reddit_data[label_columns]

# print out reddit data so I can view as I code

reddit_data

Unnamed: 0,text,date,frustration,political,solution-focused,informative or news-focused,fearful or panicked,blaming,sarcasm,grateful,seeking help or advice
0,This is on us. One of us should have remembere...,1/25/25,1,0,0,0,0,0,1,0,0
1,"It’s that easy, huh?",1/25/25,0,0,0,0,0,0,1,0,0
2,"I have to say--the city, the office of emergen...",1/13/25,0,0,0,0,0,0,0,1,0
3,"The Kardashians reside in Calabasas, Californi...",1/13/25,0,1,0,0,0,0,0,0,0
4,"I was evacuated, applied Sunday and got approv...",1/14/25,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
521,this is the most reasonable thing I've read on...,01-09-2025,0,0,0,0,0,0,0,0,0
522,Polyester lining.,01-09-2025,0,0,0,0,0,0,0,0,0
523,I don’t disagree with the spirit of the “LAFD ...,01-09-2025,0,0,0,1,0,0,0,0,0
524,Well yes. Then again many if not all might say...,01-09-2025,0,0,0,0,0,1,0,0,0


In [33]:
# function for preprocessing and tokenizing text

def preprocess_text(full_text, text_tokenizer=None):
    """Tokenize a collection of texts in a single batched call.

    Parameters
    ----------
    full_text : iterable of str, or str
        The documents to tokenize (e.g. a pandas Series of posts). A single
        string is treated as one document rather than being split into
        characters.
    text_tokenizer : callable, optional
        Callable invoked as ``text_tokenizer(list_of_texts, truncation=True)``.
        Defaults to the notebook-level ``tokenizer``.

    Returns
    -------
    Whatever the tokenizer returns for the batch of texts.
    """
    # Iterating a bare string would yield its characters, so wrap it first.
    if isinstance(full_text, str):
        list_text = [full_text]
    else:
        # The tokenizer is handed a plain list, not the original container.
        list_text = list(full_text)

    # Resolve the global lazily so an explicitly supplied tokenizer works even
    # when no notebook-level tokenizer has been created.
    active_tokenizer = tokenizer if text_tokenizer is None else text_tokenizer
    return active_tokenizer(list_text, truncation=True)

# Tokenize the text column of the Reddit data in one call.
tokenized_data = preprocess_text(full_text=reddit_text_data)

This is on us. One of us should have remembered to just turn the water on! Thank god he reminded us. Problem solved, good job everyone, i'll write a note so we don't forget again.
It’s that easy, huh?
I have to say--the city, the office of emergency management, the fire department et al handled this absolute natural disaster really amazingly well. The loss of life could have been catastrophic. Other fires--much smaller fires in smaller population centers--had 10 times the deaths. Yes. This was a disaster. It will take time and effort to rebuild and repair the economic damage. But, while you can rebuild a home or replant trees or help businesses flourish again, you can't bring back the dead. L.A. has been here before. The Northridge quake. The riots of 1992. Those didn't stop us and this won't either.

  
I don't care what Khloe Kardashian has to say. About anything. She's not an authority. She lives in a fantasy world of ease and comfort.

  
But if you're dead serious about finding a 