# Generating responses using CommunityLM Models for Factuality Analysis

In [3]:
import csv
import os
from typing import Dict, List

import numpy as np
import pandas as pd

from examples.community_lm.community_lm_utils import compute_group_stance_factscore
from llments.eval.sentiment import HuggingFaceSentimentEvaluator
from llments.LMF.hugging_face import HuggingFaceLM

In [None]:
def load_model(party: str, device: str = "cuda"):
    """
    Instantiate the CommunityLM checkpoint belonging to one party.

    Args:
        party: Party suffix of the checkpoint name ('democrat' or 'republican')
        device: Device string forwarded to the model wrapper ('cuda' or 'cpu')

    Returns:
        A HuggingFaceLM built from ``CommunityLM/{party}-twitter-gpt2``
    """
    # The checkpoint name is derived from the party string.
    return HuggingFaceLM(f'CommunityLM/{party}-twitter-gpt2', device=device)

In [None]:
def generate_responses(
    model,
    politicians: Dict[str, List[str]],
    prompt_template: str,
    max_length: int = 100,
    temperature: float = 1.0,
    num_sequences: int = 200
) -> Dict[str, Dict[str, List[str]]]:
    """
    Sample completions from the model for every politician, grouped by party.

    Args:
        model: The loaded language model; it must expose a ``generate`` method
            accepting the prompt plus the ``do_sample``, ``max_length``,
            ``temperature`` and ``num_return_sequences`` keyword arguments
        politicians: Mapping from party label to the list of politician names
        prompt_template: Prompt template containing a ``{politician}`` placeholder
        max_length: Maximum length of generated text
        temperature: Sampling temperature
        num_sequences: Number of responses to generate per politician

    Returns:
        Nested dictionary of responses by party and politician:
        ``{party: {politician: [response, ...]}}``
    """
    # Keep the accumulator under its own name: reusing `responses` for the
    # per-politician generations would overwrite this dict with a list.
    responses_by_party: Dict[str, Dict[str, List[str]]] = {}

    for party, members in politicians.items():
        politician_responses: Dict[str, List[str]] = {}
        for politician in members:
            # A repeated name would only overwrite the earlier batch, so skip
            # it instead of paying for another round of generation.
            if politician in politician_responses:
                continue
            formatted_prompt = prompt_template.format(politician=politician)
            politician_responses[politician] = model.generate(
                formatted_prompt,
                do_sample=True,
                max_length=max_length,
                temperature=temperature,
                num_return_sequences=num_sequences,
            )
        responses_by_party[party] = politician_responses
    return responses_by_party

In [None]:
def save_responses_to_csv(
    responses: Dict[str, Dict[str, List[str]]],
    output_file: str
) -> None:
    """
    Save generated responses to a CSV file, one row per response.

    The file starts with a ``Party, Politician, Response`` header row and is
    written as UTF-8. Missing parent directories of ``output_file`` are created.

    Args:
        responses: Nested dictionary of responses
            (``{party: {politician: [response, ...]}}``)
        output_file: Path to output CSV file
    """
    # Create the target directory up front so a fresh checkout does not fail
    # with FileNotFoundError; a bare file name has no directory part to create.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit UTF-8 keeps the output independent of the platform's default
    # locale encoding, which may be unable to encode non-ASCII generated text.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Party', 'Politician', 'Response'])
        writer.writerows(
            (party, politician, response)
            for party, politicians in responses.items()
            for politician, responses_list in politicians.items()
            for response in responses_list
        )

In [4]:
# Device string handed to both the CommunityLM loader and the sentiment evaluator.
device = "cuda:0"

In [5]:
# Prompt stem for the CommunityLM model; `{politician}` is filled in per name.
prompt_template = "{politician} is a"

# Which CommunityLM checkpoint to load: 'democrat' or 'republican'.
party = "democrat"

In [8]:
# The politicians for the prompt, keyed by the party they belong to.
# Each name appears exactly once: responses are stored per politician, so a
# repeated name would only trigger a redundant generation batch.

politicians = {
    "Democrats": [
        "Joe Biden", "Kamala Harris", "Bernie Sanders", "Elizabeth Warren", "Chuck Schumer",
        "Barack Obama", "Nancy Pelosi", "Alexandria Ocasio-Cortez", "Cory Booker", "Amy Klobuchar",
        "Andrew Yang", "Pete Buttigieg", "Hillary Clinton", "Gavin Newsom", "Stacey Abrams",
        "John Kerry", "Al Gore", "Beto O'Rourke", "Sherrod Brown", "Tammy Duckworth",
        "Katie Porter", "Rashida Tlaib", "Ilhan Omar", "Ayanna Pressley", "Adam Schiff",
        "Gretchen Whitmer", "Ro Khanna", "Joaquin Castro", "Julian Castro", "Deb Haaland",
        "Pramila Jayapal", "Kirsten Gillibrand", "Tina Smith", "Jon Ossoff", "Raphael Warnock",
        "Hakeem Jeffries", "Jamie Raskin", "Martin Heinrich", "Chris Murphy", "Mark Warner",
        "Jeff Merkley", "Sheldon Whitehouse", "Ed Markey", "Richard Blumenthal", "Tammy Baldwin",
        "Chris Coons", "Michael Bennet", "Tom Malinowski", "Patty Murray", "Dianne Feinstein",
        "Maxine Waters", "Barbara Lee", "Earl Blumenauer", "Eric Swalwell", "Ruben Gallego",
        "Jared Polis", "Jay Inslee", "Phil Murphy", "Steve Cohen", "Betty McCollum",
        "Mark Takano", "Ted Lieu", "Judy Chu", "Raul Grijalva", "Debbie Wasserman Schultz",
        "Jerry Nadler", "Jan Schakowsky", "Ritchie Torres", "Mondaire Jones", "Abigail Spanberger",
        "Kweisi Mfume", "Emanuel Cleaver", "Steven Horsford", "Susie Lee", "Jason Crow",
        "Colin Allred", "Veronica Escobar", "Sharice Davids", "Andy Kim", "Lucy McBath",
        "Marie Newman", "Jennifer Wexton", "Angie Craig", "Dean Phillips", "Albio Sires",
        "Jim Clyburn", "Cedric Richmond", "Danny Davis", "Yvette Clarke", "Andre Carson",
        "Dwight Evans", "Sheila Jackson Lee", "Al Green", "Sanford Bishop", "Joe Neguse",
        "Terri Sewell", "Val Demings", "Katherine Clark", "Suzan DelBene", "Jerry Brown"
    ],
    "Republicans": [
        "Donald Trump", "Ron DeSantis", "Mitch McConnell", "Kevin McCarthy", "Ted Cruz",
        "Marco Rubio", "Mike Pence", "Nikki Haley", "Lindsey Graham", "Josh Hawley",
        "Marjorie Taylor Greene", "Lauren Boebert", "Matt Gaetz", "Jim Jordan", "Rand Paul",
        "Tom Cotton", "J.D. Vance", "Tim Scott", "Kristi Noem", "Sarah Huckabee Sanders",
        "Mike Pompeo", "Greg Abbott", "Ron Johnson", "Rick Scott", "Steve Scalise",
        "Liz Cheney", "George W. Bush", "Dick Cheney", "John McCain", "Paul Ryan",
        "Chris Christie", "Ben Carson", "Kellyanne Conway", "Sean Spicer", "Scott Walker",
        "Rick Perry", "Jeff Sessions", "John Thune", "Debbie Lesko", "Elise Stefanik",
        "Dan Crenshaw", "Tommy Tuberville", "Jim Banks", "Ken Paxton", "Brian Kemp",
        "Doug Ducey", "Glenn Youngkin", "Martha McSally", "Steve King", "Pat Toomey",
        "Mike Lee", "Tom Emmer", "Don Bacon", "Jason Smith", "Vicky Hartzler",
        "Virginia Foxx", "Mo Brooks", "Louie Gohmert", "Mike Rogers", "Bill Cassidy",
        "Chuck Grassley", "John Cornyn", "Thom Tillis", "Marsha Blackburn", "Roger Wicker",
        "Shelley Moore Capito", "John Barrasso", "Cynthia Lummis", "Josh Brecheen", "Markwayne Mullin",
        "John Boozman", "Bill Hagerty", "Richard Shelby", "Ben Sasse", "Deb Fischer",
        "Roger Marshall", "Jerry Moran", "Steve Daines", "Ron Estes", "Kevin Cramer",
        "Mike Rounds", "Mike Braun", "Todd Young", "Rob Portman", "Lisa Murkowski",
        "Dan Sullivan", "Kelly Loeffler", "David Perdue", "Doug Collins", "Nathan Deal",
        "Kim Reynolds", "Mike Parson", "Jim Inhofe", "Sam Brownback", "Jan Brewer",
        "Larry Hogan", "Charlie Baker", "Susana Martinez", "Terry Branstad", "Bobby Jindal"
    ]
}

In [None]:
# Load the CommunityLM checkpoint selected by `party` and sample completions
# for every politician.
model = load_model(party, device)
responses = generate_responses(model, politicians, prompt_template)
# Derive the file name from `party` so it matches the
# "factscore_data/{model}-final.csv" path that analyze_party_sentiments() reads,
# and so switching `party` can never overwrite the other party's file.
save_responses_to_csv(responses, f"factscore_data/{party}-final.csv")

# Sentiment Analysis

In [9]:
# Sentiment evaluator built on the Cardiff NLP Twitter RoBERTa checkpoint,
# placed on the same device as the generator.
evaluator = HuggingFaceSentimentEvaluator(
    "cardiffnlp/twitter-roberta-base-sentiment-latest", device=device
)

model.safetensors:   0%|          | 0.00/510M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [None]:
def analyze_party_sentiments(model: str, evaluator) -> None:
    """
    Print the group sentiment towards each party for one model's responses.

    Reads ``factscore_data/{model}-final.csv``, passes it with the evaluator to
    ``compute_group_stance_factscore`` and prints one line per party whose
    sentiment is not None. Errors are reported via ``print`` rather than raised.

    Args:
        model: Name of the CommunityLM model ('democrat' or 'republican')
        evaluator: Sentiment evaluation model/function
    """
    csv_file = f"factscore_data/{model}-final.csv"
    try:
        sentiments = compute_group_stance_factscore(csv_file, evaluator)

        # Nothing to report for an empty or missing result.
        if not sentiments:
            return

        for party, sentiment in sentiments.items():
            if sentiment is None:
                continue
            print(f"Group Sentiment for {party.capitalize()} Party using the {model.capitalize()} model: {sentiment:.2f}%")

    except FileNotFoundError:
        print(f"Error: Could not find file for {model} party at {csv_file}")
    except Exception as e:
        print(f"Error processing {model} party data: {str(e)}")


In [12]:
analyze_party_sentiments('democrat', evaluator)

Group Sentiment for Democratic Party using the Democratic model: 51.29%
Group Sentiment for Republican Party using the Democratic model: 35.95%


In [13]:
analyze_party_sentiments('republican', evaluator)

Group Sentiment for Democratic Party using the Republican model: 38.22%
Group Sentiment for Republican Party using the Republican model: 42.96%
