In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline

In [4]:
# Model names from Hugging Face
EXTRACTION_MODEL_NAME = "facebook/bart-large-cnn"       # For summarization and extraction
GENERATION_MODEL_NAME = "gpt2"                          # For generating counterfactuals (causal model)
WIKIPEDIA_MODEL_NAME = "deepset/roberta-base-squad2"    # For fact-checking against Wikipedia data

In [5]:
# Load models and tokenizers
extraction_tokenizer = AutoTokenizer.from_pretrained(EXTRACTION_MODEL_NAME)
extraction_model = AutoModelForSeq2SeqLM.from_pretrained(EXTRACTION_MODEL_NAME)

generation_tokenizer = AutoTokenizer.from_pretrained(GENERATION_MODEL_NAME)
generation_model = AutoModelForCausalLM.from_pretrained(GENERATION_MODEL_NAME)  # Fixed: GPT-2 is a causal LM

wikipedia_tokenizer = AutoTokenizer.from_pretrained(WIKIPEDIA_MODEL_NAME)
question_answering = pipeline("question-answering", model=WIKIPEDIA_MODEL_NAME, tokenizer=wikipedia_tokenizer)



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [6]:
# Extraction pipeline
def extract_statements(article: str, max_length: int = 150):
    """Extract key statements and hypotheses from the article."""
    inputs = extraction_tokenizer.encode(article, return_tensors="pt", truncation=True, max_length=1024)
    summary_ids = extraction_model.generate(
        inputs,
        max_length=max_length,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True
    )
    return extraction_tokenizer.decode(summary_ids[0], skip_special_tokens=True)



In [7]:
# Generation pipeline
def generate_counterfactuals(statement: str, num_return_sequences: int = 3, max_length: int = 100):
    """Generate counterfactuals or alternative hypotheses from a statement."""
    prompt = f"Given the statement: '{statement}', generate alternative hypotheses or counterfactuals.\n"
    inputs = generation_tokenizer.encode(prompt, return_tensors="pt")
    outputs = generation_model.generate(
        inputs,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=generation_tokenizer.eos_token_id  # Prevents tokenization warnings
    )
    return [generation_tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

In [8]:
# Fact-checking pipeline
def assess_contradiction(counterfactual: str, context: str):
    """Assess if the counterfactual contradicts facts using the Wikipedia-based model."""
    question = f"Is it true that {counterfactual.strip()}?"
    response = question_answering(question=question, context=context)
    return {
        "question": question,
        "answer": response.get('answer', 'N/A'),
        "confidence": response.get('score', 0)
    }

In [9]:
# Example usage
if __name__ == "__main__":
    article_text = """
    Companies that prioritize customer satisfaction tend to outperform competitors. Innovation in product development is essential for long-term growth.
    Investing in employee training leads to higher productivity. Market diversification can mitigate financial risks.
    """

    print("Extracting statements...")
    extracted = extract_statements(article_text)
    print(f"Extracted Summary: {extracted}\n")

    print("Generating counterfactuals and assessing contradictions...")
    for statement in extracted.split('. '):
        statement = statement.strip()
        if statement:
            counterfactuals = generate_counterfactuals(statement)
            print(f"\nStatement: {statement}")
            for cf in counterfactuals:
                assessment = assess_contradiction(cf, article_text)
                print(f"- Counterfactual: {cf}\n  -> Wikipedia Assessment: {assessment['answer']} (Confidence: {assessment['confidence']:.2f})")


Extracting statements...
Extracted Summary: Companies that prioritize customer satisfaction tend to outperform competitors. Innovation in product development is essential for long-term growth. Investing in employee training leads to higher productivity. Market diversification can mitigate financial risks for companies that focus on customer satisfaction.

Generating counterfactuals and assessing contradictions...

Statement: Companies that prioritize customer satisfaction tend to outperform competitors
- Counterfactual: Given the statement: 'Companies that prioritize customer satisfaction tend to outperform competitors', generate alternative hypotheses or counterfactuals.

In conclusion, our findings suggest that the current work may be of little practical use to the business as a whole, and that the business may be unable to effectively manage customer satisfaction in a sustainable way.

There is a substantial need for better management practices in order to better address the growing