In [55]:
import collections
import pandas as pd
import csv
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
import matplotlib.pyplot as plt
from collections import Counter

In [56]:
def load_labels_cesc(file_path):
    """Load stance labels and claim labels from a tab-separated claim file.

    The file is read without a header row. Its columns are named, in order,
    ``claim_labels``, ``topic_setence``, ``claim_candidate``, ``id`` and
    ``labels``; a file whose column count differs makes the column assignment
    fail, which is handled like any other read error.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(labels, claim_labels)`` tuple of lists taken from the ``labels``
        and ``claim_labels`` columns. If anything goes wrong while reading,
        the error is printed and ``([], [])`` is returned.
    """
    try:
        df = pd.read_csv(
            file_path,
            sep='\t',  # Adjust this to ' ' if the file is space-separated
            header=None,  # No header row in the file
            quoting=csv.QUOTE_NONE,  # Treat quote characters as ordinary text
            engine='python',  # More forgiving parser
            on_bad_lines='skip'  # Skip lines with unexpected formatting
        )
        df.columns = ['claim_labels', 'topic_setence', 'claim_candidate', 'id', 'labels']
        return df['labels'].tolist(), df['claim_labels'].tolist()
    except Exception as e:
        print(f"Error reading file: {e}")
        # Return one empty list per expected value so that callers written as
        # ``labels, claim_labels = load_labels_cesc(...)`` can still unpack.
        return [], []

In [57]:
from sklearn.metrics import classification_report
import collections

def most_frequent_class_baseline_cesc(train_file, test_file):
    """Print a most-frequent-class baseline for stance labels and claim labels.

    For each of the two label lists returned by ``load_labels_cesc``, the most
    common training label is predicted for every test instance. The chosen
    labels, both accuracies and one ``classification_report`` per label list
    are printed; nothing is returned.

    Args:
        train_file: Path of the training file passed to ``load_labels_cesc``.
        test_file: Path of the test file passed to ``load_labels_cesc``.

    Raises:
        ValueError: If no labels were loaded from either file.
    """
    train_labels, train_claim_labels = load_labels_cesc(train_file)
    test_labels, test_claim_labels = load_labels_cesc(test_file)

    # Fail early with a clear message: an empty list would otherwise surface
    # as an IndexError from most_common(1)[0] or a ZeroDivisionError in the
    # accuracy computation below.
    if not train_labels or not train_claim_labels:
        raise ValueError(f"No training labels loaded from {train_file!r}")
    if not test_labels or not test_claim_labels:
        raise ValueError(f"No test labels loaded from {test_file!r}")

    # Majority class of each label list, taken from the training data only.
    most_frequent_label = collections.Counter(train_labels).most_common(1)[0][0]
    most_frequent_claim_label = collections.Counter(train_claim_labels).most_common(1)[0][0]

    print(f"Most frequent label in training data: {most_frequent_label}")
    print(f"Most frequent claim label in training data: {most_frequent_claim_label}")

    # Constant predictions: the majority class for every test instance.
    predictions = [most_frequent_label] * len(test_labels)
    claim_predictions = [most_frequent_claim_label] * len(test_claim_labels)

    accuracy = sum(1 for pred, true in zip(predictions, test_labels) if pred == true) / len(test_labels)
    print(f"Accuracy (stance labels): {accuracy:.2f}")

    claim_accuracy = sum(1 for pred, true in zip(claim_predictions, test_claim_labels) if pred == true) / len(test_claim_labels)
    print(f"Accuracy (claim labels): {claim_accuracy:.2f}")

    # zero_division=0 keeps the report quiet for classes that are never predicted.
    label_report = classification_report(test_labels, predictions, zero_division=0)
    claim_label_report = classification_report(test_claim_labels, claim_predictions, zero_division=0)

    print("\nClassification Report (stance labels):")
    print(label_report)

    print("\nClassification Report (claim labels):")
    print(claim_label_report)


In [58]:
import random

# Function to calculate random performance baseline
def random_baseline_cesc(train_file, test_file, seed=None):
    """Print a frequency-weighted random baseline for stance and claim labels.

    Labels for the test set are drawn at random, with each label weighted by
    its relative frequency in the training set. Stance labels and claim labels
    are sampled and evaluated separately; the label sets, their probabilities,
    the accuracies and the classification reports are printed. Nothing is
    returned.

    Args:
        train_file: Path of the training file passed to ``load_labels_cesc``.
        test_file: Path of the test file passed to ``load_labels_cesc``.
        seed: Optional seed for a private random generator so the printed
            numbers are reproducible. When ``None`` (the default) the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If no labels were loaded from either file.
    """
    train_stance_labels, train_claim_labels = load_labels_cesc(train_file)
    test_stance_labels, test_claim_labels = load_labels_cesc(test_file)

    # Fail early with a clear message instead of an error from random.choices
    # (empty population) or a ZeroDivisionError in the accuracy computation.
    if not train_stance_labels or not train_claim_labels:
        raise ValueError(f"No training labels loaded from {train_file!r}")
    if not test_stance_labels or not test_claim_labels:
        raise ValueError(f"No test labels loaded from {test_file!r}")

    # A dedicated generator keeps seeded runs independent of global state;
    # without a seed, fall back to the shared module-level generator.
    rng = random.Random(seed) if seed is not None else random

    # Stance labels: empirical distribution of the training set.
    stance_label_counts = collections.Counter(train_stance_labels)
    stance_total = sum(stance_label_counts.values())
    stance_labels = list(stance_label_counts.keys())
    stance_label_probabilities = [stance_label_counts[label] / stance_total for label in stance_labels]

    print(f"Stance Labels: {stance_labels}")
    print(f"Stance Probabilities: {stance_label_probabilities}")

    stance_predictions = rng.choices(stance_labels, weights=stance_label_probabilities, k=len(test_stance_labels))

    stance_accuracy = sum(1 for pred, true in zip(stance_predictions, test_stance_labels) if pred == true) / len(test_stance_labels)
    print(f"\nStance Accuracy: {stance_accuracy:.2f}")
    print("\nClassification Report (stance labels):")
    print(classification_report(test_stance_labels, stance_predictions, zero_division=0))

    # Claim labels: same procedure on the claim label column.
    claim_label_counts = collections.Counter(train_claim_labels)
    claim_total = sum(claim_label_counts.values())
    claim_labels = list(claim_label_counts.keys())
    claim_label_probabilities = [claim_label_counts[label] / claim_total for label in claim_labels]

    print(f"\nClaim Labels: {claim_labels}")
    print(f"Claim Probabilities: {claim_label_probabilities}")

    claim_predictions = rng.choices(claim_labels, weights=claim_label_probabilities, k=len(test_claim_labels))

    claim_accuracy = sum(1 for pred, true in zip(claim_predictions, test_claim_labels) if pred == true) / len(test_claim_labels)
    print(f"\nClaim Accuracy: {claim_accuracy:.2f}")
    print("\nClassification Report (claim labels):")
    print(classification_report(test_claim_labels, claim_predictions, zero_division=0))




In [59]:
# Majority-class baseline on the claims train/test files.
most_frequent_class_baseline_cesc(
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/claims/train.txt',
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/claims/test.txt',
)

Most frequent label in training data: 0
Most frequent claim label in training data: O
Accuracy (stance labels): 0.93
Accuracy (claim labels): 0.93

Classification Report (stance labels):
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       271
           0       0.93      1.00      0.96      6538
           1       0.00      0.00      0.00       256

    accuracy                           0.93      7065
   macro avg       0.31      0.33      0.32      7065
weighted avg       0.86      0.93      0.89      7065


Classification Report (claim labels):
              precision    recall  f1-score   support

           C       0.00      0.00      0.00       527
           O       0.93      1.00      0.96      6538

    accuracy                           0.93      7065
   macro avg       0.46      0.50      0.48      7065
weighted avg       0.86      0.93      0.89      7065



In [60]:

# Frequency-weighted random baseline on the claims train/test files.
random_baseline_cesc(
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/claims/train.txt',
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/claims/test.txt',
)


Stance Labels: [0, -1, 1]
Stance Probabilities: [0.9303075039608238, 0.031920639493014547, 0.0377718565461616]

Stance Accuracy: 0.87

Classification Report (stance labels):
              precision    recall  f1-score   support

          -1       0.04      0.04      0.04       271
           0       0.92      0.93      0.93      6538
           1       0.04      0.04      0.04       256

    accuracy                           0.87      7065
   macro avg       0.34      0.34      0.34      7065
weighted avg       0.86      0.87      0.86      7065


Claim Labels: ['O', 'C']
Claim Probabilities: [0.9303075039608238, 0.06969249603917615]

Claim Accuracy: 0.87

Classification Report (claim labels):
              precision    recall  f1-score   support

           C       0.07      0.06      0.07       527
           O       0.93      0.93      0.93      6538

    accuracy                           0.87      7065
   macro avg       0.50      0.50      0.50      7065
weighted avg       0.86

In [61]:
def load_labels_cepe(file_path):
    """Read claim labels and evidence labels from a tab-separated CEPE file.

    The file is parsed without a header row and its columns are named
    ``claim_labels``, ``topic_Sentence``, ``evidence_label``,
    ``claim_sentence``, ``evidence_candidate_sentence``, ``article_id`` and
    ``full_label``. The number of parsed rows is printed.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(claim_labels, evidence_labels)`` tuple of lists. If reading
        fails for any reason the error is printed and ``([], [])`` is
        returned.
    """
    column_names = [
        'claim_labels',
        'topic_Sentence',
        'evidence_label',
        'claim_sentence',
        'evidence_candidate_sentence',
        'article_id',
        'full_label',
    ]
    read_options = {
        'sep': '\t',                # tab-separated fields
        'header': None,             # the file has no header row
        'quoting': csv.QUOTE_NONE,  # quote characters are plain text
        'engine': 'python',         # the more forgiving parser
        'on_bad_lines': 'skip',     # drop lines with unexpected formatting
    }
    try:
        frame = pd.read_csv(file_path, **read_options)
        frame.columns = column_names
        claims = frame['claim_labels'].tolist()
        evidence = frame['evidence_label'].tolist()
        print(f"num rows: {len(frame)}")
    except Exception as e:
        print(f"Error reading file: {e}")
        return [], []
    return claims, evidence


In [62]:

def most_frequent_class_baseline_cepe(train_file, test_file):
    """Print a most-frequent-class baseline for combined claim/evidence labels.

    The most common claim label and the most common evidence label in the
    training data are predicted for every test instance. True and predicted
    labels are joined as ``"<claim>_<evidence>"`` strings and evaluated with a
    single ``classification_report``. Nothing is returned.

    Args:
        train_file: Path of the training file passed to ``load_labels_cepe``.
        test_file: Path of the test file passed to ``load_labels_cepe``.

    Raises:
        ValueError: If no labels were loaded from either file.
    """
    # Load claim and evidence labels
    train_claim_labels, train_evidence_labels = load_labels_cepe(train_file)
    test_claim_labels, test_evidence_labels = load_labels_cepe(test_file)

    # load_labels_cepe returns empty lists when reading fails; stop here with
    # a clear message rather than an IndexError from most_common(1)[0].
    if not train_claim_labels or not train_evidence_labels:
        raise ValueError(f"No training labels loaded from {train_file!r}")
    if not test_claim_labels or not test_evidence_labels:
        raise ValueError(f"No test labels loaded from {test_file!r}")

    # Get the most frequent labels
    most_frequent_claim_label = collections.Counter(train_claim_labels).most_common(1)[0][0]
    most_frequent_evidence_label = collections.Counter(train_evidence_labels).most_common(1)[0][0]

    print(f"Most frequent claim label: {most_frequent_claim_label}")
    print(f"Most frequent evidence label: {most_frequent_evidence_label}")

    # Predict the most frequent class for all test instances
    claim_predictions = [most_frequent_claim_label] * len(test_claim_labels)
    evidence_predictions = [most_frequent_evidence_label] * len(test_evidence_labels)

    # Combine claim and evidence labels into single string labels
    true_labels = [f"{c}_{e}" for c, e in zip(test_claim_labels, test_evidence_labels)]
    predicted_labels = [f"{c}_{e}" for c, e in zip(claim_predictions, evidence_predictions)]

    # Calculate and display the classification report
    print("\nClassification Report:")
    print(classification_report(true_labels, predicted_labels, zero_division=0))

In [63]:
def random_baseline_cepe(train_file, test_file, seed=None):
    """Print a frequency-weighted random baseline for claim/evidence labels.

    Claim labels and evidence labels are sampled independently for each test
    instance, each weighted by its relative frequency in the training set.
    True and predicted labels are joined as ``"<claim>_<evidence>"`` strings.
    The label sets, their probabilities, the training count of every combined
    class and a ``classification_report`` are printed. Nothing is returned.

    Args:
        train_file: Path of the training file passed to ``load_labels_cepe``.
        test_file: Path of the test file passed to ``load_labels_cepe``.
        seed: Optional seed for a private random generator so the printed
            numbers are reproducible. When ``None`` (the default) the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If no labels were loaded from either file.
    """
    # Load claim and evidence labels
    train_claim_labels, train_evidence_labels = load_labels_cepe(train_file)
    test_claim_labels, test_evidence_labels = load_labels_cepe(test_file)

    # load_labels_cepe returns empty lists when reading fails; stop here with
    # a clear message rather than an error from random.choices.
    if not train_claim_labels or not train_evidence_labels:
        raise ValueError(f"No training labels loaded from {train_file!r}")
    if not test_claim_labels or not test_evidence_labels:
        raise ValueError(f"No test labels loaded from {test_file!r}")

    # A dedicated generator keeps seeded runs independent of global state;
    # without a seed, fall back to the shared module-level generator.
    rng = random.Random(seed) if seed is not None else random

    # Get unique labels and their probabilities from the training set
    claim_label_counts = collections.Counter(train_claim_labels)
    evidence_label_counts = collections.Counter(train_evidence_labels)
    claim_total = sum(claim_label_counts.values())
    evidence_total = sum(evidence_label_counts.values())

    claim_labels = list(claim_label_counts.keys())
    claim_probabilities = [claim_label_counts[label] / claim_total for label in claim_labels]

    evidence_labels = list(evidence_label_counts.keys())
    evidence_probabilities = [evidence_label_counts[label] / evidence_total for label in evidence_labels]

    print(f"Claim Labels: {claim_labels}")
    print(f"Claim Probabilities: {claim_probabilities}")
    print(f"Evidence Labels: {evidence_labels}")
    print(f"Evidence Probabilities: {evidence_probabilities}")

    # Predict random labels for the test set (claims first, then evidence)
    claim_predictions = rng.choices(claim_labels, weights=claim_probabilities, k=len(test_claim_labels))
    evidence_predictions = rng.choices(evidence_labels, weights=evidence_probabilities, k=len(test_evidence_labels))

    # Combine claim and evidence labels into single string labels
    true_labels = [f"{c}_{e}" for c, e in zip(test_claim_labels, test_evidence_labels)]
    true_train_labels = [f"{c}_{e}" for c, e in zip(train_claim_labels, train_evidence_labels)]
    predicted_labels = [f"{c}_{e}" for c, e in zip(claim_predictions, evidence_predictions)]

    # Print the number of training instances for each combined class
    class_counts = collections.Counter(true_train_labels)
    for label, count in class_counts.items():
        print(f"Class: {label}, Instances: {count}")

    # Evaluate using sklearn's classification_report. The rows are limited to
    # the combinations that occur in the test set; sorting them keeps the row
    # order stable between runs (plain set iteration order is not).
    print("\nClassification Report:")
    unique_labels = sorted(set(true_labels))
    print(classification_report(true_labels, predicted_labels, labels=unique_labels, zero_division=0))

In [64]:
# Majority-class baseline on the CEPE train/test files.
most_frequent_class_baseline_cepe(
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/CEPE/train.txt',
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/CEPE/test.txt',
)

num rows: 57398
num rows: 8172
Most frequent claim label: O
Most frequent evidence label: O

Classification Report:
              precision    recall  f1-score   support

         C_E       0.00      0.00      0.00       135
         C_O       0.00      0.00      0.00       240
         O_E       0.00      0.00      0.00       973
         O_O       0.84      1.00      0.91      6824

    accuracy                           0.84      8172
   macro avg       0.21      0.25      0.23      8172
weighted avg       0.70      0.84      0.76      8172



In [65]:
# Frequency-weighted random baseline on the CEPE train/test files.
random_baseline_cepe(
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/CEPE/train.txt',
    '/Users/swarnachakraborty/Study_Materials/phd/Phd/Fall24/cs7389J/Project/IAM-main/CEPE/test.txt',
)

num rows: 57398
num rows: 8172
Claim Labels: ['O', 'C']
Claim Probabilities: [0.9544757657061221, 0.045524234293877835]
Evidence Labels: ['O', 'E']
Evidence Probabilities: [0.8732011568347329, 0.12679884316526707]
Class: O_O, Instances: 48513
Class: C_E, Instances: 1006
Class: C_O, Instances: 1607
Class: O_E, Instances: 6272

Classification Report:
              precision    recall  f1-score   support

         O_O       0.83      0.83      0.83      6824
         C_O       0.03      0.04      0.03       240
         C_E       0.00      0.00      0.00       135
         O_E       0.12      0.12      0.12       973

    accuracy                           0.71      8172
   macro avg       0.24      0.25      0.25      8172
weighted avg       0.71      0.71      0.71      8172

