# Phase 8: Inference & System Pipeline

This notebook assembles the complete end-to-end system for checking whether a character's backstory is consistent with the source text.

In [None]:
import pandas as pd
import torch
import json
import re
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Project layout: raw/derived data plus the output folders of earlier phases.
PROJECT_ROOT = Path("/root/DataDivas_KDSH_2026")
DATA_DIR = PROJECT_ROOT / "Data"
PHASE5_OUTPUT = PROJECT_ROOT / "phase5_output"
PHASE6_OUTPUT = PROJECT_ROOT / "phase6_output"


def _first_existing(*candidates):
    """Return the first candidate path that exists on disk.

    When none of the candidates exist, the last one is returned so that the
    subsequent open/load call fails with an error naming a concrete path.
    """
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return candidates[-1]


# Model selection: prefer phase5_output, fall back to Data.
model_selection_path = _first_existing(
    PHASE5_OUTPUT / "model_selection.json",
    DATA_DIR / "model_selection.json",
)
with open(model_selection_path, encoding="utf-8") as f:
    model_config = json.load(f)

model_name = model_config['primary_model']['huggingface_name']
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Fine-tuned weights: prefer phase6, then phase5, then Data.
best_model_path = _first_existing(
    PHASE6_OUTPUT / "best_model.pt",
    PHASE5_OUTPUT / "best_model.pt",
    DATA_DIR / "best_model.pt",
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Deserialize onto the CPU first: without map_location, a checkpoint saved
# from a GPU run cannot be loaded on a CPU-only host. The model is moved to
# the selected device right after the weights are in place.
model.load_state_dict(torch.load(best_model_path, map_location='cpu'))
model.eval()
model = model.to(device)

# Training results: prefer phase6, then Data, then phase5.
training_results_path = _first_existing(
    PHASE6_OUTPUT / "training_results.json",
    DATA_DIR / "training_results.json",
    PHASE5_OUTPUT / "training_results.json",
)
with open(training_results_path, encoding="utf-8") as f:
    training_results = json.load(f)

print("System loaded. Model:", model_name, "F1:", training_results.get('best_f1'))

In [None]:
class CharacterConsistencyChecker:
    """Judge whether a character backstory is consistent with a text.

    The text is split into word-based chunks, every chunk is classified
    against the backstory, and the per-chunk predictions are combined by a
    majority vote.
    """

    def __init__(self, model, tokenizer, device):
        """Store the objects used for inference.

        Args:
            model: Called with ``input_ids`` and ``attention_mask``; its
                result must expose a ``logits`` attribute.
            tokenizer: Called with the input string; its result must provide
                ``'input_ids'`` and ``'attention_mask'`` tensors.
            device: Device the encoded tensors are moved to before the
                forward pass.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def chunk_text(self, text, chunk_size=384):
        """Split ``text`` into chunks of at most ``chunk_size`` words.

        Words are obtained by whitespace splitting, so original spacing is
        normalised to single spaces. Empty or whitespace-only text yields
        an empty list.
        """
        words = text.split()
        return [
            " ".join(words[start:start + chunk_size])
            for start in range(0, len(words), chunk_size)
        ]

    def classify(self, backstory, chunk_text):
        """Classify one (backstory, chunk) pair.

        Returns:
            dict with ``'prediction'`` (index of the highest-probability
            class) and ``'confidence'`` (the largest softmax probability).
        """
        # The marker strings are written into the text by hand.
        # NOTE(review): the tokenizer may add its own special tokens on top
        # of these -- confirm this matches the input format used in training
        # before changing it.
        input_text = "[CLS] " + backstory + " [SEP] " + chunk_text + " [SEP]"
        encoding = self.tokenizer(
            input_text,
            truncation=True,
            max_length=512,
            padding='max_length',
            return_tensors='pt',
        )
        input_ids = encoding['input_ids'].to(self.device)
        attention_mask = encoding['attention_mask'].to(self.device)
        with torch.no_grad():
            logits = self.model(input_ids=input_ids, attention_mask=attention_mask).logits
            probs = torch.softmax(logits, dim=1)
            confidence = torch.max(probs).item()
            prediction = torch.argmax(probs, dim=1).item()
        return {'prediction': prediction, 'confidence': confidence}

    def check_consistency(self, character_name, backstory, full_text):
        """Aggregate per-chunk predictions into one verdict for a character.

        Args:
            character_name: Echoed back in the result.
            backstory: Backstory text compared against every chunk.
            full_text: Text to split into chunks and classify.

        Returns:
            dict with ``'character_name'``, ``'label'`` (1 or 0),
            ``'label_text'`` ("CONSISTENT" or "CONTRADICT"), ``'confidence'``
            (mean chunk confidence) and ``'chunk_summary'`` (vote counts).
            A tie -- including the case of no chunks at all -- resolves to
            label 0; with no chunks the confidence is reported as 0.0.
        """
        chunks = self.chunk_text(full_text)
        chunk_results = [self.classify(backstory, chunk) for chunk in chunks]
        total = len(chunk_results)
        consistent_count = sum(1 for r in chunk_results if r['prediction'] == 1)
        contradict_count = total - consistent_count
        # Empty or whitespace-only text produces no chunks; avoid dividing by
        # zero so a single blank row cannot abort a whole inference run.
        if total:
            avg_confidence = sum(r['confidence'] for r in chunk_results) / total
        else:
            avg_confidence = 0.0
        final_prediction = 1 if consistent_count > contradict_count else 0
        label = "CONSISTENT" if final_prediction == 1 else "CONTRADICT"
        return {
            'character_name': character_name,
            'label': final_prediction,
            'label_text': label,
            'confidence': avg_confidence,
            'chunk_summary': {'consistent': consistent_count, 'contradict': contradict_count}
        }

# Bind the loaded model, tokenizer and device into one reusable checker.
checker = CharacterConsistencyChecker(
    model=model,
    tokenizer=tokenizer,
    device=device,
)
print("Checker ready")

In [None]:
# Read the feature table and run the checker once per row.
test_df = pd.read_parquet(DATA_DIR / "feature_data.parquet")
print("Running inference on", len(test_df), "samples...")

inference_results = []
for _, sample in test_df.iterrows():
    verdict = checker.check_consistency(
        sample['character_name'],
        sample['backstory'],
        sample['chunk_text'],
    )
    chunk_votes = verdict['chunk_summary']

    # Flatten the nested verdict into a single record keyed by entry_id.
    record = {'entry_id': sample['entry_id']}
    record.update(
        character_name=verdict['character_name'],
        label=verdict['label'],
        label_text=verdict['label_text'],
        confidence=verdict['confidence'],
        consistent_chunks=chunk_votes['consistent'],
        contradict_chunks=chunk_votes['contradict'],
    )
    inference_results.append(record)

print("Inference complete")

In [None]:
# Collect the per-entry records into a table for reporting and export.
results_df = pd.DataFrame(inference_results)

# Summary banner.
banner = "=" * 50
print(banner)
print("RESULTS")
print(banner)
print("Total:", len(results_df))
print("Label distribution:")
for label, count in results_df['label_text'].value_counts().items():
    print("  ", label, ":", count)
print("Confidence: mean=", round(results_df['confidence'].mean(), 2), "max=", round(results_df['confidence'].max(), 2))

# Full per-entry results, then the reduced column set for the submission file.
results_df.to_csv(DATA_DIR / "final_predictions.csv", index=False)
results_df[['entry_id', 'label', 'confidence']].to_csv(DATA_DIR / "submission.csv", index=False)

print("\nOutputs saved:")
print("  - final_predictions.csv")
print("  - submission.csv")

## System Complete

All 8 phases implemented:

1. Data Ingestion - Pathway tables
2. Character Extraction - Name matching
3. Chunking Strategy - Sentence-aware
4. Feature Engineering - Semantic signals
5. Model Selection - DeBERTa-v3-NLI
6. Training - Binary classifier
7. Evidence Generation - Pattern-based
8. Inference Pipeline - End-to-end

Outputs written by this notebook: final_predictions.csv and submission.csv. The trained weights in best_model.pt are loaded here, not produced by this phase.