In [None]:
! pip install wikipedia-api ollama pqdm psycopg2

# Select our targets

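This cell selects `page_id` and `page_title` from the `page` table in the `wikipedia` database and joins it with the `target_entities` table to keep only pages that are marked as targets. It also joins the `wiki_body` table to pull in each page's stored `page_body` text, and restricts the result to the main article namespace (`page_namespace = 0`). The result is the list of page IDs, titles, and body text that are relevant for inference.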

In [6]:
%%sql
SELECT
    pg.page_id,
    pg.page_title,
    txt.page_body
FROM page pg
JOIN target_entities tgt ON tgt.page_id = pg.page_id
JOIN wiki_body txt ON txt.page_id = pg.page_id
WHERE pg.page_namespace = 0

Unnamed: 0,page_id,page_title,page_body
0,72822336,Erika_Kirk,Erika Lane Kirk (née Frantzve; born November 2...
1,278119,Charlie_Sheen,"Carlos Irwin Estévez (born September 3, 1965),..."
2,4848272,Donald_Trump,"Donald John Trump (born June 14, 1946) is an A..."
3,62365610,Nick_Fuentes,"Nicholas Joseph Fuentes (born August 18, 1998)..."
4,62202727,Kash_Patel,"Kashyap Pramod Patel (born February 25, 1980) ..."
...,...,...,...
6777,89635,Kerri_Green,Kerri Green (born 1967) is an American actress...
6778,192481,50_Cent,"Curtis James Jackson III (born July 6, 1975), ..."
6779,215566,Pink_(singer),Alecia Beth Moore-Hart (née Moore; born Septem...
6780,27611,Sarah_Michelle_Gellar,Sarah Michelle Prinze (née Gellar GHEL-ər; bo...


# Generic Housekeeping

This sets up ambient config and various data structures for inference and result handling.

In [8]:
import dataclasses
import math
import os
import random
import time
import uuid

import ollama
import pandas as pd
import pydantic
import wikipediaapi
from sqlalchemy import String, Integer, Float, Boolean, UUID, create_engine
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Mapped, mapped_column, declarative_base
from tqdm.notebook import tqdm

# Registers `progress_apply` on pandas objects so row-wise work shows a progress bar.
tqdm.pandas()

# Connection URLs can be overridden through environment variables so the notebook
# can run against other hosts or accounts without editing code. The defaults are
# the original local development settings.
DATABASE_URL = os.environ.get('WIKIPEDIA_DATABASE_URL', 'mysql://root@localhost:3306/wikipedia')
DATABASE_ENGINE = create_engine(DATABASE_URL)

RESULT_URL = os.environ.get('CNCLD_RESULT_URL', 'postgresql://rickmark@localhost:5432/cncld')
# The result engine is created with the AUTOCOMMIT isolation level, so the
# inference loop does not issue explicit commits.
RESULT_ENGINE = create_engine(RESULT_URL, isolation_level="AUTOCOMMIT")

In [9]:
@dataclasses.dataclass
class InferenceContext:
    """Everything the model is shown about one subject.

    Attributes:
        page_id: Identifier copied from the source row's ``page_id``.
        page_title: Title of the subject's page.
        page_type: Kind of subject (the notebook currently always passes ``'person'``).
        recent_news: News snippets, one string per entry; may be empty.
        wikipedia_body: Wikipedia text describing the subject.
    """
    page_id: int
    page_title: str
    page_type: str
    recent_news: list[str]
    wikipedia_body: str

    def __str__(self):
        """Render the context as the text block that is appended to the task prompt."""
        # The join happens outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError on Python versions before 3.12.
        news_block = '\n'.join(self.recent_news)
        return f"{self.page_title}\n\n{self.wikipedia_body}\n\n# Recent News:\n{news_block}"

# Structured answer requested from the model: the JSON schema generated from this
# class is passed as `format` to the chat call, and the reply is validated against it.
# NOTE: documented with `#` comments on purpose. A class docstring would be emitted
# as `description` in the generated schema and so change what is sent to the model.
class CancelInferenceResult(pydantic.BaseModel):
    # Verdict for the prompt's question; stored as `canceled` on the result row.
    is_toxic: bool
    # Whether the verdict could be reversed by the subject.
    revocable: bool
    # The model's explanation of its verdict.
    rationale: str
    # Steps needed to reverse the verdict; only kept downstream when `revocable` is true.
    penance: str | None

@dataclasses.dataclass
class CancelResult:
    """One inference outcome: the input context, the parsed model verdict, and a confidence value."""

    context: InferenceContext
    result: CancelInferenceResult
    confidence: float

Base = declarative_base()
@dataclasses.dataclass()
class Result(Base):
    """Row of the ``the_list`` table: one stored verdict about one subject."""
    __tablename__ = 'the_list'

    # Primary key; a fresh UUID is generated when none is supplied.
    id: Mapped[uuid.UUID] = mapped_column(UUID, primary_key=True, default=uuid.uuid4, index=True)
    # Subject's page title.
    title: Mapped[str] = mapped_column(String)
    # The model's verdict (`is_toxic` of the inference result).
    canceled: Mapped[bool] = mapped_column(Boolean)
    # Whether the verdict can be reversed.
    revocable: Mapped[bool] = mapped_column(Boolean)
    # Confidence value computed from the model's token log-probability.
    confidence: Mapped[float] = mapped_column(Float)
    # The model's explanation.
    rationale: Mapped[str] = mapped_column(String)
    # Steps to reverse the verdict; None when the verdict is not revocable.
    penance: Mapped[str | None] = mapped_column(String, nullable=True)
    # Key of the prompt set (entry of PROMPTS) that produced this verdict.
    dimension: Mapped[str] = mapped_column(String)


    def __str__(self):
        """Return a human-readable summary of the verdict, ending with a blank line."""
        verdict = 'Canceled' if self.canceled else 'Not Canceled'
        # `penance` is None for non-revocable verdicts. Concatenating it
        # unconditionally raised TypeError for canceled, non-revocable subjects.
        if self.canceled and self.penance is not None:
            penance_line = "\nPenance: " + self.penance
        else:
            penance_line = ""
        return f"{self.title}: {verdict} (confidence: {self.confidence:.2f})\nRationale: {self.rationale}{penance_line}\n\n"

# Prompt sets keyed by prompt class. Each entry has two strings:
#   "system" - sent as the system message of the chat request;
#   "task"   - sent as the start of the user message, with the rendered
#              InferenceContext appended directly after it.
# The key is also stored on each result row as its `dimension`.
PROMPTS = {
    # Evaluates the subject from the viewpoint of a supporter of LGBT rights.
    "lgbt":
    {
        "system": """
You are a competent, reasonable, investigative journalist.  You are a supporter of LGBT rights and evaluate the subjects of your investigation for their toxicity to the LGBT community.  You do not exclude trans individuals from this definition.  You are fair and understand that people can change over time.  You provide thoughtful rationale to your opinions and also provide an explanation of what if anything the subject of the investigation could do to no longer be considered toxic to the LGBT community.
        """,
        "task": """
# Task

Given the following context, determine whether the subject of the investigation is toxic to the LGBT community.  Explain why you made this decision.  Determine whether the subject of the investigation can have this decision revoked.  Explain what steps would need to be taken to revoke this decision.

# Context

        """
    },
    # Evaluates the subject from the viewpoint of a moral and fiscal conservative.
    "pinko": {
        "system": """
You are a competent, reasonable, investigative journalist.  You are a moral and fiscal conservative that supports true family values candidates.  You do not support those individuals which only make these statements in public but who's actions consistently run contrary to those values.  You know that people can change and past behavior is not the only indicator of a persons values.  Issues for political candidates strongly include their voting history as well as various policies championed by them.  Prioritize financial and public policy over social issues, but do not exclude them.  Do not promote or support outright hate speech.
        """,
        "task": """
# Task

Given the following context, determine whether the subject of the investigation is a supporter of overly dangerous liberal ideals (outright communism), or is outright hypocritical in their expressed views.  Explain why you made this decision.  Determine whether the subject of the investigation can have this decision revoked.  Explain what steps would need to be taken to revoke this decision.

# Context

        """
    }
}


# Enrichment

This block enriches each page with its Wikipedia summary text (and, in the future, news articles, etc.).

In [10]:
# Shared English-language Wikipedia client used by the enrichment step.
# The MediaWiki API guidelines ask for a meaningful User-Agent, so one is set explicitly.
wiki_wiki = wikipediaapi.Wikipedia(language='en', user_agent='CnclD/1.0 (rickmark@outlook.com)')

def get_body(body_row):
    """Return the Wikipedia summary for the row's ``page_title``.

    Returns ``None`` when the page does not exist or when the lookup raises.
    """
    try:
        article = wiki_wiki.page(body_row['page_title'])
        return article.summary if article.exists() else None
    except Exception:
        # Best effort: a failed lookup simply leaves the row without a body.
        return None

# Work on a copy so `target_entities` itself is left untouched.
df = target_entities.copy()
# One lookup per row (axis=1), with a progress bar.
df['page_body'] = df.progress_apply(get_body, axis=1)
# Keep only the rows for which a body could be fetched.
has_body = df['page_body'].notnull()
df = df[has_body]

  0%|          | 0/6782 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [8]:
# Checkpoint: write the enriched rows to the `wiki_body` table, replacing any
# existing table of that name.
df.to_sql(
    name='wiki_body',
    con=DATABASE_ENGINE,
    index=False,
    if_exists='replace',
    chunksize=5000,
)

9386

In [14]:
# Map to the input data shape for inference

def map_row(inference_row):
    """Build the ``InferenceContext`` for one enriched row.

    Reads ``page_id``, ``page_title`` and ``page_body`` from the row. Underscores
    in the title are turned into spaces, the page type is fixed to ``'person'``,
    and no news is attached.
    """
    fields = {
        'page_id': inference_row['page_id'],
        'page_title': inference_row['page_title'].replace("_", " "),
        'page_type': 'person',
        'recent_news': [],
        'wikipedia_body': inference_row['page_body'],
    }
    return InferenceContext(**fields)

# One InferenceContext per enriched row, built with a progress bar over the frame.
INPUT_CONTEXTS = [map_row(row) for _, row in tqdm(df.iterrows(), total=len(df))]
INPUT_CONTEXTS

  0%|          | 0/6782 [00:00<?, ?it/s]

[InferenceContext(page_id=72822336, page_title='Erika Kirk', page_type='person', recent_news=[], wikipedia_body='Erika Lane Kirk (née Frantzve; born November 20, 1988) is an American businesswoman, nonprofit executive, and podcaster. She is the chairwoman and CEO of the conservative organization Turning Point USA (TPUSA), which her late husband Charlie Kirk co-founded and led until his assassination in September 2025. She was appointed as his successor following his death.\nKirk previously won Miss Arizona USA in 2012 and participated in Miss USA 2012. She is the founder of Everyday Heroes Like You, a nonprofit supporting under-recognized charities, and BIBLEin365, a faith-based initiative that promotes reading the Bible. She also hosts the Midweek Rise Up podcast. She and Charlie began dating in 2019, married in 2021, and had two children together.'),
 InferenceContext(page_id=278119, page_title='Charlie Sheen', page_type='person', recent_news=[], wikipedia_body='Carlos Irwin Estévez 

# Perform Inference

This block runs each target through the selected prompt and produces the resulting judgement.

In [12]:
def process_input(single_input: InferenceContext, prompt_class, confidence_token_index: int = 6) -> CancelResult:
    """Ask the model for a verdict on one subject.

    Args:
        single_input: Context describing the subject; its ``str()`` form is
            appended to the task prompt.
        prompt_class: Key into ``PROMPTS`` selecting the system/task prompt pair.
        confidence_token_index: Position in the response's ``logprobs`` whose
            log-probability is converted into the confidence value.
            NOTE(review): the default of 6 is presumably the token carrying the
            ``is_toxic`` value in the JSON reply. Confirm against the model's
            tokenization, since the sample run prints a confidence of 0.00.

    Returns:
        A ``CancelResult`` holding ``single_input``, the parsed reply and the confidence.

    Raises:
        KeyError: if ``prompt_class`` is not a key of ``PROMPTS``.
        Whatever the client raises on request failure, and whatever pydantic
        raises when the reply does not match ``CancelInferenceResult``.
    """
    prompt = PROMPTS[prompt_class]
    client = ollama.Client(timeout=600.0)
    response = client.chat(
        model="llama4",
        messages=[
            {"role": "system", "content": prompt['system']},
            {"role": "user", "content": prompt['task'] + str(single_input)}
        ],
        logprobs=True,
        options={
            'num_predict': 2048
        },
        # Constrain the reply to the JSON schema of the expected result model.
        format=CancelInferenceResult.model_json_schema()
    )

    parsed_response = CancelInferenceResult.model_validate_json(response.message.content)

    return CancelResult(
        # Use the argument, not the notebook-global loop variable `input_context`,
        # so the function also works when called outside the inference loop.
        context=single_input,
        result=parsed_response,
        confidence=math.exp(response.logprobs[confidence_token_index].logprob)
    )

def process_result(result_input_context: InferenceContext, prompt_class) -> Result:
    """Run inference for one subject and convert the outcome into a ``Result`` row.

    The penance text is kept only when the verdict is revocable, and
    ``prompt_class`` is stored as the row's ``dimension``.
    """
    outcome = process_input(result_input_context, prompt_class)
    verdict = outcome.result
    return Result(
        title=outcome.context.page_title.replace('_', ' '),
        canceled=verdict.is_toxic,
        revocable=verdict.revocable,
        rationale=verdict.rationale,
        penance=verdict.penance if verdict.revocable else None,
        confidence=outcome.confidence,
        dimension=prompt_class,
    )


In [None]:
RESULTS = []
# In-place shuffle, so the targets are processed in random order.
random.shuffle(INPUT_CONTEXTS)

with RESULT_ENGINE.connect() as conn:
    for input_context in tqdm(INPUT_CONTEXTS):
        # Nothing to judge when no body text is available.
        if input_context.wikipedia_body is None:
            continue

        try:
            print(f"Processing {input_context.page_title} ({len(input_context.wikipedia_body)} bytes)")
            inference_result = process_result(input_context, 'pinko')
            print(str(inference_result))
            RESULTS.append(inference_result)

            # Public attributes only; underscore-prefixed entries are skipped.
            data = {k: v for k, v in vars(inference_result).items() if not k.startswith('_')}

            # Upsert keyed on the title: an existing row has its verdict columns overwritten.
            # NOTE(review): assumes the database has a unique index on `title` - confirm.
            stmt = insert(inference_result.__table__).values(data)
            stmt = stmt.on_conflict_do_update(
                index_elements=[inference_result.__table__.c.title],
                set_={
                    column: stmt.excluded[column]
                    for column in ('canceled', 'revocable', 'rationale', 'penance', 'confidence', 'dimension')
                },
            )
            conn.execute(stmt)
            time.sleep(0.5)
        except Exception as e:
            # One failing subject is reported and skipped; the loop carries on.
            print(f"Error processing {input_context.page_title}: {e}")

RESULT_DF = pd.DataFrame(RESULTS)
RESULT_DF

  0%|          | 0/6782 [00:00<?, ?it/s]

Processing Zac Taylor (883 bytes)
Zac Taylor: Not Canceled (confidence: 0.00)
Rationale: The provided context about Zac Taylor does not indicate that he holds liberal or communist views, nor does it suggest he is hypocritical about his expressed views. The information given focuses on his career as a football coach and player, without mentioning political stances or policies that could be considered liberal or communist. Therefore, based on the information available, there is no evidence to classify Zac Taylor as a supporter of overly dangerous liberal ideals or as being outright hypocritical in his expressed views.


Processing Kaya Scodelario (564 bytes)
Kaya Scodelario: Not Canceled (confidence: 0.00)
Rationale: The provided context about Kaya Scodelario does not indicate that she holds liberal or communist views, nor does it suggest she is hypocritical about her expressed views. The information given focuses on her career as an actress and her roles in various films and series. The