In [6]:
import json
import os
import re
import time
from datetime import datetime

import mwclient

# --- CONFIGURATION ---
# Wikimedia asks API clients to send a descriptive User-Agent with contact
# details, so the mwclient connection identifies itself explicitly instead of
# relying on the library default.
SITE = mwclient.Site(
    "www.wikidata.org",
    clients_useragent="WikidataRepairEval/1.0 (PhD Research; mailto:miguel.vazquez@wu.ac.at)",
)
# Example: P569 (Date of Birth) is a classic source of "Logical" (Type A) constraints
# You can add more properties here: P570 (Date of Death), P21 (Sex or Gender)
TARGET_PROPERTIES = ["P569"]



## Index candidates

Wikidata generates static pages listing current violations (e.g., Wikidata:Database reports/Constraint violations/Summary).

Strategy: Parse the history of these report pages. If an Entity $X$ appears in the "Violation Report" on Date $T_1$ and disappears on Date $T_2$, a repair event occurred between $T_1$ and $T_2$.


In [7]:
def get_report_page_title(property_id):
    """
    Build the title of the constraint-violation report page for a property.

    The property id is appended as a sub-page of the shared
    "Constraint violations" report root.
    """
    report_root = "Wikidata:Database reports/Constraint violations"
    return "{}/{}".format(report_root, property_id)


def extract_qids(text):
    """
    Collect the distinct Q-IDs that appear as plain wiki links in `text`.

    Only links of the exact form [[Q123]] are matched; falsy input
    (None or an empty string) yields an empty set.
    """
    if not text:
        return set()
    # Report pages list offending items as bare [[Q...]] links.
    return {match.group(1) for match in re.finditer(r"\[\[(Q\d+)\]\]", text)}


def mine_repairs(property_id, max_items=50):
    """
    Mine the history of a property's violation report for repair candidates.

    Consecutive revisions of the report page are compared pairwise. A Q-ID that
    is listed in the older revision but not in the newer one is recorded as a
    candidate repair (the item was fixed, or it was deleted).

    Args:
        property_id: Property whose report page is mined (e.g. "P569").
        max_items: Maximum number of report revisions to request.

    Returns:
        A list of dicts with the keys "qid", "property_id", "fix_date",
        "report_revision_old" and "report_revision_new". The list is empty
        when the report page does not exist.
    """
    print(f"[*] Mining history for {property_id}...")
    page = SITE.pages[get_report_page_title(property_id)]

    if not page.exists:
        print(f"[!] Report page for {property_id} not found.")
        return []

    candidates = []

    # History is walked from the newest revision backwards, so each pair is
    # (newer, older): a Q-ID in the older text but not in the newer one was
    # fixed (or deleted) between the two report updates.
    revisions = list(page.revisions(max_items=max_items, prop="content|timestamp|ids"))
    print(f"    Found {len(revisions)} revisions to analyze.")

    for newer_rev, older_rev in zip(revisions, revisions[1:]):
        older_text = older_rev.get("*")
        newer_text = newer_rev.get("*")
        if older_text is None or newer_text is None:
            # Without the text of both revisions the set difference is
            # meaningless: a missing newer text would flag every Q-ID of the
            # older revision as "fixed".
            print(
                f"    [!] Skipping Rev {older_rev['revid']} -> {newer_rev['revid']}: content unavailable."
            )
            continue

        fixed_qids = extract_qids(older_text) - extract_qids(newer_text)
        if not fixed_qids:
            continue

        timestamp = datetime(*newer_rev["timestamp"][:6]).isoformat()
        print(
            f"    [{timestamp}] Found {len(fixed_qids)} fixes (Rev {older_rev['revid']} -> {newer_rev['revid']})"
        )

        # Sort numerically so the candidate file is identical across runs
        # (plain set iteration order varies between interpreter sessions).
        for qid in sorted(fixed_qids, key=lambda q: int(q[1:])):
            candidates.append(
                {
                    "qid": qid,
                    "property_id": property_id,
                    "fix_date": timestamp,
                    "report_revision_old": older_rev["revid"],
                    "report_revision_new": newer_rev["revid"],
                }
            )

    return candidates


Store the candidates in a JSON file with the following schema:

```
[
  {
    "qid": "Q137258824",
    "property_id": "P569",
    "fix_date": "2025-12-10T09:36:47",
    "report_revision_old": 2439960515,
    "report_revision_new": 2440403690
  },
  ...
]
```

In [13]:
# Load cached candidates when available; otherwise mine them and cache the result.
filename = "repair_candidates.json"
all_candidates = []

if os.path.exists(filename):
    try:
        with open(filename, encoding="utf-8") as f:
            cached_candidates = json.load(f)
    except (OSError, json.JSONDecodeError) as exc:
        # A truncated or corrupt cache should trigger a re-mine, not abort the cell.
        print(f"\n[!] {filename} could not be read ({exc}).")
        cached_candidates = None
    if isinstance(cached_candidates, list) and cached_candidates:
        all_candidates = cached_candidates
        print(f"\n[+] {filename} Exists. Loaded from disk. Found {len(all_candidates)} candidates.")
    else:
        print(f"\n[!] {filename} exists but holds no usable candidates. Mining again...")
else:
    print(f"\n[!] {filename} does not exist. Starting fresh...")

if not all_candidates:
    for prop in TARGET_PROPERTIES:
        all_candidates.extend(mine_repairs(prop, max_items=20))  # Start small for testing

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(all_candidates, f, indent=2)

    print(f"\n[+] Done. Found {len(all_candidates)} candidates. Saved to {filename}")


[+] repair_candidates.json Exists. Loaded from disk. Found 304 candidates.


## Fetching the Repair

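Once we have the candidate entity ID and a time window ending at the report's fix date (the window reaches back a fixed number of days, so it covers $T_1$ to $T_2$ for regularly updated reports), we use the MediaWiki REST API to list the entity's revision history.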

Endpoint: GET /w/rest.php/v1/page/{page_title}/history

We are looking for the specific revision $R$ where the constraint status changed.

In [14]:
import gzip
import json
import os
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path

import ijson
import requests

# --- HTTP client settings ---
HEADERS = {"User-Agent": "WikidataRepairEval/1.0 (PhD Research; mailto:miguel.vazquez@wu.ac.at)"}
API_ENDPOINT = "https://www.wikidata.org/w/api.php"
REST_HISTORY_URL = "https://www.wikidata.org/w/rest.php/v1/page/{qid}/history"
ENTITY_DATA_URL = "https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"

# --- Pipeline tuning knobs ---
STRICT_PERSISTENCE = True  # drop candidates whose property has no current claims
API_TIMEOUT = 30  # seconds, per HTTP request
REVISION_LOOKBACK_DAYS = 30  # size of the history window that ends at fix_date
MAX_HISTORY_PAGES = 8  # cap on paginated history requests per entity
MAX_PROPERTY_VALUES = 12  # cap on values kept per property in a world state
MAX_NEIGHBOR_EDGES = 50  # cap on outgoing entity edges per focus entity
LATEST_DUMP_PATH = Path("latest-all.json.gz")
WORLD_STATE_FILE = Path("world_state.json")

# --- Per-run log files ---
# datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
# produces the same formatted identifier.
RUN_ID = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
LOG_DIR = Path("logs")
LOG_DIR.mkdir(exist_ok=True)
STATS_FILE = LOG_DIR / f"fetcher_stats_{RUN_ID}.jsonl"
SUMMARY_FILE = LOG_DIR / f"run_summary_{RUN_ID}.json"


class StatsLogger:
    """Append-only JSON-lines logger that tags every record with the run id."""

    def __init__(self, stats_path):
        """Remember the target file and the id of the current run."""
        self.stats_path = stats_path
        self.run_id = RUN_ID

    def log(self, record):
        """Append `record` as one JSON line; keys in `record` override "run_id"."""
        payload = dict(run_id=self.run_id)
        payload.update(record)
        with open(self.stats_path, "a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=True) + "\n")


def pick_label(entity, lang="en"):
    """
    Return a label for `entity`, preferring language `lang`.

    Falls back to the first label stored on the entity when `lang` is not
    available, and to None when the entity is falsy or has no labels.
    """
    if not entity:
        return None
    label_map = entity.get("labels", {})
    if lang in label_map:
        chosen = label_map[lang]
    elif label_map:
        chosen = next(iter(label_map.values()))
    else:
        return None
    return chosen.get("value")


def pick_description(entity, lang="en"):
    """
    Return a description for `entity`, preferring language `lang`.

    Falls back to the first description stored on the entity when `lang` is
    not available, and to None when the entity is falsy or has none.
    """
    if not entity:
        return None
    description_map = entity.get("descriptions", {})
    if lang in description_map:
        chosen = description_map[lang]
    elif description_map:
        chosen = next(iter(description_map.values()))
    else:
        return None
    return chosen.get("value")


def chunked(iterable, size):
    """
    Yield successive lists of `size` items taken from `iterable`.

    The final list holds whatever is left over and may be shorter.
    """
    pending = []
    for element in iterable:
        pending.append(element)
        if len(pending) != size:
            continue
        yield pending
        pending = []
    if pending:
        yield pending


class WorldStateBuilder:
    """Builds a per-repair "world state" (focus node, neighbours, constraints) from a JSON dump."""

    def __init__(self, dump_path):
        """Remember the dump location and whether the file is present."""
        self.dump_path = Path(dump_path)
        self.has_dump = self.dump_path.exists()

    def build(self, entries):
        """
        Assemble a world state for every entry whose focus entity is in the dump.

        Args:
            entries: Dicts carrying at least the keys "id", "qid" and "property".

        Returns:
            A dict mapping entry["id"] to its world state. Empty when there are
            no entries or the dump file is missing.
        """
        if not entries:
            return {}
        if not self.has_dump:
            print(f"[!] Context Builder skipped: dump not found at {self.dump_path}")
            return {}

        focus_ids = {entry["qid"] for entry in entries}
        property_ids = {entry["property"] for entry in entries}
        combined_targets = focus_ids | property_ids

        # First pass over the dump: focus items and the constrained properties.
        print(f"[*] Context Builder: streaming dump for {len(combined_targets)} focus/constraint entities...")
        combined_entities = self._load_entities_from_dump(combined_targets)
        focus_entities = {eid: combined_entities[eid] for eid in focus_ids if eid in combined_entities}
        property_entities = {pid: combined_entities[pid] for pid in property_ids if pid in combined_entities}

        missing_focus = focus_ids - set(focus_entities)
        if missing_focus:
            print(f"    [!] Missing {len(missing_focus)} focus entities in dump.")

        # Second pass: neighbours are only known once the focus entities are loaded.
        neighbor_ids = self._collect_neighbor_targets(focus_entities.values())
        print(f"[*] Context Builder: streaming dump for {len(neighbor_ids)} neighbor entities...")
        neighbor_entities = self._load_entities_from_dump(neighbor_ids)
        missing_neighbors = neighbor_ids - set(neighbor_entities)
        if missing_neighbors:
            print(f"    [!] {len(missing_neighbors)} neighbors not found in dump. Falling back to Action API for labels.")
            neighbor_entities.update(self._fetch_labels_via_api(missing_neighbors))

        world_states = {}
        for entry in entries:
            focus_entity = focus_entities.get(entry["qid"])
            if not focus_entity:
                continue
            property_entity = property_entities.get(entry["property"])
            context = self._assemble_world_state(
                focus_entity,
                neighbor_entities,
                entry["property"],
                property_entity,
            )
            world_states[entry["id"]] = context
        return world_states

    def _load_entities_from_dump(self, target_ids):
        """
        Stream the gzip dump and return {entity_id: entity} for the requested ids.

        Streaming stops as soon as every requested id has been found. Any
        error while streaming is reported and the entities collected so far
        are returned.
        """
        found = {}
        if not target_ids or not self.has_dump:
            return found
        target_ids = set(target_ids)
        try:
            with gzip.open(self.dump_path, "rb") as fh:
                for entity_id, entity in self._iter_dump_entities(fh):
                    if entity_id in target_ids:
                        entity["id"] = entity_id
                        found[entity_id] = entity
                        if len(found) == len(target_ids):
                            break
        except Exception as exc:
            print(f"    [!] Context Builder stream error: {exc}")
        return found

    @staticmethod
    def _iter_dump_entities(fh):
        """
        Yield (entity_id, entity) pairs from an open dump stream.

        Two layouts are supported: a top-level JSON array of entity objects
        (the layout of the published Wikidata "latest-all" dumps) and an
        object wrapping an "entities" mapping (the layout previously assumed
        by this class).
        """
        # Peek at the first non-whitespace byte to pick the layout, then rewind.
        head = fh.read(64).lstrip()[:1]
        fh.seek(0)
        if head == b"[":
            for entity in ijson.items(fh, "item"):
                if isinstance(entity, dict) and entity.get("id"):
                    yield entity["id"], entity
        else:
            yield from ijson.kvitems(fh, "entities")

    def _collect_neighbor_targets(self, entities):
        """Collect ids of entities referenced by main snaks, capped at MAX_NEIGHBOR_EDGES per entity."""
        neighbors = set()
        for entity in entities:
            edges = 0
            claims = entity.get("claims", {})
            for pid, statements in claims.items():
                for claim in statements:
                    if edges >= MAX_NEIGHBOR_EDGES:
                        break
                    datavalue = claim.get("mainsnak", {}).get("datavalue")
                    if not datavalue:
                        continue
                    value = datavalue.get("value")
                    if isinstance(value, dict) and value.get("entity-type") in {"item", "property"}:
                        target_id = value.get("id")
                        if target_id:
                            neighbors.add(target_id)
                            edges += 1
                # The inner break only leaves the statement loop; stop the property loop too.
                if edges >= MAX_NEIGHBOR_EDGES:
                    break
        return neighbors

    def _fetch_labels_via_api(self, ids):
        """Fetch labels and descriptions through wbgetentities, 50 ids per request."""
        resolved = {}
        for batch in chunked(list(ids), 50):
            params = {
                "action": "wbgetentities",
                "ids": "|".join(batch),
                "props": "labels|descriptions",
            }
            data = get_json(params)
            if not data or "entities" not in data:
                continue
            for entity_id, entity in data["entities"].items():
                if not entity or "missing" in entity:
                    continue
                resolved[entity_id] = {
                    "id": entity_id,
                    "labels": entity.get("labels", {}),
                    "descriptions": entity.get("descriptions", {}),
                }
        return resolved

    def _assemble_world_state(self, focus_entity, neighbor_entities, property_id, property_entity):
        """Combine focus node, neighbourhood snapshot and constraint metadata into one dict."""
        focus_node = {
            "qid": focus_entity.get("id"),
            "label": pick_label(focus_entity),
            "description": pick_description(focus_entity),
            "properties": self._extract_properties(focus_entity),
        }
        neighborhood_snapshot = self._build_neighborhood_snapshot(focus_entity, neighbor_entities)
        constraint_metadata = self._extract_constraints(property_id, property_entity)
        return {
            "focus_node": focus_node,
            "neighborhood_snapshot": neighborhood_snapshot,
            "constraint_metadata": constraint_metadata,
        }

    def _extract_properties(self, entity):
        """Map each property id to at most MAX_PROPERTY_VALUES formatted values of "value" snaks."""
        properties = {}
        claims = entity.get("claims", {})
        for pid, statements in claims.items():
            values = []
            for claim in statements:
                snak = claim.get("mainsnak", {})
                if snak.get("snaktype") != "value":
                    continue
                datavalue = snak.get("datavalue")
                if not datavalue:
                    continue
                values.append(format_datavalue(datavalue.get("value")))
                if len(values) >= MAX_PROPERTY_VALUES:
                    break
            if values:
                properties[pid] = values
        return properties

    def _build_neighborhood_snapshot(self, entity, neighbor_entities):
        """List up to MAX_NEIGHBOR_EDGES outgoing entity-valued edges with target label/description."""
        edges = []
        edge_count = 0
        claims = entity.get("claims", {})
        for pid, statements in claims.items():
            for claim in statements:
                if edge_count >= MAX_NEIGHBOR_EDGES:
                    break
                snak = claim.get("mainsnak", {})
                datavalue = snak.get("datavalue")
                if not datavalue:
                    continue
                value = datavalue.get("value")
                if isinstance(value, dict) and value.get("entity-type") in {"item", "property"}:
                    target_id = value.get("id")
                    # Unknown neighbours yield None for label and description.
                    neighbor = neighbor_entities.get(target_id)
                    edges.append(
                        {
                            "property_id": pid,
                            "target_qid": target_id,
                            "target_label": pick_label(neighbor),
                            "target_description": pick_description(neighbor),
                        }
                    )
                    edge_count += 1
            if edge_count >= MAX_NEIGHBOR_EDGES:
                break
        return {"outgoing_edges": edges}

    def _extract_constraints(self, property_id, property_entity):
        """
        Summarise the P2302 statements of a property entity.

        Each statement becomes {"constraint_type": <entity id or None>,
        "rule_summary": <rendered qualifiers>}. A missing property entity
        yields an empty constraint list.
        """
        if not property_entity:
            return {"property_id": property_id, "constraints": []}
        constraints = []
        constraint_claims = property_entity.get("claims", {}).get("P2302", [])
        for claim in constraint_claims:
            snak = claim.get("mainsnak", {})
            datavalue = snak.get("datavalue")
            constraint_qid = None
            if datavalue:
                value = datavalue.get("value")
                if isinstance(value, dict):
                    constraint_qid = value.get("id")
            qualifiers = claim.get("qualifiers", {})
            qualifier_parts = []
            for qualifier_pid, qualifier_values in qualifiers.items():
                rendered = []
                for qualifier in qualifier_values:
                    dv = qualifier.get("datavalue")
                    if dv:
                        rendered.append(format_datavalue(dv.get("value")))
                if rendered:
                    qualifier_parts.append(f"{qualifier_pid}: {', '.join(rendered)}")
            summary = "; ".join(qualifier_parts) if qualifier_parts else "No qualifiers recorded."
            constraints.append(
                {
                    "constraint_type": constraint_qid,
                    "rule_summary": summary,
                }
            )
        return {
            "property_id": property_id,
            "constraints": constraints,
        }

def parse_iso8601(raw_ts):
    """
    Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    A trailing "Z" is read as UTC and timestamps without an offset are
    assumed to be UTC. Returns None for falsy or unparsable input.
    """
    if not raw_ts:
        return None
    iso_text = raw_ts
    if iso_text.endswith("Z"):
        iso_text = iso_text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(iso_text)
    except ValueError:
        return None
    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)


def format_timestamp(dt):
    """Render `dt` in UTC as YYYY-MM-DDTHH:MM:SSZ."""
    utc_dt = dt.astimezone(timezone.utc)
    return utc_dt.strftime("%Y-%m-%dT%H:%M:%SZ")


def compute_revision_window(report_date):
    """
    Return the (start, end) timestamps of the history window for a report date.

    The window ends at `report_date` and starts REVISION_LOOKBACK_DAYS
    earlier. Returns (None, None) when `report_date` cannot be parsed.
    """
    window_end = parse_iso8601(report_date)
    if window_end:
        window_start = window_end - timedelta(days=REVISION_LOOKBACK_DAYS)
        return format_timestamp(window_start), format_timestamp(window_end)
    return None, None


def format_datavalue(value):
    """
    Render a snak data value as a short display string.

    Dict values are recognised by their keys, checked in this order: "time",
    "id", "text" (with an optional "@language" suffix), "amount" (followed by
    the unit), and a "latitude"/"longitude" pair. Anything else is passed
    through str().
    """
    if not isinstance(value, dict):
        return str(value)
    for direct_key in ("time", "id"):
        if direct_key in value:
            return value[direct_key]
    if "text" in value:
        language = value.get("language")
        if language:
            return f"{value['text']}@{language}"
        return value["text"]
    if "amount" in value:
        return f"{value['amount']} {value.get('unit', '')}".strip()
    if "latitude" in value and "longitude" in value:
        return f"{value['latitude']},{value['longitude']}"
    return str(value)


def summarize_claims(claims):
    """
    Reduce a list of claims to a comparable signature and display values.

    Returns:
        (signature, values): `signature` is a sorted tuple of
        "SNAKTYPE:value" strings and `values` lists the rendered values in
        claim order. With no claims both collapse to the "MISSING" marker.
    """
    missing = (("MISSING",), ["MISSING"])
    if not claims:
        return missing
    tagged = []
    shown = []
    for statement in claims:
        main_snak = statement.get("mainsnak", {})
        kind = main_snak.get("snaktype", "").upper() or "UNKNOWN"
        # Non-value snaks (novalue/somevalue) are represented by their type.
        rendered = kind
        if kind == "VALUE":
            rendered = format_datavalue(main_snak.get("datavalue", {}).get("value"))
        tagged.append(f"{kind}:{rendered}")
        shown.append(rendered)
    if not tagged:
        return missing
    tagged.sort()
    return tuple(tagged), shown


def classify_action(previous_signature, current_signature):
    """
    Name the edit that turned one claim signature into another.

    "DELETE" when the property is now missing, "CREATE" when it was missing
    before, "UPDATE" otherwise.
    """
    absent = ("MISSING",)
    if current_signature == absent:
        return "DELETE"
    return "CREATE" if previous_signature == absent else "UPDATE"


def get_json(params=None, *, endpoint=API_ENDPOINT, with_format=True):
    """
    GET `endpoint` and return the decoded JSON body, or None on failure.

    Up to four attempts are made. Rate limiting (HTTP 429) backs off
    exponentially; server errors and exceptions (network errors, invalid JSON)
    are retried after a one-second pause. Client errors that cannot succeed on
    retry (400, 403, 404, 410) give up immediately.

    Args:
        params: Query parameters; copied, never mutated.
        endpoint: URL to request. Defaults to the Action API endpoint.
        with_format: When True, default "format=json" and "formatversion=2"
            are added unless already present.

    Returns:
        The parsed JSON payload, or None when no attempt succeeded.
    """
    query = dict(params or {})
    if with_format:
        query.setdefault("format", "json")
        query.setdefault("formatversion", 2)

    max_attempts = 4
    permanent_statuses = (400, 403, 404, 410)
    for attempt in range(max_attempts):
        backed_off = False
        try:
            response = requests.get(
                endpoint,
                headers=HEADERS,
                params=query if query else None,
                timeout=API_TIMEOUT,
            )
            status = response.status_code
            if status == 200:
                return response.json()
            if status == 429:
                sleep_for = 2**attempt
                print(f"    [!] Rate limited. Sleeping {sleep_for}s...")
                time.sleep(sleep_for)
                backed_off = True
            else:
                print(f"    [!] HTTP {status} for {endpoint}")
                if status in permanent_statuses:
                    # Repeating the same request cannot change the outcome.
                    return None
        except Exception as exc:
            # Best effort: report the failure and retry.
            print(f"    [!] Exception: {exc}")
        # Pause before the next attempt, but not after the last one and not
        # on top of a rate-limit backoff that has already been served.
        if not backed_off and attempt < max_attempts - 1:
            time.sleep(1)
    return None


def get_current_state(qid, property_id):
    """
    Return the current display values of `property_id` on entity `qid`.

    Returns None when the request fails, the entity is missing, or the
    entity currently has no claims for the property.
    """
    payload = get_json({"action": "wbgetentities", "ids": qid, "props": "claims"})
    if not payload or "entities" not in payload:
        return None
    record = payload["entities"].get(qid)
    if not record or "missing" in record:
        return None
    signature, values = summarize_claims(record.get("claims", {}).get(property_id, []))
    return None if signature == ("MISSING",) else values


def fetch_revision_history(qid, start_time, end_time):
    """
    Collect the revisions of `qid` that fall inside [start_time, end_time].

    History pages are requested from REST_HISTORY_URL and followed through the
    "older" link of each response, for at most MAX_HISTORY_PAGES pages. The
    first revision found that is older than `start_time` is kept as a "carry"
    revision, so the result also contains the state just before the window.

    Args:
        qid: Entity id substituted into REST_HISTORY_URL.
        start_time: ISO-8601 lower bound of the window, or a falsy value for none.
        end_time: ISO-8601 upper bound of the window, or a falsy value for none.

    Returns:
        (revisions, history_meta): `revisions` is sorted by ascending
        "timestamp"; `history_meta` is a dict of bookkeeping values describing
        how the history was scanned.
    """
    start_dt = parse_iso8601(start_time) if start_time else None
    end_dt = parse_iso8601(end_time) if end_time else None
    revisions = []
    carry_revision = None
    endpoint = REST_HISTORY_URL.format(qid=qid)
    next_endpoint = endpoint
    # NOTE(review): confirm the history endpoint honours a "limit" parameter.
    params = {"limit": 200}
    batches = 0
    truncated_by_window = False
    reached_page_limit = False
    api_calls = 0

    while next_endpoint and batches < MAX_HISTORY_PAGES:
        # Query parameters are only sent with the first request; follow-up
        # requests use the "older" URL from the previous response as-is.
        data = get_json(
            params=params if next_endpoint == endpoint else None,
            endpoint=next_endpoint,
            with_format=False,
        )
        if not data or "revisions" not in data:
            break
        # Only responses that carried a "revisions" list are counted.
        api_calls += 1
        for rev in data.get("revisions", []):
            rev_ts = rev.get("timestamp")
            rev_dt = parse_iso8601(rev_ts)
            if not rev_dt:
                # Revisions without a parsable timestamp are ignored.
                continue
            if end_dt and rev_dt > end_dt:
                # Newer than the window: skip and keep scanning.
                continue
            if start_dt and rev_dt < start_dt:
                # Older than the window: keep this one revision as the carry
                # and stop paginating.
                # presumably the API lists revisions newest-first, so everything
                # after this point is older still; verify against the API docs
                truncated_by_window = True
                if not carry_revision:
                    carry_revision = rev
                next_endpoint = None
                break
            revisions.append(rev)
        if next_endpoint is None:
            break
        older_url = data.get("older")
        if not older_url:
            break
        next_endpoint = older_url
        batches += 1
        params = None
        # Secondary guard: every appended revision already passed the
        # start_dt check in the loop above.
        if start_dt and revisions:
            oldest_dt = parse_iso8601(revisions[-1]["timestamp"])
            if oldest_dt and oldest_dt < start_dt:
                truncated_by_window = True
                break
    if carry_revision:
        revisions.append(carry_revision)
    # Ascending order by the raw timestamp string.
    revisions.sort(key=lambda rev: rev["timestamp"])
    # An "older" page was still pending when the page budget ran out.
    if next_endpoint and batches >= MAX_HISTORY_PAGES:
        reached_page_limit = True

    history_meta = {
        "qid": qid,
        "start_time": start_time,
        "end_time": end_time,
        "lookback_days": REVISION_LOOKBACK_DAYS,
        "max_history_pages": MAX_HISTORY_PAGES,
        "api_calls": api_calls,
        "batches_used": batches,
        # Number of revisions returned, including the carry revision.
        "revisions_scanned": len(revisions),
        "earliest_revision": revisions[0]["timestamp"] if revisions else None,
        "latest_revision": revisions[-1]["timestamp"] if revisions else None,
        "truncated_by_window": truncated_by_window,
        "reached_page_limit": reached_page_limit,
        "carry_revision_used": carry_revision is not None,
    }

    return revisions, history_meta


def _fetch_revision_entity(qid, revision_id):
    """Return the entity JSON of `qid` at `revision_id`, or None when it cannot be retrieved."""
    endpoint = ENTITY_DATA_URL.format(qid=qid)
    data = get_json(
        params={"revision": revision_id},
        endpoint=endpoint,
        with_format=False,
    )
    if not data or "entities" not in data:
        return None
    entity = data["entities"].get(qid)
    if not entity or "missing" in entity:
        return None
    return entity


def get_claims_for_revision(qid, property_id, revision_id):
    """
    Return the claims of `property_id` on `qid` as of `revision_id`.

    An empty list is returned both when the property has no claims and when
    the revision could not be fetched.
    """
    entity = _fetch_revision_entity(qid, revision_id)
    if entity is None:
        return []
    # `or {}` also covers a "claims" value that is present but empty.
    return (entity.get("claims") or {}).get(property_id, [])


def extract_user(revision):
    """Return the author of a revision: the "name" of a dict-valued "user", the raw value, or "unknown"."""
    user = revision.get("user")
    if isinstance(user, dict):
        return user.get("name", "unknown")
    return user or "unknown"


def find_repair_revision(qid, property_id, start_time, end_time):
    """
    Find the first revision in the window that changed `property_id` on `qid`.

    Revisions are walked in the order returned by fetch_revision_history and
    the claim signature of each is compared with the previous one that was
    fetched successfully. Revisions whose content cannot be fetched are
    skipped rather than treated as "property missing", so a failed request
    cannot produce a spurious DELETE or CREATE event.

    Returns:
        (fix_event, history_meta): `fix_event` is None when no change was
        found; otherwise a dict with the keys "repair_revision_id",
        "timestamp", "action", "old_value", "new_value" and "author".
        `history_meta` additionally carries "revisions_unavailable", the
        number of revisions skipped before the walk ended.
    """
    revisions, history_meta = fetch_revision_history(qid, start_time, end_time)
    if not revisions:
        return None, history_meta

    previous_signature = None
    previous_snapshot = None
    fix_event = None
    unavailable = 0

    for rev in revisions:
        revision_id = rev.get("id") or rev.get("revid")
        if not revision_id:
            continue
        entity = _fetch_revision_entity(qid, revision_id)
        if entity is None:
            # State unknown for this revision: keep comparing against the
            # last revision that could be read.
            unavailable += 1
            continue
        current_claims = (entity.get("claims") or {}).get(property_id, [])
        current_signature, current_snapshot = summarize_claims(current_claims)
        if previous_signature is not None and current_signature != previous_signature:
            fix_event = {
                "repair_revision_id": revision_id,
                "timestamp": rev.get("timestamp"),
                "action": classify_action(previous_signature, current_signature),
                "old_value": previous_snapshot,
                "new_value": current_snapshot,
                "author": extract_user(rev),
            }
            break
        previous_signature = current_signature
        previous_snapshot = current_snapshot

    if isinstance(history_meta, dict):
        history_meta["revisions_unavailable"] = unavailable
    return fix_event, history_meta


def process_pipeline(max_candidates=None):
    """
    Turn cached repair candidates into a dataset of verified repairs.

    For each candidate the pipeline optionally checks that the property still
    has a value today, derives the history window from the candidate's
    fix_date, and searches that window for the revision that changed the
    property. Found repairs are enriched with a world state when the dump is
    available.

    Outputs (written even when the run is interrupted or fails):
        wikidata_repair_eval_raw.json: the repairs collected so far.
        SUMMARY_FILE: per-outcome counters for this run.
    Per-candidate outcomes are appended to STATS_FILE as they happen.

    Args:
        max_candidates: Stop after this many candidates; None processes all.
    """
    input_file = "repair_candidates.json"
    output_file = "wikidata_repair_eval_raw.json"
    if not os.path.exists(input_file):
        print(f"[!] {input_file} not found.")
        return

    with open(input_file, "r") as f:
        candidates = json.load(f)

    stats_logger = StatsLogger(STATS_FILE)
    summary = {
        "run_id": stats_logger.run_id,
        "lookback_days": REVISION_LOOKBACK_DAYS,
        "max_history_pages": MAX_HISTORY_PAGES,
        "total_candidates": len(candidates),
        "processed": 0,
        "persistence_failed": 0,
        "bad_fix_date": 0,
        "repairs_found": 0,
        "no_diff": 0,
        "no_history": 0,
    }

    print(f"[*] Loaded {len(candidates)} candidates. Using REST history.")

    dataset = []
    try:
        for i, item in enumerate(candidates):
            if max_candidates is not None and i >= max_candidates:
                break
            qid = item["qid"]
            pid = item["property_id"]

            if not qid.startswith("Q"):
                continue

            print(f"\n[{i + 1}/{len(candidates)}] Analyzing {qid} ({pid})...")
            summary["processed"] += 1

            record_base = {
                "qid": qid,
                "property": pid,
            }
            curr_val = None
            if STRICT_PERSISTENCE:
                curr_val = get_current_state(qid, pid)
                if not curr_val:
                    print("    [x] Dropped: Persistence check failed (Entity/Prop missing).")
                    summary["persistence_failed"] += 1
                    stats_logger.log(
                        {
                            **record_base,
                            "result": "persistence_failed",
                            "reason": "missing_current_value",
                        }
                    )
                    continue

            report_date = item["fix_date"]
            start_time, end_time = compute_revision_window(report_date)
            if not end_time:
                print("    [x] Dropped: Could not parse fix_date.")
                summary["bad_fix_date"] += 1
                stats_logger.log(
                    {
                        **record_base,
                        "result": "bad_fix_date",
                        "report_date": report_date,
                    }
                )
                continue

            fix_event, history_meta = find_repair_revision(
                qid,
                pid,
                start_time=start_time,
                end_time=end_time,
            )

            if fix_event:
                print(f"    [+] FOUND REPAIR! {fix_event['old_value']} -> {fix_event['new_value']}")
                summary["repairs_found"] += 1
                entry = {
                    "id": f"repair_{qid}_{fix_event['repair_revision_id']}",
                    "qid": qid,
                    "property": pid,
                    "type": "TBD",
                    "violation_context": {
                        "value": fix_event["old_value"],
                    },
                    "repair_target": {
                        "action": fix_event["action"],
                        "value": fix_event["new_value"],
                        "revision_id": fix_event["repair_revision_id"],
                    },
                    "persistence_check": {
                        # Only claim "passed" when the check actually ran.
                        "status": "passed" if STRICT_PERSISTENCE else "skipped",
                        "current_value_2025": curr_val,
                    },
                }
                dataset.append(entry)
                stats_logger.log(
                    {
                        **record_base,
                        "result": "repair_found",
                        "history": history_meta,
                        "repair_revision_id": fix_event["repair_revision_id"],
                        "action": fix_event["action"],
                    }
                )
            else:
                print("    [-] No clean diff found.")
                # The metadata dict is never empty, so its truthiness cannot
                # tell "no revisions in the window" from "revisions scanned
                # but unchanged"; the revision count can.
                has_history = bool(history_meta) and bool(history_meta.get("revisions_scanned"))
                outcome = "no_diff" if has_history else "no_history"
                summary[outcome] += 1
                stats_logger.log(
                    {
                        **record_base,
                        "result": outcome,
                        "history": history_meta,
                    }
                )

            # Periodic checkpoint of the repairs found so far.
            if i % 10 == 0:
                with open(output_file, "w") as out:
                    json.dump(dataset, out, indent=2)

        world_states = {}
        if dataset:
            builder = WorldStateBuilder(LATEST_DUMP_PATH)
            world_states = builder.build(dataset)
            if world_states:
                for entry in dataset:
                    context = world_states.get(entry["id"])
                    if context:
                        entry["world_state"] = context
                with open(WORLD_STATE_FILE, "w", encoding="utf-8") as world_file:
                    json.dump(world_states, world_file, indent=2)
    finally:
        # Persist results and counters even when the run is interrupted
        # (e.g. KeyboardInterrupt) or fails part-way through.
        with open(output_file, "w") as out:
            json.dump(dataset, out, indent=2)

        with open(SUMMARY_FILE, "w", encoding="utf-8") as summary_file:
            json.dump(summary, summary_file, indent=2)

    print(f"\n[+] Extraction Complete. Saved {len(dataset)} verified repairs.")


In [15]:
# Run the pipeline over every cached candidate; process_pipeline accepts
# max_candidates=N to stop after the first N candidates.
process_pipeline()

[*] Loaded 304 candidates. Using Action API.

[1/304] Analyzing Q223311 (P569)...
    [-] No clean diff found.

[2/304] Analyzing Q134708713 (P569)...
    [x] Dropped: Persistence check failed (Entity/Prop missing).

[3/304] Analyzing Q1930855 (P569)...
    [x] Dropped: Persistence check failed (Entity/Prop missing).

[4/304] Analyzing Q8975832 (P569)...
    [-] No clean diff found.

[5/304] Analyzing Q18356450 (P569)...
    [x] Dropped: Persistence check failed (Entity/Prop missing).

[6/304] Analyzing Q137286504 (P569)...
    [-] No clean diff found.

[7/304] Analyzing Q57584664 (P569)...
    [-] No clean diff found.

[8/304] Analyzing Q4110950 (P569)...
    [-] No clean diff found.

[9/304] Analyzing Q9531806 (P569)...
    [x] Dropped: Persistence check failed (Entity/Prop missing).

[10/304] Analyzing Q137264893 (P569)...
    [-] No clean diff found.

[11/304] Analyzing Q744961 (P569)...
    [x] Dropped: Persistence check failed (Entity/Prop missing).

[12/304] Analyzing Q23894233 

KeyboardInterrupt: 