* [#21](https://github.com/salgo60/ifkdb/issues/21)
* denna Notebook [21_check_P11905.ipynb](https://github.com/salgo60/ifkdb/blob/main/Notebook/21_check_P11905.ipynb)

Kollar kategorier och om P11905 finns i Wikidata

In [1]:
from datetime import datetime
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2026-02-27 05:38:42.634937


In [24]:
import requests
import time

CATEGORY = "Kategori:Fotbollsspelare_i_IFK_Göteborg"
WIKI_API = "https://sv.wikipedia.org/w/api.php"
WD_API = "https://www.wikidata.org/w/api.php"

HEADERS = {
    "User-Agent": "IFKGBG-Category-Script/1.0 (https://github.com/salgo60/ifkdb/issues/21; salgo60@msn.com)"
}

session = requests.Session()
session.headers.update(HEADERS)


def get_category_members(category):
    """Hämta alla artiklar i kategorin via MediaWiki API."""
    members = []
    cmcontinue = None

    while True:
        params = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": category,
            "cmlimit": "500",
            "format": "json"
        }

        if cmcontinue:
            params["cmcontinue"] = cmcontinue

        r = session.get(WIKI_API, params=params)
        r.raise_for_status()
        data = r.json()

        members.extend(data["query"]["categorymembers"])

        if "continue" in data:
            cmcontinue = data["continue"]["cmcontinue"]
            time.sleep(0.1)
        else:
            break

    return members


def get_wikidata_ids(titles):
    """Hämta Wikidata Q-nummer via pageprops."""
    wd_map = {}
    for i in range(0, len(titles), 50):
        batch = titles[i:i+50]
        params = {
            "action": "query",
            "prop": "pageprops",
            "ppprop": "wikibase_item",
            "titles": "|".join(batch),
            "format": "json"
        }

        r = session.get(WIKI_API, params=params)
        r.raise_for_status()
        pages = r.json()["query"]["pages"]

        for page in pages.values():
            title = page.get("title")
            qid = page.get("pageprops", {}).get("wikibase_item")
            if title and qid:
                wd_map[title] = qid

        time.sleep(0.1)

    return wd_map


def get_p11905(qids):
    """Hämta P11905 från Wikidata."""
    p11905_map = {}

    for i in range(0, len(qids), 50):
        batch = qids[i:i+50]
        params = {
            "action": "wbgetentities",
            "ids": "|".join(batch),
            "props": "claims",
            "format": "json"
        }

        r = session.get(WD_API, params=params)
        r.raise_for_status()
        entities = r.json()["entities"]

        for qid, entity in entities.items():
            claims = entity.get("claims", {})
            if "P11905" in claims:
                value = claims["P11905"][0]["mainsnak"]["datavalue"]["value"]
                p11905_map[qid] = value

        time.sleep(0.1)

    return p11905_map


# ---- Körning ----

print("Hämtar kategori...")
members = get_category_members(CATEGORY)
titles = [m["title"] for m in members if m["ns"] == 0]

print(f"{len(titles)} artiklar hittades")

print("Hämtar Wikidata Q-nummer...")
wd_map = get_wikidata_ids(titles)

print("Hämtar P11905...")
p11905_map = get_p11905(list(wd_map.values()))

with_p = []
without_p = []

for title, qid in wd_map.items():
    if qid in p11905_map:
        pid = p11905_map[qid]
        link = f"https://ifkdb.se/spelare/{pid}"
        with_p.append((title, qid, pid, link))
    else:
        without_p.append((title, qid))

# ---- Resultat ----

print("\n=== HAR P11905 ===")
for title, qid, pid, link in sorted(with_p):
    print(f"{title} | {qid} | {pid} | {link}")

print("\n=== SAKNAR P11905 ===")
for title, qid in sorted(without_p):
    print(f"{title} | {qid}")

Hämtar kategori...
431 artiklar hittades
Hämtar Wikidata Q-nummer...
Hämtar P11905...

=== HAR P11905 ===
Abdul Razak | Q247730 | AbdulRazak_868 | https://ifkdb.se/spelare/AbdulRazak_868
Abundance Salaou | Q113660322 | Abundance-Salaou_949 | https://ifkdb.se/spelare/Abundance-Salaou_949
Adam Bergmark Wiberg | Q37012842 | Adam-Bergmark+Wiberg_1013 | https://ifkdb.se/spelare/Adam-Bergmark+Wiberg_1013
Adam Carlén | Q69043930 | Adam-Carlén_958 | https://ifkdb.se/spelare/Adam-Carlén_958
Adam Ingi Benediktsson | Q109819079 | Adam+Ingi-Benediktsson_937 | https://ifkdb.se/spelare/Adam+Ingi-Benediktsson_937
Adam Johansson | Q349871 | AdamJohansson_790 | https://ifkdb.se/spelare/AdamJohansson_790
Adil Titi | Q63801194 | AdilTiti_891 | https://ifkdb.se/spelare/AdilTiti_891
Alai Ghasem | Q113710061 | Alai-Ghasem_17 | https://ifkdb.se/spelare/Alai-Ghasem_17
Alejandro Lago | Q786134 | AlejandroLago_793 | https://ifkdb.se/spelare/AlejandroLago_793
Alexander Faltsetas | Q4718827 | AlexanderFaltsetas_7

In [4]:
import pandas as pd

# ---- DataFrame: Har P11905 ----
df_with = pd.DataFrame(with_p, columns=[
    "Namn",
    "Wikidata_QID",
    "IFKDB_ID",
    "IFKDB_länk"
]).sort_values("Namn")

# ---- DataFrame: Saknar P11905 ----
df_without = pd.DataFrame(without_p, columns=[
    "Namn",
    "Wikidata_QID"
]).sort_values("Namn")

# ---- Sammanfattning ----
total = len(wd_map)
count_with = len(df_with)
count_without = len(df_without)

print("=" * 60)
print("IFK Göteborg – Spelare i Wikipedia-kategori")
print("=" * 60)
print(f"Totalt antal spelare:      {total}")
print(f"Har P11905 (IFKDB-ID):     {count_with}")
print(f"Saknar P11905:             {count_without}")
print(f"Täckningsgrad:             {round((count_with/total)*100, 1)} %")
print("=" * 60)

print("\n=== HAR P11905 ===")
display(df_with)

print("\n=== SAKNAR P11905 ===")
display(df_without)

IFK Göteborg – Spelare i Wikipedia-kategori
Totalt antal spelare:      431
Har P11905 (IFKDB-ID):     373
Saknar P11905:             58
Täckningsgrad:             86.5 %

=== HAR P11905 ===


Unnamed: 0,Namn,Wikidata_QID,IFKDB_ID,IFKDB_länk
261,Abdul Razak,Q247730,AbdulRazak_868,https://ifkdb.se/spelare/AbdulRazak_868
262,Abundance Salaou,Q113660322,Abundance-Salaou_949,https://ifkdb.se/spelare/Abundance-Salaou_949
43,Adam Carlén,Q69043930,Adam-Carlén_958,https://ifkdb.se/spelare/Adam-Carlén_958
44,Adam Ingi Benediktsson,Q109819079,Adam+Ingi-Benediktsson_937,https://ifkdb.se/spelare/Adam+Ingi-Benediktsso...
130,Adam Johansson,Q349871,AdamJohansson_790,https://ifkdb.se/spelare/AdamJohansson_790
...,...,...,...,...
82,Yngve Brodd,Q722121,YngveBrodd_130,https://ifkdb.se/spelare/YngveBrodd_130
347,Zoran Stojcevski,Q6191106,ZoranStojcevski_648,https://ifkdb.se/spelare/ZoranStojcevski_648
173,Åke Hallman,Q10726271,ÅkeHallman_248,https://ifkdb.se/spelare/ÅkeHallman_248
260,Åke Norén,Q120962166,Åke-Norén_532,https://ifkdb.se/spelare/Åke-Norén_532



=== SAKNAR P11905 ===


Unnamed: 0,Namn,Wikidata_QID
7,Adam Bergmark Wiberg,Q37012842
31,Alexander Nadj,Q2010411
32,Alfons Nygaard,Q113746575
33,Allan Mohideen,Q20758191
26,Anders Kristiansen,Q11957879
39,Andreas Peterson,Q29246748
40,Andreas Pyndt,Q95981100
52,Andreas Öhman (fotbollsspelare),Q66848813
45,August Strömberg (fotbollsspelare),Q16633451
8,Bragi Bergsson,Q23914734


In [8]:
import requests

IFKDB_LIST = "https://ifkdb.se/spelare/"

session = requests.Session()

session.headers.update({
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/121.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "sv-SE,sv;q=0.9,en;q=0.8",
    "Referer": "https://ifkdb.se/",
    "Connection": "keep-alive",
})

r = session.get(IFKDB_LIST)
print(r.status_code)

403


In [9]:
# First load homepage
session.get("https://ifkdb.se/")

# Then load players page
r = session.get(IFKDB_LIST)
print(r.status_code)

403


In [7]:
r = session.get(IFKDB_LIST)
print(r.status_code)
print(r.headers)

403
{'Connection': 'Keep-Alive', 'Keep-Alive': 'timeout=5, max=100', 'Cache-Control': 'private, no-cache, no-store, must-revalidate, max-age=0', 'Pragma': 'no-cache', 'Content-Type': 'text/html', 'Content-Length': '1242', 'Date': 'Fri, 27 Feb 2026 04:49:38 GMT', 'Server': 'LiteSpeed', 'Vary': 'User-Agent'}


In [10]:
import cloudscraper

scraper = cloudscraper.create_scraper()
r = scraper.get(IFKDB_LIST)
print(r.status_code)

403


In [11]:
print(r.text)

<!DOCTYPE html>
<html style="height:100%">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" />
<title> 403 Forbidden
</title><style>@media (prefers-color-scheme:dark){body{background-color:#000!important}}</style></head>
<body style="color: #444; margin:0;font: normal 14px/20px Arial, Helvetica, sans-serif; height:100%; background-color: #fff;">
<div style="height:auto; min-height:100%; ">     <div style="text-align: center; width:800px; margin-left: -400px; position:absolute; top: 30%; left:50%;">
        <h1 style="margin:0; font-size:150px; line-height:150px; font-weight:bold;">403</h1>
<h2 style="margin-top:20px;font-size: 30px;">Forbidden
</h2>
<p>Access to this resource on the server is denied!</p>
</div></div><div style="color:#f0f0f0; font-size:12px;margin:auto;padding:0px 30px 0px 30px;position:relative;clear:both;height:100px;margin-top:-101px;background-color:#474747;border-top: 1px solid rgba(0,0,0,0.15);box-shadow: 0 1px 0 rgba(2

In [12]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless=new")
options.add_argument("--start-maximized")

driver = webdriver.Chrome(options=options)
driver.get("https://ifkdb.se/spelare/")

html = driver.page_source
print(len(html))

driver.quit()

1221


In [19]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless=new")
options.add_argument("--start-maximized")

driver = webdriver.Chrome(options=options)

driver.get("https://ifkdb.se/listor/allaspelare")

html = driver.page_source

# Save to file
with open("ifkdb_allaspelare.html", "w", encoding="utf-8") as f:
    f.write(html)

print("Saved:", len(html), "characters")

driver.quit()

Saved: 477872 characters


In [20]:
from bs4 import BeautifulSoup

with open("ifkdb_allaspelare.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

# Example: find the sortable table
table = soup.find("table")

rows = table.find_all("tr")
print("Rows:", len(rows))

Rows: 1014


In [22]:
df_ifkdb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Empty DataFrame


In [21]:
# ==============================
# IFKDB → Wikidata Reconciliation
# ==============================

import requests
import pandas as pd
import unicodedata
import re
from bs4 import BeautifulSoup
from datetime import datetime

# --------------------------------
# CONFIG
# --------------------------------

IFKDB_HTML_FILE = "ifkdb_allaspelare.html"
WIKIDATA_ENDPOINT = "https://query.wikidata.org/sparql"
CLUB_QID = "Q201567"  # IFK Göteborg senior team
CONFIDENCE_THRESHOLD = 0.8

# --------------------------------
# 1️⃣ STRICTER SPARQL QUERY
# --------------------------------

sparql_query = f"""
SELECT ?player ?playerLabel ?birth ?start ?end ?article
WHERE {{
  ?player wdt:P31 wd:Q5 ;
          wdt:P106 wd:Q937857 .

  ?player p:P54 ?membership .
  ?membership ps:P54 wd:{CLUB_QID} .

  FILTER NOT EXISTS {{ ?player wdt:P11905 ?id }}

  OPTIONAL {{ ?membership pq:P580 ?start }}
  OPTIONAL {{ ?membership pq:P582 ?end }}

  OPTIONAL {{
    ?player p:P569 ?birthStatement .
    ?birthStatement ps:P569 ?birth .
    FILTER NOT EXISTS {{ ?birthStatement wikibase:rank wikibase:DeprecatedRank }}
  }}

  OPTIONAL {{
    ?article schema:about ?player ;
             schema:isPartOf <https://sv.wikipedia.org/> .
  }}

  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "sv,en". }}
}}
ORDER BY ?playerLabel
"""

print("Querying Wikidata...")

response = requests.get(
    WIKIDATA_ENDPOINT,
    params={"query": sparql_query, "format": "json"},
    headers={"User-Agent": "IFKDB reconciliation notebook"}
)

data = response.json()

rows = []
for item in data["results"]["bindings"]:
    rows.append({
        "player": item["player"]["value"],
        "playerLabel": item["playerLabel"]["value"],
        "birth": item.get("birth", {}).get("value"),
        "start": item.get("start", {}).get("value"),
        "end": item.get("end", {}).get("value"),
        "article": item.get("article", {}).get("value")
    })

df_wd = pd.DataFrame(rows)

print("Wikidata candidates:", len(df_wd))

# --------------------------------
# 2️⃣ PARSE IFKDB HTML
# --------------------------------

print("Parsing IFKDB HTML...")

with open(IFKDB_HTML_FILE, "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

players = []

for a in soup.find_all("a", href=True):
    match = re.search(r"/spelare/(\d+)", a["href"])
    if match:
        players.append({
            "ifkdb_id": match.group(1),
            "name_ifkdb": a.get_text(strip=True)
        })

df_ifkdb = pd.DataFrame(players).drop_duplicates()

print("IFKDB players found:", len(df_ifkdb))

# --------------------------------
# 3️⃣ NORMALIZATION
# --------------------------------

def normalize_name(name):
    if pd.isna(name):
        return ""
    name = unicodedata.normalize("NFKD", name)
    name = name.encode("ascii", "ignore").decode("utf-8")
    return name.lower().strip()

df_wd["norm"] = df_wd["playerLabel"].apply(normalize_name)
df_ifkdb["norm"] = df_ifkdb["name_ifkdb"].apply(normalize_name)

# --------------------------------
# 4️⃣ MERGE
# --------------------------------

df_merge = df_wd.merge(
    df_ifkdb,
    on="norm",
    how="left",
    indicator=True
)

df_merge["name_match"] = df_merge["_merge"] == "both"
df_merge["birth_year_wd"] = pd.to_datetime(
    df_merge["birth"], errors="coerce"
).dt.year

# --------------------------------
# 5️⃣ CONFIDENCE SCORE
# --------------------------------

def compute_score(row):
    score = 0

    if row["name_match"]:
        score += 0.7

    if pd.notna(row["start"]):
        score += 0.1

    if pd.notna(row["birth_year_wd"]):
        score += 0.1

    return score

df_merge["confidence"] = df_merge.apply(compute_score, axis=1)

def status_label(row):
    if row["confidence"] >= CONFIDENCE_THRESHOLD:
        return "SAFE"
    elif row["confidence"] >= 0.6:
        return "REVIEW"
    else:
        return "CONFLICT"

df_merge["status"] = df_merge.apply(status_label, axis=1)

# --------------------------------
# 6️⃣ GENERATE QUICKSTATEMENTS
# --------------------------------

print("Generating QuickStatements...")

qs_lines = []

for _, row in df_merge.iterrows():
    if row["status"] == "SAFE" and pd.notna(row["ifkdb_id"]):
        qid = row["player"].split("/")[-1]
        qs_lines.append(
            f'{qid}\tP11905\t"{row["ifkdb_id"]}"\tS854\thttps://ifkdb.se/spelare/{row["ifkdb_id"]}'
        )

qs_filename = "ifkdb_reviewed_quickstatements.tsv"

with open(qs_filename, "w") as f:
    f.write("\n".join(qs_lines))

print("QuickStatements created:", len(qs_lines))

# --------------------------------
# 7️⃣ HTML REPORT
# --------------------------------

print("Generating HTML report...")

html_table = df_merge.sort_values("confidence", ascending=False).to_html(index=False)

report_html = f"""
<html>
<head>
<meta charset="utf-8">
<style>
body {{ font-family: Arial; }}
.SAFE {{ background:#d4edda; }}
.REVIEW {{ background:#fff3cd; }}
.CONFLICT {{ background:#f8d7da; }}
</style>
</head>
<body>
<h1>IFKDB → Wikidata Reconciliation Report</h1>
<p>Generated: {datetime.now()}</p>
{html_table}
</body>
</html>
"""

report_filename = "ifkdb_reconciliation_report.html"

with open(report_filename, "w", encoding="utf-8") as f:
    f.write(report_html)

print("Report saved:", report_filename)

# --------------------------------
# 8️⃣ SAVE AUDIT CSV
# --------------------------------

df_merge.to_csv("ifkdb_reconciliation_audit.csv", index=False)

print("Audit CSV saved.")

print("Done.")

Querying Wikidata...
Wikidata candidates: 69
Parsing IFKDB HTML...
IFKDB players found: 0


KeyError: 'name_ifkdb'

In [23]:
players

[]

In [2]:
end_time = datetime.now()

duration = end_time - start_time

print("\n===== Körningsrapport =====")
print("Starttid :", start_time.strftime("%Y-%m-%d %H:%M:%S"))
print("Sluttid  :", end_time.strftime("%Y-%m-%d %H:%M:%S"))

total_seconds = int(duration.total_seconds())
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
seconds = total_seconds % 60

print(f"Körtid   : {hours}h {minutes}m {seconds}s")
print("===========================\n")


===== Körningsrapport =====
Starttid : 2026-02-27 05:38:42
Sluttid  : 2026-02-27 05:38:56
Körtid   : 0h 0m 14s

