## P54 högre rank
* [#54](https://github.com/salgo60/ifkdb/issues/19)
* Denna notebook [19_54.ipynb](https://github.com/salgo60/ifkdb/blob/main/Notebook/19_54.ipynb)

In [1]:
from datetime import datetime
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2026-02-27 00:09:56.561850


In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import time
from collections import defaultdict

players = [
    "Q247652","Q454740","Q454915","Q460924","Q893285","Q1033719",
    "Q1184663","Q1334998","Q1372325","Q1703522","Q2350103","Q2543707",
    "Q2652968","Q29318629","Q3132576","Q4018373","Q4088566","Q4125587",
    "Q5371332","Q9140631","Q12341192","Q15303035","Q16233813","Q19587474",
    "Q20582751","Q23778051","Q23759917","Q2478694","Q26129311","Q26775349",
    "Q37823353","Q47075606","Q59914139","Q63482320","Q64009388","Q64605847",
    "Q71982865","Q97400347","Q98085094","Q115371638"
]

endpoint = "https://query.wikidata.org/sparql"
sparql = SPARQLWrapper(endpoint)
sparql.setReturnFormat(JSON)

player_languages = {}
language_counter = defaultdict(int)

for qid in players:
    query = f"""
    SELECT ?lang WHERE {{
      wd:{qid} ^schema:about ?article .
      ?article schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] ;
               schema:inLanguage ?lang .
    }}
    """
    
    sparql.setQuery(query)
    results = sparql.query().convert()
    
    langs = set(r["lang"]["value"] for r in results["results"]["bindings"])
    player_languages[qid] = langs
    
    for l in langs:
        language_counter[l] += 1
    
    time.sleep(0.5)  # respekt för WDQS rate limits

# --- Gemensamma språk (intersection) ---
all_language_sets = list(player_languages.values())
common_languages = set.intersection(*all_language_sets)

print("\nSpråk som ALLA spelare har gemensamt:\n")
print(sorted(common_languages))

# --- Språk sorterade efter hur många spelare som finns där ---
print("\nSpråk sorterade efter antal spelare:\n")
sorted_langs = sorted(language_counter.items(), key=lambda x: x[1], reverse=True)

for lang, count in sorted_langs:
    print(f"{lang}: {count}/{len(players)}")


Språk som ALLA spelare har gemensamt:

['en', 'sv']

Språk sorterade efter antal spelare:

sv: 40/40
en: 40/40
it: 39/40
arz: 34/40
fr: 32/40
ru: 30/40
ar: 29/40
de: 28/40
nl: 25/40
uk: 25/40
es: 24/40
pl: 23/40
fa: 21/40
tr: 18/40
hu: 17/40
nb: 17/40
zh: 17/40
da: 16/40
ko: 15/40
fi: 15/40
he: 15/40
vi: 15/40
pt: 14/40
uz: 14/40
ro: 12/40
ja: 12/40
id: 12/40
azb: 11/40
ca: 10/40
et: 10/40
lv: 8/40
sr: 8/40
hy: 7/40
cs: 7/40
bg: 6/40
th: 6/40
kk: 5/40
ha: 5/40
sw: 5/40
el: 5/40
bn: 5/40
mg: 4/40
be: 4/40
eu: 4/40
ka: 4/40
is: 4/40
mk: 4/40
ms: 4/40
hr: 4/40
ast: 3/40
sq: 3/40
ckb: 3/40
az: 3/40
be-tarask: 3/40
lt: 2/40
ne: 2/40
en-simple: 2/40
vo: 2/40
jv: 2/40
af: 2/40
eo: 1/40
dag: 1/40
ht: 1/40
nn: 1/40
yue: 1/40
kg: 1/40
sk: 1/40
mad: 1/40
bew: 1/40
knc: 1/40


För ett objekt (t.ex. Qxxxx):

Vem ändrade en P54-statement så att den fick preferred rank?

In [3]:
import requests
import time
import pandas as pd

Q_LIST = [
"Q247652","Q454740","Q454915","Q460924","Q893285","Q1033719",
"Q1184663","Q1334998","Q1372325","Q1703522","Q2350103",
"Q2543707","Q2652968","Q29318629","Q3132576","Q4018373",
"Q4088566","Q4125587","Q5371332","Q9140631","Q12341192",
"Q15303035","Q16233813","Q19587474","Q20582751","Q23778051",
"Q23759917","Q2478694","Q26129311","Q26775349","Q37823353",
"Q47075606","Q59914139","Q63482320","Q64009388","Q64605847",
"Q71982865","Q97400347","Q98085094","Q115371638"
]

session = requests.Session()
session.headers.update({
    "User-Agent": "P54-rank-audit/1.0 (salgo60@msn.com)"
})

API = "https://www.wikidata.org/w/api.php"

def is_bot_user(username):
    params = {
        "action": "query",
        "format": "json",
        "list": "users",
        "ususers": username,
        "usprop": "groups"
    }

    r = session.get(API, params=params)
    data = r.json()

    users = data.get("query", {}).get("users", [])
    if not users:
        return False

    groups = users[0].get("groups", [])
    return "bot" in groups

def find_rank_sets(qid):
    rvcontinue = None
    results = []

    while True:
        params = {
            "action": "query",
            "format": "json",
            "prop": "revisions",
            "titles": qid,
            "rvprop": "ids|timestamp|user|comment",
            "rvlimit": "max",
            "rvdir": "newer"
        }

        if rvcontinue:
            params["rvcontinue"] = rvcontinue

        r = session.get(API, params=params)
        data = r.json()

        page = next(iter(data["query"]["pages"].values()))
        revisions = page.get("revisions", [])

        for rev in revisions:
            comment = rev.get("comment", "")

            if (
                "wbsetclaim-update" in comment and
                "P54" in comment and
                "2||1" in comment
            ):
                results.append({
                    "QID": qid,
                    "Timestamp": rev["timestamp"],
                    "User": rev["user"],
                    "RevisionID": rev["revid"],
                    "Comment": comment
                })

        if "continue" in data:
            rvcontinue = data["continue"]["rvcontinue"]
            time.sleep(0.3)
        else:
            break

    return results


all_results = []

for q in Q_LIST:
    print("Checking", q)
    res = find_rank_sets(q)
    all_results.extend(res)
    time.sleep(0.5)

df = pd.DataFrame(all_results)
df = df.sort_values(["QID", "Timestamp"])

bot_cache = {}

def get_bot_flag(user):
    if user not in bot_cache:
        bot_cache[user] = is_bot_user(user)
    return bot_cache[user]

df["IsBot"] = df["User"].apply(get_bot_flag)
df

Checking Q247652
Checking Q454740
Checking Q454915
Checking Q460924
Checking Q893285
Checking Q1033719
Checking Q1184663
Checking Q1334998
Checking Q1372325
Checking Q1703522
Checking Q2350103
Checking Q2543707
Checking Q2652968
Checking Q29318629
Checking Q3132576
Checking Q4018373
Checking Q4088566
Checking Q4125587
Checking Q5371332
Checking Q9140631
Checking Q12341192
Checking Q15303035
Checking Q16233813
Checking Q19587474
Checking Q20582751
Checking Q23778051
Checking Q23759917
Checking Q2478694
Checking Q26129311
Checking Q26775349
Checking Q37823353
Checking Q47075606
Checking Q59914139
Checking Q63482320
Checking Q64009388
Checking Q64605847
Checking Q71982865
Checking Q97400347
Checking Q98085094
Checking Q115371638


Unnamed: 0,QID,Timestamp,User,RevisionID,Comment,IsBot
43,Q1033719,2015-06-17T11:44:00Z,PreferentialBot,222701993,/* wbsetclaim-update:2||1 */ [[Property:P54]]:...,False
44,Q1033719,2026-02-26T19:12:26Z,Jssfrk,2467228102,/* wbsetclaim-update:2||1 */ [[Property:P54]]:...,False
45,Q1033719,2026-02-26T19:14:00Z,Jssfrk,2467228567,/* wbsetclaim-update:2||1|1 */ [[Property:P54]...,False
46,Q1033719,2026-02-26T19:15:36Z,Jssfrk,2467229001,/* wbsetclaim-update:2||1 */ [[Property:P54]]:...,False
47,Q1033719,2026-02-26T19:15:53Z,Jssfrk,2467229088,/* wbsetclaim-update:2||1|1 */ [[Property:P54]...,False
...,...,...,...,...,...,...
279,Q98085094,2024-12-26T09:01:31Z,Jssfrk,2291410998,/* wbsetclaim-update:2||1|1 */ [[Property:P54]...,False
280,Q98085094,2024-12-26T09:01:32Z,Jssfrk,2291411004,/* wbsetclaim-update:2||1|1 */ [[Property:P54]...,False
281,Q98085094,2024-12-26T09:01:34Z,Jssfrk,2291411007,/* wbsetclaim-update:2||1|1 */ [[Property:P54]...,False
282,Q98085094,2026-01-03T16:25:33Z,MattMellow,2450932414,/* wbsetclaim-update:2||1 */ [[Property:P54]]:...,False


In [4]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.expand_frame_repr", False)

df[["QID","Timestamp","User","RevisionID","Comment"]]

Unnamed: 0,QID,Timestamp,User,RevisionID,Comment
43,Q1033719,2015-06-17T11:44:00Z,PreferentialBot,222701993,/* wbsetclaim-update:2||1 */ [[Property:P54]]: [[Q18232]]
44,Q1033719,2026-02-26T19:12:26Z,Jssfrk,2467228102,/* wbsetclaim-update:2||1 */ [[Property:P54]]: [[Q18232]]
45,Q1033719,2026-02-26T19:14:00Z,Jssfrk,2467228567,/* wbsetclaim-update:2||1|1 */ [[Property:P54]]: [[Q11492669]]
46,Q1033719,2026-02-26T19:15:36Z,Jssfrk,2467229001,/* wbsetclaim-update:2||1 */ [[Property:P54]]: [[Q11492669]]
47,Q1033719,2026-02-26T19:15:53Z,Jssfrk,2467229088,/* wbsetclaim-update:2||1|1 */ [[Property:P54]]: [[Q318348]]
48,Q1033719,2026-02-26T19:16:14Z,Jssfrk,2467229177,/* wbsetclaim-update:2||1 */ [[Property:P54]]: [[Q18232]]
49,Q1033719,2026-02-26T19:16:44Z,Jssfrk,2467229307,/* wbsetclaim-update:2||1|3 */ [[Property:P54]]: [[Q18232]]
50,Q1033719,2026-02-26T19:17:01Z,Jssfrk,2467229393,/* wbsetclaim-update:2||1|4 */ [[Property:P54]]: [[Q18232]]
51,Q1033719,2026-02-26T19:17:42Z,Jssfrk,2467229564,/* wbsetclaim-update:2||1 */ [[Property:P54]]: [[Q204881]]
52,Q1033719,2026-02-26T19:19:04Z,Jssfrk,2467229930,/* wbsetclaim-update:2||1|4 */ [[Property:P54]]: [[Q1423118]]


In [5]:
pd.reset_option("all")

  pd.reset_option("all")
  pd.reset_option("all")


In [6]:
df["IsBot"].value_counts()

IsBot
False    295
Name: count, dtype: int64

In [7]:
df.groupby(["User", "IsBot"]).size()

User                                    IsBot
186.94.50.110                           False     1
190.198.9.152                           False     1
190.205.78.191                          False     1
190.77.186.84                           False     2
2.101.112.116                           False     2
2001:B07:AD4:1DEF:4C7:F6E6:DCBD:500A    False     1
201.242.133.13                          False     1
24.184.98.153                           False     1
2800:484:AD78:2400:55:614D:6708:5D95    False     2
2A00:23C5:5515:7C01:7C5F:D9C:2026:40BF  False     1
2A01:E0A:22D:D3E0:6CB0:F525:E95E:E74C   False     1
64.43.133.48                            False     2
64.43.50.71                             False     6
80.174.6.128                            False     1
80.28.182.120                           False     1
93.34.226.131                           False     1
Ailurofil                               False     6
Andrei Stroe                            False    65
Artsiom91         

In [8]:
end_time = datetime.now()

duration = end_time - start_time

print("\n===== Körningsrapport =====")
print("Starttid :", start_time.strftime("%Y-%m-%d %H:%M:%S"))
print("Sluttid  :", end_time.strftime("%Y-%m-%d %H:%M:%S"))

total_seconds = int(duration.total_seconds())
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
seconds = total_seconds % 60

print(f"Körtid   : {hours}h {minutes}m {seconds}s")
print("===========================\n")


===== Körningsrapport =====
Starttid : 2026-02-27 00:09:56
Sluttid  : 2026-02-27 00:11:07
Körtid   : 0h 1m 11s



In [9]:
import requests

session = requests.Session()
session.headers.update({
    "User-Agent": "P54-rank-investigator/1.0 (salgo60@msn.com)"
})

url = "https://www.wikidata.org/wiki/Special:EntityData/Q247652.json"

r = session.get(url, timeout=30)

print("Status:", r.status_code)
print("Content-Type:", r.headers.get("Content-Type"))

if "application/json" not in r.headers.get("Content-Type", ""):
    print("⚠️ Not JSON response")
    print(r.text[:500])
else:
    data = r.json()
    print("Claims P54 count:",
          len(data["entities"]["Q247652"]["claims"].get("P54", [])))

Status: 200
Content-Type: application/json; charset=UTF-8
Claims P54 count: 8
