In [9]:
# This session uses code from https://github.com/Lodifice/mfnf-pdf-export which is licensed under Apache License 2.0

import re
from datetime import date, datetime

In [10]:
import hashlib
import collections
import shelve

def sha256(text):
    """Return the hexadecimal SHA-256 digest of the string `text` (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf8"))
    return digest.hexdigest()

def stablehash(obj):
    """Return a SHA-256 based hash string for `obj`.

    Supported inputs, checked in this order:
    - objects providing a callable `_stablehash` attribute (its result is returned),
    - strings,
    - sequences (hash of the ";"-joined hashes of their items),
    - mappings (hash of the list of "<key-hash;value-hash>" entries).

    The hash of a mapping follows the iteration order of `obj.items()`, so two
    mappings with equal content but different insertion order hash differently.

    Raises:
    NotImplementedError -- if `obj` is of none of the supported kinds
    """
    if callable(getattr(obj, "_stablehash", None)):
        return obj._stablehash()
    elif isinstance(obj, str):
        return sha256(obj)
    elif isinstance(obj, collections.abc.Sequence):
        return sha256(";".join(stablehash(x) for x in obj))
    elif isinstance(obj, collections.abc.Mapping):
        return stablehash(["<" + stablehash(k) + ";" + stablehash(v) + ">" for k, v in obj.items()])
    else:
        # Report the offending object in the exception itself instead of
        # printing it to stdout and raising an exception without a message.
        raise NotImplementedError(
            "stablehash() does not support objects of type %s: %r" % (type(obj).__name__, obj))
        
class CachedFunction:
    """Decorator that caches function results in a dict-like store.

    An instance is created with the store `db` and is then used as a decorator.
    The cache key is `stablehash([func.__name__, args, kwargs])`, so every
    argument must be supported by `stablehash`, and two decorated functions
    with the same `__name__` and arguments share cache entries.
    """

    def __init__(self, db):
        """Store `db`, the object used for `key in db`, `db[key]` and `db[key] = value`."""
        self.db = db

    def __call__(self, func):
        """Return a wrapper around `func` that looks results up in `self.db` first."""
        import functools  # local import so that this block does not rely on other cells

        # functools.wraps keeps __name__, __doc__ etc. of the decorated function.
        @functools.wraps(func)
        def new_func(*args, **kwargs):
            key = stablehash([func.__name__, args, kwargs])

            if key in self.db:
                return self.db[key]

            result = func(*args, **kwargs)
            self.db[key] = result

            return result

        return new_func

# Store backing every function decorated with `cached_function`; it is kept in
# the shelf file "cache.db".
# NOTE(review): the visible code never calls DB.sync() or DB.close(). According
# to the `shelve` documentation, writeback=True keeps accessed entries in memory
# and writes them back on sync/close, so later in-place changes to cached
# objects may or may not reach the file — confirm this is intended.
DB = shelve.open("cache.db", "c", writeback=True)
cached_function = CachedFunction(DB)

In [11]:
def remove_prefix(text, prefix):
    """Return `text` without the leading `prefix`; `text` is returned unchanged if it does not start with it."""
    if not text.startswith(prefix):
        return text

    return text[len(prefix):]

def remove_suffix(text, suffix):
    """Return `text` without the trailing `suffix`; `text` is returned unchanged if it does not end with it."""
    if text.endswith(suffix):
        keep = len(text) - len(suffix)
        return text[:keep]

    return text

In [12]:
import requests

from functools import reduce
from urllib.parse import quote

def select_singleton(x):
    """Return the first value of the mapping `x`.

    Raises `StopIteration` if `x` has no values.
    """
    values = iter(x.values())
    return next(values)

def merge(obj1, obj2):
    """Merges two objects depending on the type of the first argument.

    - `None` as first argument returns `obj2` unchanged.
    - A list as first argument returns the concatenation `obj1 + obj2`.
    - Two dicts return a new dict with the entries of `obj1` updated by `obj2`.

    Raises `NotImplementedError` for every other combination.

    >>> merge(None, 42)
    42
    >>> merge(None, None) is None
    True
    >>> merge([1, 2], [6, 7])
    [1, 2, 6, 7]
    >>> d = merge({ "a": 1, "b": 2}, {"b": 3, "c": 4})
    >>> d == {"a": 1, "b": 3, "c": 4}
    True
    """
    if obj1 is None:
        return obj2
    elif isinstance(obj1, list):
        return obj1 + obj2
    elif isinstance(obj1, dict) and isinstance(obj2, dict):
        # The dispatch must look at obj1: testing only obj2 made calls such as
        # merge(5, {...}) fail with AttributeError on `obj1.copy()`.
        result = obj1.copy()
        result.update(obj2)
        return result
    else:
        raise NotImplementedError(
            "cannot merge %s with %s" % (type(obj1).__name__, type(obj2).__name__))

def query_path(obj, path):
    """Walk `obj` along `path` and return what is found at the end.

    Each element of `path` is either a callable, which is applied to the
    current object, or a key/index, which is looked up in the current object.
    """
    current = obj

    for step in path:
        if callable(step):
            current = step(current)
        else:
            current = current[step]

    return current

class MediaWikiAPI():
    """Implements an API for content stored on a MediaWiki."""

    def __init__(self, domain="de.wikibooks.org", req=None):
        """Initializes the object.

        Arguments:
        domain -- domain of the MediaWiki, e.g. `"de.wikibooks.org"`
        req    -- a session object of the `requests` framework; if omitted, a
                  new `requests.Session()` is created for this instance
        """
        self.domain = domain
        # A `requests.Session()` default in the signature is evaluated only
        # once, so all instances would silently share the same session.
        self.req = requests.Session() if req is None else req

    def _stablehash(self):
        """Returns the hash used by `stablehash`; it depends on class name and domain only."""
        return stablehash((self.__class__.__name__, self.domain))

    @property
    def _index_url(self):
        """Returns the URL to the server's `index.php` file."""
        return "https://" + self.domain + "/w/index.php"

    @property
    def _api_url(self):
        """Returns the URL to the server's `api.php` file."""
        return "https://" + self.domain + "/w/api.php"

    @property
    def _rest_api_url(self):
        """Returns the URL to the server's REST API endpoints."""
        return "https://" + self.domain + "/api/rest_v1"

    def _index_call(self, params):
        """Make an HTTP request to the server's `index.php` file.

        Returns the response text; raises for HTTP error status codes.
        """
        req = self.req.get(self._index_url, params=params)

        req.raise_for_status()

        return req.text

    def _api_call(self, endpoint, data=None, domain=None):
        """Call a REST API endpoint.

        Arguments:
        endpoint -- list of path segments appended to the REST base URL
        data     -- passed as `data` to the session's `get` (default: empty dict)
        domain   -- domain serving the REST API; defaults to `self.domain`

        Returns the response object. The status code is not checked here.
        """
        if domain is None:
            api_url = self._rest_api_url
        else:
            api_url = "https://" + domain + "/api/rest_v1"

        endpoint_url = "/".join([api_url] + endpoint)

        # `data=None` in the signature avoids a mutable default argument.
        return self.req.get(endpoint_url, data={} if data is None else data)

    def query(self, params, path_to_result):
        """Run an `action=query` call against `api.php` and return the merged result.

        Arguments:
        params         -- dict of query parameters; it is not modified
        path_to_result -- list of steps (see `query_path`) leading from the
                          "query" entry of the response to the wanted result

        As long as a response contains a "continue" entry, its content is added
        to the parameters and the call is repeated; partial results are combined
        with `merge`.

        Raises `ConnectionError` if a response contains an "error" entry.
        """
        # Work on a copy: continuation values must not leak into the caller's dict.
        params = dict(params)
        params["format"] = "json"
        params["action"] = "query"
        path_to_result = ["query"] + path_to_result
        result = None

        while True:
            api_result = self.req.get(self._api_url, params=params).json()

            if "error" in api_result:
                message = "Error while making API call."
                error = api_result["error"]

                # The description is expected inside the "error" object; the
                # top-level lookup is kept as a fallback.
                info = error.get("info") if isinstance(error, dict) else None

                raise ConnectionError(info or api_result.get("info", message))

            result = merge(result, query_path(api_result, path_to_result))

            if "continue" in api_result:
                params.update(api_result["continue"])
            else:
                return result

    @cached_function
    def get_revisions(self, title):
        """Returns the list of revisions of the page `title` as delivered by the API.

        Titles starting with `"c:"` yield an empty list without any request, and
        so do pages whose API result has no "revisions" entry. The result is
        cached via `cached_function`.
        """
        if title.startswith("c:"):
            return []

        params = {"prop": "revisions", "rvprop": "size|user|timestamp|userid|ids|comment", "titles": title,
                  "rvlimit": "max"}

        try:
            return self.query(params, ["pages", select_singleton, "revisions"])
        except KeyError as e:
            # Only a missing "revisions" entry is treated as "no revisions".
            if str(e) == "'revisions'":
                return []
            else:
                print(title)
                raise

    def revisions(self, title):
        """Returns copies of the revisions of `title`, enriched with derived fields.

        Added keys per revision:
        date     -- `datetime.date` parsed from "timestamp"
        diffsize -- "size" minus the "size" of the following list element
                    (0 is used as the previous size for the last element)
        weight   -- `max(100, diffsize)`

        Returns an empty list if `title` is `None`.
        """
        if title is None:
            return []

        # Copy every revision so the objects held by the cache stay untouched.
        result = [dict(rev) for rev in self.get_revisions(title)]

        oldsize = 0
        for rev in reversed(result):
            rev["date"] = datetime.strptime(rev["timestamp"], "%Y-%m-%dT%H:%M:%SZ").date()
            rev["diffsize"] = rev["size"] - oldsize
            oldsize = rev["size"]
            rev["weight"] = max(100, rev["diffsize"])

        return result

    @cached_function
    def get_all_pageviews(self, title):
        """Returns the daily page view items of `title` from the REST API on wikimedia.org.

        The requested range is fixed to 20150701 - 20171006 ("all-access",
        "user"). Returns an empty list if the response status is not OK. The
        result is cached via `cached_function`.
        """
        endpoint = ["metrics", "pageviews", "per-article", self.domain, "all-access", "user",
                    quote(title, safe=""), "daily", "20150701", "20171006"]

        req = self._api_call(endpoint, domain="wikimedia.org")

        if req.status_code == requests.codes.ok:
            return req.json()["items"]
        else:
            return []

    def get_content(self, title):
        """Returns the raw content of the page `title` (`action=raw` on `index.php`)."""
        return self._index_call({"action": "raw", "title": title})

    def pageviews_of(self, title, start, end):
        """Returns the sum of views of exactly the title `title` between `start` and `end`.

        `start` and `end` are strings that are compared with the "timestamp"
        strings of the page view items. An `end` of 8 characters gets "00"
        appended before comparing.
        NOTE(review): presumably item timestamps have the form YYYYMMDDHH — confirm.
        """
        if len(end) == 8:
            end += "00"

        return sum((x["views"] for x in self.get_all_pageviews(title) if x["timestamp"] >= start and x["timestamp"] <= end))

    def pageviews(self, title, start, end):
        """Returns the sum of `pageviews_of` over all titles returned by `all_titles(title)`."""
        return sum((self.pageviews_of(x, start, end) for x in self.all_titles(title)))

    def all_titles(self, title):
        """Returns a set of all titles the article `title` had in the past.

        The titles are taken from revision comments matching one of two German
        page move messages.
        """
        result = set()

        result.add(title)

        re_link = "\\[\\[([^\\]]+)\\]\\]"
        re1 = ".*verschob die Seite %s nach %s.*" % (re_link, re_link)
        re2 = ".*hat „%s“ nach „%s“ verschoben.*" % (re_link, re_link)
        regs = [ re.compile(re1), re.compile(re2) ]

        # A revision without a "comment" key is treated like an empty comment
        # instead of raising KeyError.
        for comment in (x.get("comment", "") for x in self.get_revisions(title)):
            for reg in regs:
                m = reg.match(comment)

                if m:
                    result.add(m.group(1))
                    result.add(m.group(2))

        return result
    
# API object used by all cells below; uses the default domain "de.wikibooks.org".
wb = MediaWikiAPI()

In [13]:
from sitemap import parse_sitemap

# Load the sitemap wiki page and parse it; the cells below read the keys
# "title" and "children" of the resulting nodes.
mfnf = parse_sitemap(wb.get_content("Mathe für Nicht-Freaks: Sitemap"))
mfnf["title"] = "Mathe für Nicht-Freaks"
# The books are selected by position; the report cells label them
# "Grundlagenbuch", "Analysis 1" and "Lineare Algebra 1".
# NOTE(review): the indices depend on the order of entries on the sitemap page — confirm.
grund = mfnf["children"][1]
ana1 = mfnf["children"][2]
la1 = mfnf["children"][3]

# Seitenaufrufe

In den folgenden Statistiken sind Seitenaufrufe durch Bots und Spiders rausgerechnet.

In [14]:
def pageviews(node, start, end):
    """Return the page views of `node` and all of its descendants between `start` and `end`.

    Nodes with an empty or missing title contribute only through their children.
    """
    total = 0

    if node["title"]:
        total = wb.pageviews(node["title"], start, end)

    for child in node["children"]:
        total += pageviews(child, start, end)

    return total

## Seitenaufrufe im Wintersemester 2016/17

In [15]:
def ws16(what, node):
    """Print `what` followed by the page views of `node` between "20161001" and "20170331"."""
    views = pageviews(node, "20161001", "20170331")
    print(what, views)

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    ws16(label, book)

Gesamt: 1205677
Grundlagenbuch: 524177
Analysis 1: 571766
Lineare Algebra 1: 60159


## Seitenaufrufe im Sommersemester 2017

In [16]:
# Page views per book between "20170401" and "20170930".
for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    print(label, pageviews(book, "20170401", "20170930"))

Gesamt: 666645
Grundlagenbuch: 243700
Analysis 1: 337241
Lineare Algebra 1: 49434


## Seitenaufrufe im WiSe 2016/17 und SoSe 2017

In [17]:
def ws16(what, node):
    """Print `what` followed by the page views of `node` between "20161001" and "20170930"."""
    views = pageviews(node, "20161001", "20170930")
    print(what, views)

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    ws16(label, book)

Gesamt: 1872322
Grundlagenbuch: 767877
Analysis 1: 909007
Lineare Algebra 1: 109593


## Wachstum der Seitenaufrufe im Zeitraum 01.07.2017 - 30.09.2017 im Vergleich zum Vorjahr

In [18]:
def w(what, node):
    """Print the growth of page views of `node` in percent.

    Compares the period "20170701" - "20170930" with "20160701" - "20160930".
    Prints "n/a" if the earlier period has no page views, because the growth
    is undefined in that case.
    """
    current = pageviews(node, "20170701", "20170930")
    previous = pageviews(node, "20160701", "20160930")

    if previous == 0:
        # Avoid ZeroDivisionError for nodes without views in the baseline period.
        print(what, "n/a")
        return

    p = (current / previous - 1)*100
    print(what, ("%.1f" % p) + "%")

w("Gesamt:", mfnf)
w("Grundlagenbuch:", grund)
w("Analysis 1:", ana1)
w("Lineare Algebra 1:", la1)

Gesamt: 39.3%
Grundlagenbuch: 8.1%
Analysis 1: 62.7%
Lineare Algebra 1: 273.3%


# Autorenbeiträge

In [19]:
def revisions(node, start, end):
    """Return the revisions of `node` and all of its descendants dated within `start` .. `end`.

    Arguments:
    node  -- sitemap node with the keys "title" and "children"
    start -- first date (inclusive), compared with the "date" of each revision
    end   -- last date (inclusive)

    The revisions of `node` itself come first, followed by those of its
    children in order.
    """
    result = [x for x in wb.revisions(node["title"]) if x["date"] >= start and x["date"] <= end]

    # Extend in place: `sum(lists, [])` copies the growing list for every child.
    for child in node["children"]:
        result.extend(revisions(child, start, end))

    return result

## Anzahl der Bearbeitungen im Wintersemester 2016/17

In [20]:
def ws16(what, node):
    """Print `what` followed by the number of revisions of `node` from 2016-10-01 to 2017-03-31."""
    edits = revisions(node, date(2016, 10, 1), date(2017, 3, 31))
    print(what, len(edits))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    ws16(label, book)

Gesamt: 2783
Grundlagenbuch: 86
Analysis 1: 1769
Lineare Algebra 1: 690


## Anzahl der Bearbeitungen im Sommersemester 2017

In [21]:
def w(what, node):
    """Print `what` followed by the number of revisions of `node` from 2017-04-01 to 2017-09-30."""
    edits = revisions(node, date(2017, 4, 1), date(2017, 9, 30))
    print(what, len(edits))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 2740
Grundlagenbuch: 40
Analysis 1: 1833
Lineare Algebra 1: 479


## Anzahl der Bearbeitungen im WiSe 16/17 und SoSe 17 insgesamt

In [22]:
def w(what, node):
    """Print `what` followed by the number of revisions of `node` from 2016-10-01 to 2017-09-30."""
    edits = revisions(node, date(2016, 10, 1), date(2017, 9, 30))
    print(what, len(edits))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 5523
Grundlagenbuch: 126
Analysis 1: 3602
Lineare Algebra 1: 1169


## Wachstum in den Bearbeitungen im Vergleich zum Vorjahr

Wachstum der Bearbeitungen im Zeitraum 01.10.2016-30.09.2017 im Vergleich zum Zeitraum 01.10.2015-30.09.2016

In [23]:
def w(what, node):
    """Print the growth of the number of revisions of `node` in percent.

    Compares 2016-10-01 .. 2017-09-30 with 2015-10-01 .. 2016-09-30. Prints
    "n/a" if the earlier period has no revisions, because the growth is
    undefined in that case.
    """
    current = len(revisions(node, date(2016, 10, 1), date(2017, 9, 30)))
    previous = len(revisions(node, date(2015, 10, 1), date(2016, 9, 30)))

    if previous == 0:
        # Avoid ZeroDivisionError for nodes without edits in the baseline period.
        print(what, "n/a")
        return

    p = (current / previous - 1)*100
    print(what, ("%.1f" % p) + "%")

w("Gesamt:", mfnf)
w("Grundlagenbuch:", grund)
w("Analysis 1:", ana1)
w("Lineare Algebra 1:", la1)

Gesamt: 49.1%
Grundlagenbuch: -38.8%
Analysis 1: 104.2%
Lineare Algebra 1: 0.9%


# Anzahl der Autoren

In [24]:
from functools import reduce

def authors(node):
    """Return the set of `(user, is_anonymous)` pairs of `node` and all of its descendants.

    `is_anonymous` is `True` exactly when the revision has an "anon" entry
    equal to the empty string.
    """
    own = {(rev["user"], rev.get("anon", False) == "") for rev in wb.revisions(node["title"])}
    from_children = [authors(child) for child in node["children"]]

    return own.union(*from_children)

## Gesamtzahl der Autoren und Autorinnen mit anonymen Autoren / Autorinnen

Anonyme Autoren / Autorinnen sind Personen, die sich nicht angemeldet haben und für die ihre IP-Adresse hinterlegt wurde. Da eine Person unter mehreren IP-Adressen am Projekt mitgewirkt haben kann, ist die folgende Statistik eine obere Grenze für die Anzahl an Autoren / Autorinnen im Projekt.

In [25]:
def w(what, node):
    """Print `what` followed by the number of distinct authors of `node` (see `authors`)."""
    count = len(authors(node))
    print(what, count)

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 719
Grundlagenbuch: 304
Analysis 1: 380
Lineare Algebra 1: 53


# Anzahl der Autoren ohne anonyme Autoren

Dies ist eine untere Grenze für die Anzahl der tatsächlichen Autoren / Autorinnen im Projekt.

In [26]:
def w(what, node):
    """Print `what` followed by the number of authors of `node` that are not flagged as anonymous."""
    registered = sum(1 for _, is_anonymous in authors(node) if not is_anonymous)
    print(what, registered)

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 176
Grundlagenbuch: 92
Analysis 1: 99
Lineare Algebra 1: 20


# Hinzugefügte Bytes im Zeitraum 1.10.2016 – 30.9.2017

In [27]:
def w(what, node):
    """Print `what` followed by the sum of "diffsize" over the revisions of `node` from 2016-10-01 to 2017-09-30."""
    added = 0
    for rev in revisions(node, date(2016, 10, 1), date(2017, 9, 30)):
        added += rev["diffsize"]

    print(what, added)

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 1713655
Grundlagenbuch: 8848
Analysis 1: 992793
Lineare Algebra 1: 244569


# Geschätzte Zahl hinzugefügter Wörter im Zeitraum 1.10.2016 – 30.9.2017

Hier nehmen wir an, dass je 8 Bytes (ungefähr 7 Buchstaben) ein Wort ergeben.

In [53]:
def w(what, node):
    """Print `what` followed by the estimated number of added words of `node`.

    The estimate is the sum of "diffsize" over the revisions from 2016-10-01
    to 2017-09-30, divided by 8 and truncated to an integer.
    """
    added = 0
    for rev in revisions(node, date(2016, 10, 1), date(2017, 9, 30)):
        added += rev["diffsize"]

    print(what, int(added / 8))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 214206
Grundlagenbuch: 1106
Analysis 1: 124099
Lineare Algebra 1: 30571


# Liste der wichtigsten Autoren / Autorinnen der Analysis 1

In [29]:
from collections import defaultdict

def foo(node):
    """Return a `defaultdict` mapping user name to summed revision "weight".

    Covers `node` and all of its descendants; revisions having an "anon" entry
    are skipped.
    """
    totals = defaultdict(int)

    registered = (rev for rev in wb.revisions(node["title"]) if "anon" not in rev)
    for rev in registered:
        totals[rev["user"]] += rev["weight"]

    for child in node["children"]:
        for user, weight in foo(child).items():
            totals[user] += weight

    return totals

# Print the users of "Analysis 1" ordered by descending weight.
for x, y in sorted(foo(ana1).items(), key=lambda entry: entry[1], reverse=True):
    print(x, y)

Stephan Kulla 1214763
Who2010 1078675
Auswahlaxiom 689800
EulerschesPi 95839
Taschee 42328
Michael D'Erchie 28537
Ch1nie 27676
Mrvnfrtz 26916
Christoph Kehle 25624
Morrison69 21011
Mattlocke2.0 20877
Paul Stapor 20404
Morpurgo10 16333
GraffL 16107
Sven87a 16038
0-Brane 14358
Griever~dewikibooks 11054
Fabiangabel 9732
Phoible 7748
S jwiese 7740
Vpt93 7522
Braun~dewikibooks 7042
Einhalbmvquadrat 6618
Mingliaozi 6383
Matheoldie 5175
Meitnerium266 5111
Beezle73 4922
Ceranilo 4777
Apfelmus 4312
Mathpro01 4000
Konrad Rind 3861
SerloBot 3400
BenniSERLO 2838
Agnessa power 2800
PhilippHanemann 2800
BeateAsenbeck 2779
Daniel5Ko 2763
Benjamin Wolba 2758
Evalain 2468
JennKi 2148
Juetho 1900
Fabian Wietschorke 1500
MJ Studies 1495
MrScoville 1421
KatharinaKircher 1372
Jetstune 1100
W.e.r.n 1027
Peter Gröbner 1000
KaiJay 825
Dirk Hünniger 800
Flauschi 704
Florianwicher 700
Theresa Plomer 690
Claudia4 688
Maths CA 525
Letsluk 500
Nico Benti 500
Tratormo 500
Farbstift Rot 480
Boehm 400
LoSchizzatore 4

## Utils-Code

In [51]:
def article_titles(node):
    """Return the titles of `node` and all of its descendants as a flat list.

    Nodes with an empty or missing title are skipped. The title of a node
    comes before the titles of its children.
    """
    result = [node["title"]] if node["title"] else []

    # Extend in place: `sum(lists, [])` copies the growing list for every child.
    for child in node["children"]:
        result.extend(article_titles(child))

    return result
    
def is_created_between(title, start, end):
    """Return whether the page `title` was created within `start` .. `end` (inclusive).

    Titles starting with "c:" or ":" and pages without revisions yield `False`.
    The date of the last element of `wb.revisions(title)` is used as creation date.
    NOTE(review): this presumes the revision list is ordered newest first — confirm.
    """
    if title.startswith("c:") or title.startswith(":"):
        return False

    rev = wb.revisions(title)

    if len(rev) == 0:
        return False

    # `rev` is non-empty here, so no IndexError handling is needed; the old
    # handler only printed the title and then failed on an unbound variable.
    created = rev[-1]["date"]

    return created >= start and created <= end

def new_articles(node, start, end):
    """Return the titles below `node` (see `article_titles`) for which `is_created_between` holds."""
    created = []

    for title in article_titles(node):
        if is_created_between(title, start, end):
            created.append(title)

    return created

## Anzahl neuer Artikel im Zeitraum 1.10.2016-30.9.2017

In [52]:
def w(what, node):
    """Print `what` followed by the number of articles of `node` created from 2016-10-01 to 2017-09-30."""
    created = new_articles(node, date(2016, 10, 1), date(2017, 9, 30))
    print(what, len(created))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 95
Grundlagenbuch: 1
Analysis 1: 33
Lineare Algebra 1: 18


## Anzahl der Autoren / Autorinnen im Zeitraum 1.10.2016-30.9.2017 (mit anonymen Autoren / Autorinnen)

In [56]:
def foo(node):
    """Return the set of user names with a revision of `node` from 2016-10-01 to 2017-09-30."""
    edits = revisions(node, date(2016, 10, 1), date(2017, 9, 30))
    return {rev["user"] for rev in edits}

def w(what, node):
    """Print `what` followed by the number of users returned by `foo(node)`."""
    users = foo(node)
    print(what, len(users))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 224
Grundlagenbuch: 38
Analysis 1: 163
Lineare Algebra 1: 31


## Anzahl der Autoren / Autorinnen im Zeitraum 1.10.2016-30.9.2017 (ohne anonyme Autoren / Autorinnen)

In [57]:
def foo(node):
    """Return the set of user names with a revision of `node` from 2016-10-01 to 2017-09-30.

    Revisions having an "anon" entry are ignored.
    """
    users = set()

    for rev in revisions(node, date(2016, 10, 1), date(2017, 9, 30)):
        if "anon" not in rev:
            users.add(rev["user"])

    return users

def w(what, node):
    """Print `what` followed by the number of users returned by `foo(node)`."""
    users = foo(node)
    print(what, len(users))

for label, book in (("Gesamt:", mfnf), ("Grundlagenbuch:", grund),
                    ("Analysis 1:", ana1), ("Lineare Algebra 1:", la1)):
    w(label, book)

Gesamt: 62
Grundlagenbuch: 21
Analysis 1: 42
Lineare Algebra 1: 12
