# Hilfsfunktionen

In [1]:
import getpass

# Ask for the Simple Analytics API key interactively instead of writing it into
# the notebook; get_page_views() sends it as the "Api-Key" request header.
API_KEY = getpass.getpass("Simple Analytics API Key: ")

Simple Analytics API Key: ········


In [2]:
import json
import mysql.connector

from IPython.display import display, Markdown, HTML

# Shared connection to the local MySQL database "serlo"; query() opens its
# cursors on this object.
# NOTE(review): user and password are hardcoded here -- fine for a throwaway
# local instance, but confirm this never points at a real server.
# NOTE(review): the taxonomy names printed by show() contain mojibake
# (e.g. "GrÃ¶Ãen"), presumably a charset mismatch involving "latin1" -- confirm
# against the database's actual column encoding before changing it.
_db_settings = dict(
    host="localhost",
    user="root",
    password="secret",
    port="3306",
    database="serlo",
    charset="latin1",
)

db = mysql.connector.connect(**_db_settings)

def query(sql):
    """Run ``sql`` on the shared ``db`` connection and return all result rows.

    Args:
        sql: Complete SQL statement to execute.

    Returns:
        Whatever the cursor's ``fetchall()`` yields (one entry per row).
    """
    cursor = db.cursor()

    try:
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Release the cursor even when execute()/fetchall() raises; otherwise
        # every call leaves one more open cursor on the connection.
        cursor.close()

def querySingleton(sql):
    """Run ``sql`` via ``query`` and return only the first column of each row."""
    first_column = []

    for row in query(sql):
        first_column.append(row[0])

    return first_column

def cached(func):
    """Memoize ``func`` in an in-memory dict for the lifetime of the wrapper.

    The cache key is the JSON serialization of the call's positional and
    keyword arguments, so all arguments must be JSON-serializable
    (``json.dumps`` raises ``TypeError`` otherwise). Positional and keyword
    spellings of the same argument are cached separately.

    Args:
        func: The function whose results should be cached.

    Returns:
        A wrapper with the same call signature as ``func``.
    """
    from functools import wraps

    cache = dict()

    @wraps(func)
    def returned_func(*args, **kwargs):
        # Keyword arguments are forwarded as well: a wrapper taking only *args
        # makes every keyword call to a decorated function raise TypeError.
        # sort_keys keeps the key independent of keyword order.
        key = json.dumps([args, kwargs], sort_keys=True)

        if key not in cache:
            cache[key] = func(*args, **kwargs)

        return cache[key]

    return returned_func

In [3]:
@cached
def get_taxonomy_children(taxonomy_id):
    """Return the ids of the direct child taxonomy terms of ``taxonomy_id``.

    Only terms whose ``uuid.trashed`` flag is 0 are included; the result is
    ordered by id.
    """
    sql = f"""
        select term_taxonomy.id from term_taxonomy
        join uuid on uuid.id = term_taxonomy.id
        where term_taxonomy.parent_id = {taxonomy_id}
            and uuid.trashed = 0
        order by id
    """

    return querySingleton(sql)

get_taxonomy_children(16157)

[16158,
 16159,
 16160,
 16161,
 16162,
 16163,
 75049,
 76750,
 85477,
 87814,
 87827,
 87860]

In [4]:
@cached
def get_name(taxonomy_id):
    """Return the ``term.name`` belonging to the taxonomy term ``taxonomy_id``.

    Raises:
        IndexError: If the query returns no row for ``taxonomy_id``.
    """
    sql = f"""
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = {taxonomy_id}
    """
    names = querySingleton(sql)

    return names[0]

get_name(209228)

'Aufgaben zu Wertpapieren'

In [5]:
@cached
def get_entity_children(taxonomy_id):
    """Return the ids of the entities attached to taxonomy term ``taxonomy_id``.

    Entities are read from ``term_taxonomy_entity``, restricted to those whose
    ``uuid.trashed`` flag is 0, and ordered by their ``position`` in the term.
    """
    sql = f"""
        select term_taxonomy_entity.entity_id from term_taxonomy
        join term_taxonomy_entity on term_taxonomy_entity.term_taxonomy_id = term_taxonomy.id
        join uuid on uuid.id = term_taxonomy_entity.entity_id
        where term_taxonomy.id = {taxonomy_id}
        and uuid.trashed = 0
        order by term_taxonomy_entity.position
    """

    return querySingleton(sql)

get_entity_children(16174)

[2111,
 18685,
 1717,
 1993,
 107488,
 107903,
 121533,
 121535,
 129709,
 19336,
 1525,
 1677,
 1495,
 1871,
 2153,
 1507,
 1639,
 1633,
 1679,
 1671]

In [6]:
import requests
import time

@cached
def get_alias(uuid_id, attempt=0):
    """Return the path that https://de.serlo.org/<uuid_id> redirects to.

    The request is sent without following redirects and the ``Location``
    response header is read. If the header is missing, the request is retried
    after a 5 second pause for as long as ``attempt`` is at most 2.

    Args:
        uuid_id: Id to resolve.
        attempt: Number of attempts already made; callers normally leave this
            at its default.

    Returns:
        The redirect target with the "https://de.serlo.org" prefix removed
        (a ``Location`` value without that prefix is returned unchanged), or
        ``None`` when no attempt produced a ``Location`` header.
    """
    base_url = "https://de.serlo.org"

    # Retry in a loop rather than by re-entering the decorated function, so
    # retries do not depend on how the memoizing wrapper forwards arguments
    # and do not create one cache entry per attempt.
    while True:
        req = requests.get(f"{base_url}/{uuid_id}", allow_redirects=False)
        location = req.headers.get("Location")

        if location is not None:
            # Only strip the host when it is really there; slicing a relative
            # Location unconditionally would cut off the start of the path.
            if location.startswith(base_url):
                return location[len(base_url):]
            return location

        if attempt > 2:
            return None

        time.sleep(5)
        attempt += 1
    
get_alias(16174)

'/mathe/16174/grundrechenarten-und-rechengesetze'

In [7]:
@cached
def get_page_views(uuid_id):
    """Fetch the Simple Analytics statistics for the page behind ``uuid_id``.

    The page path comes from ``get_alias``. When no alias can be resolved, a
    zeroed ``{"pageviews": 0, "visitors": 0}`` dict is returned without
    calling the API.

    Returns:
        The decoded JSON body of the API response, requested with the fields
        "pageviews" and "visitors".
    """
    path = get_alias(uuid_id)

    if path is None:
        return {"pageviews": 0, "visitors": 0}

    query_params = {
        "version": "5",
        "fields": "pageviews,visitors",
        "page": path,
    }
    request_headers = {
        "Content-Type": "json",
        "Api-Key": API_KEY,
    }

    response = requests.get(
        "https://simpleanalytics.com/de.serlo.org.json",
        params=query_params,
        headers=request_headers,
    )

    return response.json()

get_page_views(16174)

{'__ok': 'INFO: With this boolean you can quickly check if you can expect a response',
 'ok': True,
 'docs': 'https://docs.simpleanalytics.com/api',
 '__info': "INFO: Disable these info fields by adding 'info=false' to the URL.",
 'info': True,
 'hostname': 'de.serlo.org',
 'url': 'https://de.serlo.org',
 '__path': 'INFO: You are using a path in the URLcounts are relative to only that path.',
 'path': '/mathe/16174/grundrechenarten-und-rechengesetze',
 'start': '2021-06-13T22:00:00.000Z',
 'end': '2021-07-14T22:59:59.999Z',
 '__version': "INFO: API version. You are using the latest version. You can specify versions by added 'version=5' to your URL (valid versions: 1, 2, 3, 4, 5)",
 'version': 5,
 '__timezone': 'INFO: You can change your timezone in your website settings',
 'timezone': 'Europe/Berlin',
 '__pageviews': 'INFO: Just a total of all pageviews',
 'pageviews': 10,
 '__visitors': 'INFO: Just a total of all visitors',
 'visitors': 7,
 '__generated_in_ms': 'INFO: Time needed to g

In [8]:
def iter_all_uuids(taxonomy_id):
    """Yield every id in the taxonomy subtree rooted at ``taxonomy_id``.

    Traversal is depth-first and pre-order: a term's own id comes first, then
    the ids from ``get_entity_children``, then the subtrees of each term from
    ``get_taxonomy_children`` in the order that function returns them.
    """
    pending = [taxonomy_id]

    while pending:
        current = pending.pop()

        yield current

        for entity_id in get_entity_children(current):
            yield entity_id

        # Push children reversed so the first child is popped (visited) first.
        pending.extend(reversed(get_taxonomy_children(current)))
    
list(iter_all_uuids(175157))

[175157,
 16174,
 2111,
 18685,
 1717,
 1993,
 107488,
 107903,
 121533,
 121535,
 129709,
 19336,
 1525,
 1677,
 1495,
 1871,
 2153,
 1507,
 1639,
 1633,
 1679,
 1671,
 21804,
 9189,
 9145,
 4181,
 4185,
 6607,
 6491,
 9161,
 9165,
 9173,
 9149,
 6857,
 6861,
 6865,
 6877,
 2217,
 7331,
 6881,
 6873,
 207224,
 209278,
 209282,
 209288,
 209337,
 218600,
 22251,
 9741,
 9549,
 9757,
 9743,
 9563,
 9157,
 9153,
 120853,
 120854,
 120860,
 208844,
 209276,
 22257,
 9627,
 13651,
 9641,
 10613,
 22258,
 9513,
 9045,
 6593,
 6487,
 10771,
 4969,
 6929,
 10543,
 10719,
 10775,
 4973,
 10039,
 120847,
 121178,
 121192,
 209656,
 209685,
 40879,
 4347,
 4349,
 6837,
 6813,
 7157,
 10323,
 8697,
 6833,
 6829,
 6825,
 6821,
 6817,
 175163,
 175166,
 175167]

In [9]:
@cached
def get_total(taxonomy_id):
    """Sum page views and visitors over the whole subtree of ``taxonomy_id``.

    Ids from ``iter_all_uuids`` are de-duplicated first, so an id reachable
    through several terms is counted once.
    NOTE(review): visitor counts are added up page by page, so a person who
    visited several pages is presumably counted more than once -- confirm
    against the Simple Analytics definition of "visitors".

    Returns:
        A dict with the keys "pageviews" and "visitors".
    """
    unique_ids = set(iter_all_uuids(taxonomy_id))
    stats_per_id = [get_page_views(uuid_id) for uuid_id in unique_ids]

    def column_total(metric):
        return sum(stats[metric] for stats in stats_per_id)

    return {
        "pageviews": column_total("pageviews"),
        "visitors": column_total("visitors"),
    }

get_total(175157)

{'pageviews': 4712, 'visitors': 2705}

# Auswertung

In [11]:
def show(taxonomy_id, max_depth=2, depth=0):
    """Print an indented tree of taxonomy names with their traffic totals.

    Each line holds the term name followed by a dict with its "pageviews" and
    "visitors" totals. Terms below the root are indented by two spaces per
    level and prefixed with "|- ".

    Args:
        taxonomy_id: Root term of the (sub)tree to print.
        max_depth: Children are expanded for every term whose depth is at most
            this value, so the deepest printed level is ``max_depth + 1``.
        depth: Depth of ``taxonomy_id`` itself; used by the recursion.
    """
    prefix = ("  " * depth + "|- ") if depth > 0 else ""
    label = prefix + get_name(taxonomy_id) + ": "

    total = get_total(taxonomy_id)
    summary = {key: total[key] for key in ("pageviews", "visitors")}

    print(label + str(summary))

    if depth > max_depth:
        return

    for child in get_taxonomy_children(taxonomy_id):
        show(child, depth=depth+1, max_depth=max_depth)
    
    
show(16157)
show(16259)

Realschule: {'pageviews': 210894, 'visitors': 166167}
  |- Klasse 5: {'pageviews': 25573, 'visitors': 19903}
    |- Ganze Zahlen: {'pageviews': 16, 'visitors': 0}
    |- GrÃ¶Ãen: {'pageviews': 10155, 'visitors': 9446}
      |- Aufgaben zu GrÃ¶Ãen und Einheiten: {'pageviews': 9, 'visitors': 0}
    |- Auswertung von Daten: {'pageviews': 148, 'visitors': 60}
      |- Aufgaben zu Daten und Zufallsexperimente: {'pageviews': 5, 'visitors': 1}
    |- Geometrische Grundvorstellungen und Grundbegriffe: {'pageviews': 9039, 'visitors': 6503}
      |- Aufgaben zu geometrischen Grundformen und Grundbegriffen: {'pageviews': 15, 'visitors': 0}
    |- Umfang und FlÃ¤cheninhalt ebener Figuren: {'pageviews': 4440, 'visitors': 3386}
    |- NatÃ¼rliche Zahlen: {'pageviews': 4712, 'visitors': 2705}
      |- Grundrechenarten und Rechengesetze: {'pageviews': 4617, 'visitors': 2705}
      |- NatÃ¼rliche Zahlen im Dezimalsystem: {'pageviews': 13, 'visitors': 0}
      |- Andere Zahlsysteme: {'pageviews': 3, '

Mittelschule: {'pageviews': 213785, 'visitors': 97801}
  |- Klasse 5: {'pageviews': 1561, 'visitors': 708}
    |- NatÃ¼rliche Zahlen: {'pageviews': 345, 'visitors': 4}
      |- Grundrechenarten im Bereich der natÃ¼rlichen Zahlen: {'pageviews': 87, 'visitors': 4}
      |- Der Zahlenraum Ã¼ber eine Milliarde hinaus: {'pageviews': 91, 'visitors': 0}
    |- Gleichungen und Formeln: {'pageviews': 95, 'visitors': 1}
      |- Aufgaben zu Termen und Gleichungen: {'pageviews': 33, 'visitors': 1}
    |- Ganze Zahlen: {'pageviews': 82, 'visitors': 30}
      |- Aufgaben zu Zahlenstrahl und Zahlengerade: {'pageviews': 17, 'visitors': 13}
      |- Aufgaben zur Addition und Subtraktion ganzer Zahlen: {'pageviews': 10, 'visitors': 6}
      |- Sachaufgaben zu ganzen Zahlen: {'pageviews': 10, 'visitors': 7}
      |- Aufgaben zum GrÃ¶Ãenvergeich ganzer Zahlen: {'pageviews': 8, 'visitors': 2}
    |- GrÃ¶Ãen im Alltag: {'pageviews': 34, 'visitors': 0}
    |- Geometrische Figuren und Lagebeziehungen: {'pa