In [1]:
from datetime import date
import pandas as pd
import mysql.connector
pd.options.mode.chained_assignment = None

from IPython.display import display, Markdown, HTML

pd.set_option('display.max_rows', 100)



In [2]:
db = mysql.connector.connect(
    host="localhost",
    user="root",
    password="secret",
    port="3306",
    database="serlo",
    charset="latin1"
)

def cached(func):
    cache = dict()
    
    def return_func(arg):
        if (arg in cache):
            return cache[arg]
        else:
            result = func(arg)
            cache[arg] = result
            return result
    
    return return_func

def query(sql):
    c = db.cursor()
    c.execute(sql)
    
    return c.fetchall()

def querySingleton(sql):
    return [ x[0] for x in query(sql) ]

@cached
def getParent(termId):
    return querySingleton("""
        select parent_id from term_taxonomy where id = %s;
    """ % termId)[0]

def getTermName(termId):
    return querySingleton("""
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = %s;
    """ % termId)[0]

@cached
def getSubject(termId):
    if int(termId) in [79733, 81317, 20852, 87814, 87827, 85477, 87860, 75049, 76750, 87496, 75678, 91252, 91253]:
        return "Prüfungsbereich Mathematik"
    if int(termId) in [106082]:
        return getTermName(termId)
    
    parent = getParent(termId)
    grandparent = getParent(parent)
    
    if (parent == 106081):
        return getTermName(termId)
    
    return getSubject(parent) if grandparent != None else getTermName(termId)

@cached
def getSubjectFromUuid(uuid):
    taxonomyTerms = querySingleton(f"""
        select term_taxonomy_id from term_taxonomy_entity
        where term_taxonomy_entity.entity_id  = {uuid};
    """)
    
    if len(taxonomyTerms) > 0:
        return getSubject(taxonomyTerms[0])

    parents = querySingleton(f"""
        select parent_id from entity_link
        where entity_link.child_id  = {uuid};
    """)
    
    if len(parents) > 0:
        return getSubjectFromUuid(parents[0])
    
    return None


In [3]:
import json

json.dumps(querySingleton("""
select distinct(entity_link.parent_id ) from event_log join entity_link on entity_link.child_id = event_log.uuid_id where event_log.event_id = 4 and event_log.date > Date("2020-02-01");
"""));

In [4]:
def read_event_log():
    df = pd.read_sql("""
        select event_log.id, event_log.actor_id, event_log.date, user.username, event_log.event_id, event.name, event_parameter_uuid.uuid_id from event_log
        join user on user.id = event_log.actor_id                    
        join event_parameter on event_parameter.log_id = event_log.id                            
        join event_parameter_uuid on event_parameter_uuid.event_parameter_id = event_parameter.id
        join event on event.id = event_log.event_id
        and event_log.date > "2022-01-01"
        and user.username != "Legacy";
    """, db)
    df.set_index("id", inplace=True)
    df.rename(columns={"uuid_id": "uuid"}, inplace=True)
    df["subject"] = df["uuid"].map(getSubjectFromUuid)
    return df

event_log = read_event_log()
event_log.head(10)

Unnamed: 0_level_0,actor_id,date,username,event_id,name,uuid,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
444870,34105,2022-01-04 11:24:19,Knorrke,9,discussion/comment/create,234449,
444980,34105,2022-01-04 19:52:57,Knorrke,9,discussion/comment/create,234485,
444992,34105,2022-01-04 20:03:07,Knorrke,9,discussion/comment/create,234484,
445060,164877,2022-01-05 15:17:01,Marc_Ho,9,discussion/comment/create,234534,
445088,234578,2022-01-05 15:21:21,Outaspace,9,discussion/comment/create,234602,
445118,169563,2022-01-05 19:09:20,Peter,9,discussion/comment/create,234581,
445121,169563,2022-01-05 19:12:14,Peter,9,discussion/comment/create,234602,
446280,235229,2022-01-10 14:10:12,Strebbbiiiii,9,discussion/comment/create,235228,
446281,235210,2022-01-10 14:11:30,Destreber,9,discussion/comment/create,235228,
446282,235229,2022-01-10 14:12:45,Strebbbiiiii,9,discussion/comment/create,235228,


In [5]:
display(Markdown(f"### Aktueller Stand der Statistik: {date.today()}"))

### Aktueller Stand der Statistik: 2022-11-29

# 1. Top Bearbeitungen in 2022

In [6]:
edits_df = event_log[event_log["event_id"]== 5]
top_edits_df =  edits_df.groupby(by = ['actor_id', 'username'], as_index = False).count()
top_edits_df = top_edits_df.nlargest(100, 'uuid')
top_edits_df = top_edits_df.drop(['date', 'subject', 'event_id', 'name'], axis=1)
top_edits_df = top_edits_df.rename(columns={"uuid": "edits"})            

In [7]:
top_edits_df.reset_index()

Unnamed: 0,index,actor_id,username,edits
0,78,240298,Felix_Eccardt,4882
1,11,64900,Kowalsky,1789
2,38,148701,Sascha_Lill_95,1125
3,16,87602,Karin,1051
4,40,163773,LinaMaria,1038
5,43,169563,Peter,627
6,72,235184,korbi_d,545
7,50,197401,Annika-Hemlein,473
8,12,70932,metzgaria,450
9,14,73435,kathongi,449


# 2. Top Reviewer* Innen in 2022

In [8]:
reviews_df = event_log[(event_log["event_id"]== 6) |  (event_log["event_id"] == 11)]
top_reviews_df =  reviews_df.groupby(by = ['actor_id', 'username'], as_index = False).count()
top_reviews_df = top_reviews_df.nlargest(100, 'uuid')
top_reviews_df = top_reviews_df.drop(['date', 'subject', 'event_id', 'name'], axis=1)
top_reviews_df = top_reviews_df.rename(columns={"uuid": "reviews"})            

In [9]:
top_reviews_df.reset_index()

Unnamed: 0,index,actor_id,username,reviews
0,38,240298,Felix_Eccardt,4279
1,12,87602,Karin,2115
2,25,169563,Peter,1712
3,23,148701,Sascha_Lill_95,1542
4,8,64900,Kowalsky,1188
5,24,163773,LinaMaria,1085
6,9,70932,metzgaria,899
7,10,73435,kathongi,491
8,30,197401,Annika-Hemlein,470
9,29,196775,Corinna,368


# 3. Top Kommentator* Innen in 2022

In [10]:
comments_df = event_log[event_log["event_id"]== 9]
top_comments_df =  comments_df.groupby(by = ['actor_id', 'username'], as_index = False).count()
top_comments_df = top_comments_df.nlargest(100, 'uuid')
top_comments_df = top_comments_df.drop(['date', 'subject', 'event_id', 'name'], axis=1)
top_comments_df = top_comments_df.rename(columns={"uuid": "comments"})         

In [11]:
top_comments_df.reset_index()

Unnamed: 0,index,actor_id,username,comments
0,16,169563,Peter,127
1,5,64900,Kowalsky,45
2,7,73435,kathongi,20
3,0,266,Renate,12
4,11,146667,BerndF,8
5,6,70932,metzgaria,7
6,56,259265,DB_BR_88,4
7,4,34105,Knorrke,4
8,18,224659,bchij,3
9,15,164877,Marc_Ho,3


# 4. Top Architekt* Innen in 2022

In [12]:
architects_df = event_log[(event_log["event_id"]== 1) |  (event_log["event_id"] == 2) | (event_log["event_id"]== 12) |  (event_log["event_id"] == 15)|  (event_log["event_id"] == 17)]
top_architects_df =  architects_df.groupby(by = ['actor_id', 'username'], as_index = False).count()
top_architects_df = top_architects_df.nlargest(100, 'uuid')
top_architects_df = top_architects_df.drop(['date', 'subject', 'event_id', 'name'], axis=1)
top_architects_df = top_architects_df.rename(columns={"uuid": "taxonomy edits"})         

In [13]:
top_architects_df.reset_index()

Unnamed: 0,index,actor_id,username,taxonomy edits
0,51,240298,Felix_Eccardt,439
1,7,70932,metzgaria,316
2,10,87602,Karin,293
3,25,163773,LinaMaria,216
4,8,73435,kathongi,149
5,52,240311,Selin_11,142
6,6,64900,Kowalsky,110
7,23,148701,Sascha_Lill_95,99
8,38,225065,Habiba,88
9,55,241896,Adonis,53


# 5. Anzahl der Bearbeitungen in 2022

In [14]:
len(edits_df)

16478

# 6. Anzahl der Bearbeitungen pro Fach in 2022

In [15]:
edits_per_subject_df = edits_df.groupby(by = ["subject"], as_index = False).count()
edits_per_subject_df = edits_per_subject_df.drop(["actor_id", "date", "username", "event_id", "name"], axis = 1)
edits_per_subject_df = edits_per_subject_df.rename(columns = {"uuid": "Anzahl Bearbeitungen"})

In [16]:
edits_per_subject_df.sort_values(by = ["Anzahl Bearbeitungen"], ascending = False)

Unnamed: 0,subject,Anzahl Bearbeitungen
21,Mathe,7336
20,Math,1562
32,Testbereich!,1467
27,Prüfungsbereich Mathematik,1122
14,Informatik,779
24,Nachhaltigkeit,778
2,Community,507
19,MatemÃ¡ticas,354
0,Biologie,298
4,Deutsch als Fremdsprache,262
