In [47]:
import functools
import os
from datetime import date, datetime

import matplotlib.pyplot as plt
import mysql.connector
import pandas as pd
from IPython.display import display, Markdown, HTML

pd.options.mode.chained_assignment = None


# Render the date on which the notebook was executed as a Markdown heading
# (the German heading text reads "Last update").
display(Markdown(f"### Letztes Update: {date.today()}"))

### Letztes Update: 2023-01-23

In [2]:
# Shared MySQL connection used by every query helper in this notebook.
# Each setting can be overridden through an environment variable so that real
# credentials never have to be stored in the notebook; the fallbacks are the
# previously hard-coded values, so an unconfigured run behaves as before.
db = mysql.connector.connect(
    host=os.environ.get("SERLO_DB_HOST", "localhost"),
    user=os.environ.get("SERLO_DB_USER", "root"),
    password=os.environ.get("SERLO_DB_PASSWORD", "secret"),
    port=os.environ.get("SERLO_DB_PORT", "3306"),
    database=os.environ.get("SERLO_DB_NAME", "serlo"),
    # NOTE(review): latin1 is kept from the original connection settings;
    # presumably it matches how the text columns are stored - confirm before
    # changing it.
    charset="latin1"
)

def cached(func):
    """Memoize a one-argument function in an unbounded in-memory dict.

    Args:
        func: Callable taking a single hashable argument.

    Returns:
        A wrapper that calls ``func`` at most once per distinct argument and
        serves the stored result afterwards (``None`` results are stored too).
        Nothing is stored when ``func`` raises. The wrapper keeps ``func``'s
        name and docstring, so decorated helpers stay recognisable in
        tracebacks and ``help()``.
    """
    cache = dict()

    @functools.wraps(func)
    def return_func(arg):
        # Membership test instead of .get(): a stored None must count as a hit.
        if arg not in cache:
            cache[arg] = func(arg)
        return cache[arg]

    return return_func

def query(sql, params=None):
    """Execute ``sql`` on the shared ``db`` connection and return all rows.

    Args:
        sql: SQL statement text.
        params: Optional sequence or mapping of values that the database
            driver binds to the placeholders in ``sql``. When omitted, the
            statement is executed verbatim, exactly as before.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the statement.
    """
    cursor = db.cursor()
    try:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)
        return cursor.fetchall()
    finally:
        # Release the cursor even when execute/fetch raises; previously every
        # call left an open cursor behind on the shared connection.
        cursor.close()

def querySingleton(sql):
    """Run ``sql`` via ``query`` and return the first column of every row as a list."""
    first_column = []
    for row in query(sql):
        first_column.append(row[0])
    return first_column

@cached
def getParent(termId):
    """Return the ``parent_id`` stored for the ``term_taxonomy`` row ``termId``.

    The id is interpolated into the SQL text with ``%`` formatting. Only the
    first matching row is used; an ``IndexError`` is raised when no row
    matches. Results are memoized by ``cached``.
    """
    sql = """
        select parent_id from term_taxonomy where id = %s;
    """ % termId
    parent_ids = querySingleton(sql)
    return parent_ids[0]

def getTermName(termId):
    """Return the ``term.name`` joined to the ``term_taxonomy`` row ``termId``.

    The id is interpolated into the SQL text with ``%`` formatting. Only the
    first matching row is used; an ``IndexError`` is raised when no row
    matches. Unlike its sibling lookups this helper is not memoized.
    """
    sql = """
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = %s;
    """ % termId
    names = querySingleton(sql)
    return names[0]

@cached
def getSubject(termId):
    """Resolve the subject name for the taxonomy term ``termId``.

    Resolution order:

    1. A fixed list of term ids maps to "Prüfungsbereich Mathematik".
    2. Term 106082 is reported under its own name.
    3. A term with no parent, or whose parent is 106081, is reported under
       its own name.
    4. A term whose parent has no parent is reported under its own name.
    5. Otherwise the subject of the parent is returned (recursively).

    The hard-coded ids are overrides carried over unchanged; what they stand
    for in the taxonomy is not visible from this notebook.

    Args:
        termId: Taxonomy term id; must be convertible with ``int()``.

    Returns:
        The subject name as a string. Results are memoized by ``cached``.
    """
    numeric_id = int(termId)
    if numeric_id in (79733, 81317, 20852, 87814, 87827, 85477, 87860,
                      75049, 76750, 87496, 75678, 91252, 91253):
        return "Prüfungsbereich Mathematik"
    if numeric_id == 106082:
        return getTermName(termId)

    parent = getParent(termId)

    # A term without a parent used to fall through to getParent(None), which
    # formats "id = None" into the SQL and fails inside the database. Treat
    # it like the other "top of the hierarchy" cases instead.
    if parent is None or parent == 106081:
        return getTermName(termId)

    # Only look up the grandparent once it is actually needed.
    grandparent = getParent(parent)
    if grandparent is None:
        return getTermName(termId)

    return getSubject(parent)

@cached
def getSubjectFromUuid(uuid):
    """Resolve the subject for the entity ``uuid``.

    If the entity has rows in ``term_taxonomy_entity``, the subject of the
    first returned taxonomy term is used. Otherwise the lookup is repeated
    for the first parent found in ``entity_link``. Returns ``None`` when the
    entity has neither a taxonomy term nor a parent. Results are memoized by
    ``cached``.
    """
    term_ids = querySingleton(f"""
        select term_taxonomy_id from term_taxonomy_entity
        where term_taxonomy_entity.entity_id  = {uuid};
    """)
    if term_ids:
        return getSubject(term_ids[0])

    parent_ids = querySingleton(f"""
        select parent_id from entity_link
        where entity_link.child_id  = {uuid};
    """)
    if not parent_ids:
        return None

    # No direct taxonomy entry: inherit the subject from the first parent.
    return getSubjectFromUuid(parent_ids[0])

In [3]:
import json

# Serializes to JSON the distinct entity_link.parent_id values of entities
# whose id appears as uuid_id in event_log rows with event_id = 4 dated after
# 2020-02-01. The result is not assigned and the trailing ";" suppresses the
# cell output, so this cell has no visible effect beyond running the query.
json.dumps(querySingleton("""
select distinct(entity_link.parent_id ) from event_log join entity_link on entity_link.child_id = event_log.uuid_id where event_log.event_id = 4 and event_log.date > Date("2020-02-01");
"""));

In [4]:
def read_event_log():
    """Load the event-log rows with ``event_id = 5`` and attach a subject.

    The query keeps events from years after 2018 whose actor is not the user
    named "Legacy". The returned frame is indexed by the event ``id`` and has
    the columns ``actor_id``, ``date``, ``username``, ``uuid`` (renamed from
    ``uuid_id``) and ``subject`` (looked up per row with
    ``getSubjectFromUuid``).
    """
    sql = """
        select event_log.id, event_log.actor_id, event_log.date, user.username, event_parameter_uuid.uuid_id from event_log
        join user on user.id = event_log.actor_id
        join event_parameter on event_parameter.log_id = event_log.id
        join event_parameter_uuid on event_parameter_uuid.event_parameter_id = event_parameter.id
        where event_log.event_id = 5
        and year(event_log.date) > 2018
        and user.username != "Legacy"
    """
    return (
        pd.read_sql(sql, db)
        .set_index("id")
        .rename(columns={"uuid_id": "uuid"})
        .assign(subject=lambda frame: frame["uuid"].map(getSubjectFromUuid))
    )

# Load the event log once (this runs the SQL query and resolves a subject for
# every row) and preview the first 100 rows.
event_log = read_event_log()
event_log.head(100)

Unnamed: 0_level_0,actor_id,date,username,uuid,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
252170,95850,2019-01-02 10:35:03,Leo1,127338,Chemie
252172,95850,2019-01-02 10:58:42,Leo1,127338,Chemie
252180,95849,2019-01-02 11:46:55,hernlmax,63496,Chemie
252185,95849,2019-01-02 11:51:49,hernlmax,127428,Chemie
252187,95849,2019-01-02 11:52:47,hernlmax,127428,Chemie
...,...,...,...,...,...
252503,121732,2019-01-04 16:48:53,Jonathan,127576,Mathe
252505,121732,2019-01-04 16:49:01,Jonathan,127602,Mathe
252510,95854,2019-01-04 16:50:43,markus_janker,127600,Chemie
252512,95854,2019-01-04 16:57:43,markus_janker,127600,Chemie


# Bearbeitungen am 08.01.2023

In [54]:
# Replace the timestamps with plain datetime.date values (time of day is
# dropped) so the column can be compared for equality with a single calendar
# day in the cells below. This overwrites the "date" column of the shared
# event_log frame in place.
event_log['date'] = pd.to_datetime(event_log['date']).dt.date

In [62]:
# Calendar day under review, as a plain date so that it compares equal to the
# date-only values held in event_log["date"].
date_object_0801 = datetime.strptime("2023-01-08", '%Y-%m-%d').date()
# Boolean-mask selection of the log rows recorded on that day.
df_0801 = event_log.loc[event_log["date"] == date_object_0801]

In [63]:
# Number of edits per (subject, username) on 2023-01-08.
# The table is rebuilt from event_log instead of overwriting its own input and
# dropping columns in place, so re-running this cell yields the same result
# rather than failing on the already-aggregated frame.
# NOTE: groupby drops rows whose subject is missing (default dropna=True), so
# edits on entities without a resolvable subject are not counted here.
df_0801 = (
    event_log[event_log["date"] == date_object_0801]
    .groupby(by=["subject", "username"], as_index=False)["uuid"]
    .count()
    .rename(columns={"uuid": "edits"})
)
df_0801

Unnamed: 0,subject,username,edits
0,Community,Tina_B,3
1,Informatik,hwlang,6
2,Lerntipps,Felix_Eccardt,14
3,Lerntipps,Nadine_Gabel,1
4,Mathe,Felix_Eccardt,2
5,Mathe,Kowalsky,44
6,Physik,uebermario,1


# Bearbeitungen am 15.01.2023

In [64]:
# Calendar day under review, as a plain date so that it compares equal to the
# date-only values held in event_log["date"].
date_object_1501 = datetime.strptime("2023-01-15", '%Y-%m-%d').date()
# Boolean-mask selection of the log rows recorded on that day.
df_1501 = event_log.loc[event_log["date"] == date_object_1501]

In [65]:
# Number of edits per (subject, username) on 2023-01-15.
# The table is rebuilt from event_log instead of overwriting its own input and
# dropping columns in place, so re-running this cell yields the same result
# rather than failing on the already-aggregated frame.
# NOTE: groupby drops rows whose subject is missing (default dropna=True), so
# edits on entities without a resolvable subject are not counted here.
df_1501 = (
    event_log[event_log["date"] == date_object_1501]
    .groupby(by=["subject", "username"], as_index=False)["uuid"]
    .count()
    .rename(columns={"uuid": "edits"})
)
df_1501

Unnamed: 0,subject,username,edits
0,Deutsch,raha,1
1,Informatik,hwlang,1
2,Latein,raha,1
3,Mathe,KevinH,1
4,Mathe,Kowalsky,3
5,Mathe,Peter,2
6,Testbereich!,Kowalsky,8
7,Testbereich!,LinaMaria,2
