In [1]:
import pandas as pd
import mysql.connector

from IPython.display import display, Markdown, HTML

db = mysql.connector.connect(
    host="localhost",
    user="root",
    password="secret",
    port="3306",
    database="serlo"
)

def cached(func):
    cache = dict()
    
    def return_func(arg="__default"):
        if (arg in cache):
            return cache[arg]
        else:
            if arg == "__default":
                result = func()
            else:
                result = func(arg)
            cache[arg] = result
            return result
    
    return return_func

def query(sql):
    c = db.cursor()
    c.execute(sql)
    
    return c.fetchall()

def querySingleton(sql):
    return [ x[0] for x in query(sql) ]



In [2]:
@cached
def getParent(termId):
    return querySingleton("""
        select parent_id from term_taxonomy where id = %s;
    """ % termId)[0]

def getTermName(termId):
    return querySingleton("""
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = %s;
    """ % termId)[0]

@cached
def getSubject(termId):
    if int(termId) in [79733, 81317, 20852, 87814, 87827, 85477, 87860, 75049, 76750, 87496, 75678, 91252, 91253]:
        return "Prüfungsbereich Mathematik"
    if int(termId) in [106082]:
        return getTermName(termId)
    
    parent = getParent(termId)
    grandparent = getParent(parent)
    
    if (parent == 106081):
        return getTermName(termId)
    
    return getSubject(parent) if grandparent != None else getTermName(termId)

@cached
def getSubjectFromUuid(uuid):
    taxonomyTerms = querySingleton(f"""
        select term_taxonomy_id from term_taxonomy_entity
        where term_taxonomy_entity.entity_id  = {uuid};
    """)
    
    if len(taxonomyTerms) > 0:
        return getSubject(taxonomyTerms[0])

    parents = querySingleton(f"""
        select parent_id from entity_link
        where entity_link.child_id  = {uuid};
    """)
    
    if len(parents) > 0:
        return getSubjectFromUuid(parents[0])
    
    return None

@cached
def get_entities():
    return query("""
        select entity.id, entity_revision.author_id, user.username, entity_revision.date from entity 
        join entity_revision 
            on entity.current_revision_id = entity_revision.id 
        join user on user.id = entity_revision.author_id
            where entity_revision.date > '2021-05-31'
            and entity.current_revision_id IS NOT NULL limit 20;
    """)

get_entities()[0]
result_df = pd.DataFrame(get_entities())

# Read in User Data and Eventlog

In [7]:
user_df = pd.read_sql("""
        select id, username,date, logins from user where date > '2021-05-31';
    """, db)

In [8]:
# Liste aller neuen Nutzer*Innen, die seit Juni 2021 dazu gekommen sind
new_user_list = list(user_df.id.unique())

In [9]:
def read_event_log():
    df = pd.read_sql("""
        select event_log.id, event_log.actor_id, event_log.date, user.username, event_parameter_uuid.uuid_id from event_log
        join user on user.id = event_log.actor_id
        join event_parameter on event_parameter.log_id = event_log.id
        join event_parameter_uuid on event_parameter_uuid.event_parameter_id = event_parameter.id
        where event_log.event_id = 5
        and year(event_log.date) > 2018
        and user.username != "Legacy"
    """, db)
    df.set_index("id", inplace=True)
    df.rename(columns={"uuid_id": "uuid"}, inplace=True)
    df["subject"] = df["uuid"].map(getSubjectFromUuid)
    return df

event_log = read_event_log()
event_log.head(100)

Unnamed: 0_level_0,actor_id,date,username,uuid,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
252170,95850,2019-01-02 10:35:03,Leo1,127338,Chemie
252172,95850,2019-01-02 10:58:42,Leo1,127338,Chemie
252180,95849,2019-01-02 11:46:55,hernlmax,63496,Chemie
252185,95849,2019-01-02 11:51:49,hernlmax,127428,Chemie
252187,95849,2019-01-02 11:52:47,hernlmax,127428,Chemie
...,...,...,...,...,...
252493,95854,2019-01-04 16:20:53,markus_janker,127596,Chemie
252498,121732,2019-01-04 16:29:27,Jonathan,127594,Mathe
252503,121732,2019-01-04 16:48:53,Jonathan,127576,Mathe
252505,121732,2019-01-04 16:49:01,Jonathan,127602,Mathe


## 1. Bearbeitete Inhalte seit 01.06. (Liste von Uuids)

In [54]:
# Alle Bearbeitungen seit Juni 
edits_df = event_log['2021-05-31' < event_log['date']] 
edits_list = list(edits_df.uuid.unique())
print(f"Anzahl bearbeiteter Inhalte: {len(edits_list)}")
print(edits_list)

Anzahl bearbeiteter Inhalte: 8140
[212969, 212956, 209911, 212984, 212986, 170411, 200268, 32357, 58148, 9805, 62609, 6521, 6523, 6525, 6527, 6529, 14395, 14415, 14455, 14419, 14459, 14431, 14427, 14411, 14447, 14451, 14423, 14435, 14443, 14463, 14391, 198175, 213036, 212976, 147713, 1601, 213046, 212771, 212766, 212769, 117669, 213081, 213086, 21439, 211859, 213105, 213107, 209254, 203177, 212396, 202602, 204793, 204795, 204797, 205768, 205770, 205166, 205168, 211704, 9461, 204791, 205156, 205058, 205063, 205065, 205067, 205180, 205182, 211708, 205191, 205193, 204892, 204893, 204894, 204895, 204896, 204897, 205084, 205206, 205208, 213139, 213172, 2213, 169072, 213202, 213226, 58771, 211912, 213256, 210119, 212940, 212030, 212032, 205458, 5119, 5121, 205760, 205762, 205767, 205769, 205128, 205130, 205132, 205134, 205663, 205731, 205733, 205687, 210061, 205665, 209964, 210136, 97763, 12327, 213334, 213337, 175218, 2137, 205129, 213368, 213372, 213376, 213380, 12549, 213383, 58152, 16957

# Berechnungen für Einordnung Inhalte Mittelschule

In [46]:
@cached
def get_name(taxonomy_term_id):
    return querySingleton(f"""
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = {taxonomy_term_id}
    """)

get_name(16259)

['Mittelschule']

In [62]:
@cached
def get_taxonomy_children(taxonomy_id):
    return querySingleton(f"""
        select term_taxonomy.id from term_taxonomy
        join uuid on uuid.id = term_taxonomy.id
        where term_taxonomy.parent_id = {taxonomy_id}
            and uuid.trashed = 0
        order by id
    """)

In [68]:
@cached
def get_entity_children(taxonomy_id):
    return querySingleton(f"""
        select term_taxonomy_entity.entity_id from term_taxonomy
        join term_taxonomy_entity on term_taxonomy_entity.term_taxonomy_id = term_taxonomy.id
        join uuid on uuid.id = term_taxonomy_entity.entity_id
        where term_taxonomy.id = {taxonomy_id}
        and uuid.trashed = 0
        order by term_taxonomy_entity.position
    """)

In [69]:
def iter_taxonomy_ids(taxonomy_id):
    yield taxonomy_id
    
    for child in get_taxonomy_children(taxonomy_id):
        yield from iter_taxonomy_ids(child)

taxonomy_ids = list(iter_taxonomy_ids(16259))
entity_ids = sum([ get_entity_children(x) for x in taxonomy_ids ], [])

## 2. Inhalte davon in Mittelschullehrplan eingebunden

In [73]:
#Welche der im Zeitraum bearbeiteten ids sind auch in der Liste der entity_ids von Mittelschule
mittelschule_list = set(edits_list) & set(entity_ids)
print(mittelschule_list)

{1541, 1543, 205324, 2063, 216592, 1555, 1557, 1561, 227365, 227374, 2101, 214583, 216635, 216637, 227394, 216647, 214613, 116824, 216665, 22110, 214625, 206442, 226933, 216694, 226935, 226936, 2171, 226942, 226943, 1667, 2191, 226964, 226970, 226980, 226986, 226991, 1717, 205498, 1753, 1755, 1763, 36069, 19699, 227063, 1785, 227086, 227087, 214802, 227095, 7981, 1841, 111923, 1843, 1847, 4409, 1849, 4411, 227132, 227134, 14657, 36162, 8001, 1863, 133457, 227155, 1885, 5985, 227194, 4995, 22405, 29577, 227212, 8081, 227220, 227231, 216995, 1957, 217008, 37826, 217028, 227278, 1999, 217039, 217054, 141791, 227297, 216575}


In [75]:
print(f"Anzahl der Inhalte in Mittelschule eingeordnet: {len(mittelschule_list)}")

Anzahl der Inhalte in Mittelschule eingeordnet: 87


## 3. Anzahl und Namen der AutorInnen, die seit Juni 2021 Bearbeitungen gemacht haben

In [61]:
#Liste aller AutorInnen, die Bearbeitungen seit Juni 2021 ausgeführt haben.
author_list = list(edits_df.actor_id.unique())
print(f"Anzahl der AutorInnen: {len(author_list)}")
#Namen der AutorInnen, die die Bearbeitungen ausgeführt haben
user_df[user_df['id'].isin(author_list)]

Anzahl der AutorInnen: 122


Unnamed: 0,id,username,date,logins
8,213072,LeonieTrautmann,2021-06-01 09:11:52,71
106,215665,Carina_Faude,2021-06-21 09:44:27,42
336,220175,annacarina,2021-07-20 11:33:51,2
393,221179,Natan,2021-07-29 12:46:38,1
460,222056,ABC19Z,2021-08-09 16:59:31,81
484,222299,acarolinabotelho,2021-08-13 00:25:07,4
606,223662,Vijay,2021-08-29 17:30:30,35
607,223665,Lavanya,2021-08-29 17:35:24,15
630,223881,senayildiz,2021-09-01 18:15:20,4
635,223895,tamara_proebster,2021-09-02 12:01:22,12


In [14]:
#Berechnung aktiver Autor*Innen von heute aus 
days = 90
edits = 10
result_df = pd.DataFrame()
week = 0
  
lower_date = pd.Timestamp.today() - pd.Timedelta(days = days + week*7)
upper_date = pd.Timestamp.today() - pd.Timedelta(days = week*7)
 
df1 = event_log[lower_date < event_log['date']] 
df2 = df1.groupby(by = ['actor_id'], as_index = False).count()
df3 = df2[df2['uuid'] >= edits]
df3 = df3.drop(['date', 'username', 'uuid'], axis = 1)

active_authors_df = df3.rename(columns={"actor_id": "active_authors"})
#active_authors_df = active_authors_df.sort_values(by = ['active_authors'], ascending=False)
#display(Markdown(f"### Anzahl aktiver Autor:Innnen pro Fach"))
editing_users_list = list(df2.actor_id)
active_authors_list = list(active_authors_df.active_authors)

In [50]:
new_editing_users_list = set(editing_users_list) & set(new_user_list)

In [49]:
new_active_authors_list = set(active_authors_list) & set(new_user_list)

## 4.1 Liste aller neuen NutzerInnen seit Juni 2021 mit mindestens einer Bearbeitung

In [19]:
user_df[user_df['id'].isin(new_editing_users_list)]

Unnamed: 0,id,username,date,logins
460,222056,ABC19Z,2021-08-09 16:59:31,81
635,223895,tamara_proebster,2021-09-02 12:01:22,12
740,224659,bchij,2021-09-15 17:39:52,7
786,225065,Habiba,2021-09-21 10:47:10,6
787,225067,Max_,2021-09-21 10:48:24,6
790,225081,vanessa2021,2021-09-21 15:16:35,9
815,225323,randomName23,2021-09-23 17:45:37,14
910,226308,Kaje,2021-10-04 14:55:24,5
976,226932,Flo123,2021-10-11 14:40:15,23
1235,230048,olivia__,2021-11-19 17:30:22,4


## 4.2 Liste aller neuen aktiven AutorInnen seit Juni 2021

In [18]:
user_df[user_df['id'].isin(new_active_authors_list)]

Unnamed: 0,id,username,date,logins
635,223895,tamara_proebster,2021-09-02 12:01:22,12
740,224659,bchij,2021-09-15 17:39:52,7
786,225065,Habiba,2021-09-21 10:47:10,6
910,226308,Kaje,2021-10-04 14:55:24,5
976,226932,Flo123,2021-10-11 14:40:15,23
1311,231271,RalfReinecke,2021-11-30 16:56:44,10
1419,232733,ThomasG,2021-12-13 19:52:19,2
1470,233549,LucaWellhausen,2021-12-21 10:55:25,8
1519,234394,uebermario,2022-01-03 11:19:35,2
1568,235184,korbi_d,2022-01-10 11:37:39,3
