In [12]:
import pandas as pd
import mysql.connector

from IPython.display import display, Markdown, HTML

db = mysql.connector.connect(
    host="localhost",
    user="root",
    password="secret",
    port="3306",
    database="serlo",
    charset="latin1"
)

def cached(func):
    cache = dict()
    
    def return_func(arg):
        if (arg in cache):
            return cache[arg]
        else:
            result = func(arg)
            cache[arg] = result
            return result
    
    return return_func

def query(sql):
    c = db.cursor()
    c.execute(sql)
    
    return c.fetchall()

def querySingleton(sql):
    return [ x[0] for x in query(sql) ]

@cached
def getParent(termId):
    return querySingleton("""
        select parent_id from term_taxonomy where id = %s;
    """ % termId)[0]

def getTermName(termId):
    return querySingleton("""
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = %s;
    """ % termId)[0]

@cached
def getSubject(termId):
    if int(termId) in [79733, 81317, 20852, 87814, 87827, 85477, 87860, 75049, 76750, 87496, 75678, 91252, 91253]:
        return "Prüfungsbereich Mathematik"
    if int(termId) in [106082]:
        return getTermName(termId)
    
    parent = getParent(termId)
    grandparent = getParent(parent)
    
    if (parent == 106081):
        return getTermName(termId)
    
    return getSubject(parent) if grandparent != None else getTermName(termId)

@cached
def getSubjectFromUuid(uuid):
    taxonomyTerms = querySingleton(f"""
        select term_taxonomy_id from term_taxonomy_entity
        where term_taxonomy_entity.entity_id  = {uuid};
    """)
    
    if len(taxonomyTerms) > 0:
        return getSubject(taxonomyTerms[0])

    parents = querySingleton(f"""
        select parent_id from entity_link
        where entity_link.child_id  = {uuid};
    """)
    
    if len(parents) > 0:
        return getSubjectFromUuid(parents[0])
    
    return None

#display(getSubjectFromUuid(127338))
#display(getSubjectFromUuid(63496))
#display(getSubjectFromUuid(1))
#display(getSubjectFromUuid(170741))
#display(getSubjectFromUuid(167497))

In [13]:
import json

json.dumps(querySingleton("""
select distinct(entity_link.parent_id ) from event_log join entity_link on entity_link.child_id = event_log.uuid_id where event_log.event_id = 4 and event_log.date > Date("2020-02-01");
"""));

**Entity Types:**
id | name                       <br>
+----+-----------------------------<br>
|  1 | taxonomy/term/associate     <br>
|  2 | taxonomy/term/create        <br>
|  3 | license/object/set          <br>
|  4 | entity/create               <br>
|  5 | entity/revision/add         <br>
|  6 | entity/revision/checkout   <br>
|  7 | entity/link/create          <br>
|  8 | discussion/create           <br>
|  9 | discussion/comment/create  <br>
| 10 | uuid/trash                  <br>
| 11 | entity/revision/reject      <br>
| 12 | taxonomy/term/update        <br>
| 13 | uuid/restore                <br>
| 14 | discussion/comment/archive <br>
| 15 | taxonomy/term/parent/change <br>
| 16 | discussion/restore          <br>
| 17 | taxonomy/term/dissociate    <br>
| 18 | entity/link/remove          <br>      


**Für diese Analyse ausgewählt:**<br>
  1 | taxonomy/term/associate<br>
  2 | taxonomy/term/create<br>
  6 | entity/revision/checkout<br>
 11 | entity/revision/reject<br>
 12 | taxonomy/term/update<br>
 13 | uuid/restore<br>
 15 | taxonomy/term/parent/change<br>
 16 | discussion/restore<br>
 17 | taxonomy/term/dissociate<br> 

In [14]:
def read_event_log():
    df = pd.read_sql("""
        select event_log.id, event_log.actor_id, event_log.date, user.username, event_parameter_uuid.uuid_id from event_log
        join user on user.id = event_log.actor_id
        join event_parameter on event_parameter.log_id = event_log.id
        join event_parameter_uuid on event_parameter_uuid.event_parameter_id = event_parameter.id
        where event_log.event_id = 1 
            or event_log.event_id = 2 
            or event_log.event_id = 6 
            or event_log.event_id = 11
            or event_log.event_id = 12
            or event_log.event_id = 13 
            or event_log.event_id = 15
            or event_log.event_id = 16
            or event_log.event_id = 17
        and year(event_log.date) > 2018
        and user.username != "Legacy"
    """, db)
    df.set_index("id", inplace=True)
    df.rename(columns={"uuid_id": "uuid"}, inplace=True)
    df["subject"] = df["uuid"].map(getSubjectFromUuid)
    return df

event_log = read_event_log()
event_log.head(100)

Unnamed: 0_level_0,actor_id,date,username,uuid,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,6,2014-03-01 20:36:33,Legacy,1199,
2,6,2014-03-01 20:36:34,Legacy,1200,
3,6,2014-03-01 20:36:34,Legacy,1201,
4,6,2014-03-01 20:36:34,Legacy,1202,
5,6,2014-03-01 20:36:34,Legacy,1203,
...,...,...,...,...,...
316,6,2014-03-01 20:37:02,Legacy,1501,Mathe
319,6,2014-03-01 20:37:02,Legacy,1503,Mathe
321,6,2014-03-01 20:37:08,Legacy,1503,Mathe
324,6,2014-03-01 20:37:08,Legacy,1505,Mathe


In [15]:
event_log.username.unique()

array(['Legacy', 'arekkas', 'inyono', 'blacksleet', 'bunk', 'RobinKibart',
       'Lena09', 'Hannes', 'Felix', 'wasmeier', 'haberlm', 'franzi',
       'AK1701', 'Simon', 'Renate', 'devuser', 'motto1', 'Manuel_Voigt',
       'Tinsaye', 'wolfgang', 'Annette', 'jkempff', 'Laura', 'roehren',
       'tinsayeab', 'Martin230', 'HokkaidokÃ\x83Â¼rbis', 'hjiang',
       'Agnes', 'hfreye', 'Chembe', 'jakob', 'Kati_L', 'porks', 'ME',
       'almalla1', 'Genie', 'tornowm', 'Kulla', 'NiCi', 'ClaF', 'Gent',
       'tacurran', 'Monika', 'Katha26', 'mats', 'daFischer', 'philer',
       'Nish', 'Nessa', 'LorenzHuber', 'Amadeus', 'philipp_gadow',
       'Henry', 'CutterSlade', 'Petz', 'manu89', 'horvatkevin', 'Petrus',
       'BLachner', '-anna-', 'irgendwer', 'Disputation', 'anja',
       'SerloMZL', 'jobheld', 'Emma', 'flowfx', 'arbaro', 'sportler85',
       'basser51', 'LILLIA12', 'elapinae', 'Pi-rat', 'edeljor', 'botho',
       'solva', 'deutschonline', 'Friedemann', 'renade', 'Mononoke',
       'Win

In [16]:
days = 90
edits = 10
cutoff_date = pd.Timestamp.today() - pd.Timedelta(days=days)
#print (cutoff_date)

df1 = event_log[event_log['date'] > cutoff_date]
#Delete User "Legacy"
df1.drop(df1[df1.actor_id == 6].index, inplace=True)
df2 = df1.groupby(by = ['subject', 'actor_id'], as_index = False).count()
df3 = df2[df2['uuid'] >= edits]
df3 = df3.drop(['date', 'username', 'uuid'], axis = 1)
df4 = df3.groupby(by = ['subject']).count()
active_reviewers_df = df4.rename(columns={"actor_id": "active_reviewers"})
active_reviewers_df = active_reviewers_df.sort_values(by = ['active_reviewers'], ascending=False)
display(Markdown(f"### Anzahl aktiver Reviewer:Innnen pro Fach"))
active_reviewers_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


### Anzahl aktiver Reviewer:Innnen pro Fach

Unnamed: 0_level_0,active_reviewers
subject,Unnamed: 1_level_1
Mathe,9
Sandkasten,9
Biologie,4
Nachhaltigkeit,4
Englisch,3
Prüfungsbereich Mathematik,3
Chemie,2
Informatik,2
Community,1
Deutsch als Fremdsprache,1
