In [1]:
import functools
import os

import mysql.connector
import pandas as pd
from IPython.display import display, Markdown, HTML

# Connection settings are read from the environment so that real credentials
# do not have to be stored in the notebook. Each fallback is the value this
# notebook previously hard-coded, so an unconfigured local setup connects
# exactly as before.
db = mysql.connector.connect(
    host=os.environ.get("SERLO_DB_HOST", "localhost"),
    user=os.environ.get("SERLO_DB_USER", "root"),
    password=os.environ.get("SERLO_DB_PASSWORD", "secret"),
    port=os.environ.get("SERLO_DB_PORT", "3306"),
    database=os.environ.get("SERLO_DB_NAME", "serlo"),
)



In [2]:
def cached(func):
    """Return a memoizing wrapper around ``func``.

    Each distinct combination of (hashable) arguments is computed once and
    the stored result is returned on every later call, including results
    that are ``None``. The cache is unbounded and lives as long as the
    returned wrapper does.

    The wrapper keeps ``func``'s name and docstring, so decorated helpers
    still identify themselves correctly in tracebacks and ``help()``.

    Args:
        func: The function to memoize. Its arguments must be hashable.

    Returns:
        A callable with the same call signature as ``func``.
    """
    # Standard-library memoizer: unbounded cache keyed by the call arguments.
    return functools.lru_cache(maxsize=None)(func)

def query(sql, params=None):
    """Execute ``sql`` on the shared ``db`` connection and return all rows.

    Args:
        sql: The SQL statement to execute.
        params: Optional values handed to ``cursor.execute`` for binding to
            the placeholders in ``sql``. When omitted, ``sql`` is executed
            as-is, exactly as before this parameter existed. Prefer passing
            values here over formatting them into the SQL text.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the statement.
    """
    c = db.cursor()
    try:
        if params is None:
            c.execute(sql)
        else:
            c.execute(sql, params)
        return c.fetchall()
    finally:
        # Release the cursor even when execute/fetch raises, so repeated
        # calls do not leave open cursors behind on the connection.
        c.close()

def querySingleton(sql):
    """Run ``sql`` via ``query`` and return the first column of each row as a list."""
    first_column = []
    for row in query(sql):
        first_column.append(row[0])
    return first_column

@cached
def getParent(termId):
    """Return ``parent_id`` of the ``term_taxonomy`` row whose id is ``termId``.

    The result is memoized by ``cached``. Raises ``IndexError`` when the
    query returns no row.
    """
    statement = """
        select parent_id from term_taxonomy where id = %s;
    """ % termId
    parent_ids = querySingleton(statement)
    return parent_ids[0]

def getTermName(termId):
    """Return the ``term.name`` joined to the ``term_taxonomy`` row ``termId``.

    Raises ``IndexError`` when the query returns no row.
    """
    statement = """
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = %s;
    """ % termId
    names = querySingleton(statement)
    return names[0]

@cached
def getSubject(termId):
    """Resolve the subject name for the taxonomy term ``termId``.

    Resolution rules, checked in this order:

    1. A fixed list of term ids maps to "Prüfungsbereich Mathematik".
    2. Term 106082 is named by its own term name.
    3. A term without a parent (``parent_id`` is NULL) is named by its own
       term name. Previously this case formatted ``None`` into the parent
       lookup query and failed.
    4. A term whose parent is 106081 is named by its own term name.
    5. A term whose parent has no parent is named by its own term name.
    6. Otherwise the subject of the parent term is returned (recursively).

    NOTE(review): what terms 106081 / 106082 represent is not visible in
    this notebook; confirm before changing these ids.

    Args:
        termId: Id of a ``term_taxonomy`` row; anything ``int()`` accepts.

    Returns:
        The subject name as a string.
    """
    term_id = int(termId)
    if term_id in (79733, 81317, 20852, 87814, 87827, 85477, 87860,
                   75049, 76750, 87496, 75678, 91252, 91253):
        return "Prüfungsbereich Mathematik"
    if term_id == 106082:
        return getTermName(termId)

    parent = getParent(termId)
    # Stop before looking up the grandparent: a root term has no parent to
    # query, and children of 106081 never need the grandparent at all.
    if parent is None or parent == 106081:
        return getTermName(termId)

    grandparent = getParent(parent)
    if grandparent is None:
        return getTermName(termId)
    return getSubject(parent)

@cached
def getSubjectFromUuid(uuid):
    """Resolve the subject of the entity ``uuid``; ``None`` if it cannot be found.

    If the entity is attached to at least one taxonomy term, the subject of
    the first such term is returned. Otherwise the lookup continues with the
    entity's first parent in ``entity_link``. An entity with neither
    yields ``None``. Results are memoized by ``cached``.
    """
    term_ids = querySingleton(f"""
        select term_taxonomy_id from term_taxonomy_entity
        where term_taxonomy_entity.entity_id  = {uuid};
    """)
    if term_ids:
        return getSubject(term_ids[0])

    parent_ids = querySingleton(f"""
        select parent_id from entity_link
        where entity_link.child_id  = {uuid};
    """)
    if not parent_ids:
        return None
    return getSubjectFromUuid(parent_ids[0])


In [13]:
def read_event_log():
    """Load the event log from the database as a DataFrame indexed by event id.

    The query keeps rows with ``event_id = 5`` dated after 2018 whose actor
    is not the user "Legacy", joined to the affected entity and its type.
    NOTE(review): what event id 5 stands for is not visible here; confirm.

    Returns:
        A DataFrame indexed by ``id`` with the columns ``actor_id``,
        ``date``, ``username``, ``uuid``, ``typename`` and ``subject``, where
        ``subject`` is ``getSubjectFromUuid`` applied to each ``uuid``.
    """
    sql = """
        select event_log.id, event_log.actor_id, event_log.date, user.username, event_parameter_uuid.uuid_id, type.name from event_log
        join user on user.id = event_log.actor_id
        join event_parameter on event_parameter.log_id = event_log.id
        join event_parameter_uuid on event_parameter_uuid.event_parameter_id = event_parameter.id
        join entity on entity.id = event_parameter_uuid.uuid_id
        join type on type.id = entity.type_id
        where event_log.event_id = 5
        and year(event_log.date) > 2018
        and user.username != "Legacy"
    """
    events = (
        pd.read_sql(sql, db)
        .set_index("id")
        .rename(columns={"uuid_id": "uuid", "name": "typename"})
    )
    events["subject"] = events["uuid"].map(getSubjectFromUuid)
    return events

# Load the event log once; the cells below all filter or sort this frame.
event_log = read_event_log()
# Preview the first ten rows.
event_log.head(10)

Unnamed: 0_level_0,actor_id,date,username,uuid,typename,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
304197,87602,2020-03-20 13:31:20,Karin,2327,text-exercise,Mathe
295900,73435,2020-02-24 16:15:57,kathongi,2365,text-exercise,Mathe
369636,191821,2021-01-25 18:57:07,koehlertim,2365,text-exercise,Mathe
323584,73435,2020-05-19 10:55:35,kathongi,2369,text-exercise,Mathe
354730,73435,2020-11-02 17:10:24,kathongi,2585,text-exercise,Mathe
323475,154641,2020-05-18 15:31:22,JulianWR,2589,text-exercise,Mathe
354986,73435,2020-11-04 10:35:13,kathongi,2593,text-exercise,Mathe
295904,73435,2020-02-24 16:18:29,kathongi,2823,text-exercise,Mathe
299517,150960,2020-03-05 12:28:46,pro100met,2845,text-exercise,Mathe
326497,73435,2020-06-05 11:56:48,kathongi,2867,text-exercise,Mathe


In [14]:
# Ids of all trashed entities. The id column is selected explicitly because
# querySingleton keeps only the first column of each row; with `SELECT *` the
# result silently depended on the table's column order.
# NOTE(review): assumes the uuid table's key column is named `id` -- confirm.
deleted_entity_list = querySingleton("""
        SELECT id FROM uuid WHERE discriminator = 'entity' AND trashed = 1;
    """)

In [15]:
# Keep only the events whose resolved subject is "Lerntipps".
lerntipps_df = event_log.loc[event_log["subject"] == "Lerntipps"]
lerntipps_df.head()

Unnamed: 0_level_0,actor_id,date,username,uuid,typename,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
309153,142379,2020-04-03 12:15:28,MarK97,119345,text-exercise,Lerntipps
309163,142379,2020-04-03 12:16:24,MarK97,119352,text-exercise,Lerntipps
309185,142379,2020-04-03 12:17:24,MarK97,119437,text-exercise,Lerntipps
309199,142379,2020-04-03 12:18:26,MarK97,119441,text-exercise,Lerntipps
255885,116723,2019-01-17 14:28:08,ThomasBrunner,119758,text-exercise,Lerntipps


In [16]:
# Length of the "recent activity" window in days. Both the cutoff date and
# the heading below are derived from it, so they can no longer disagree.
days = 90
cutoff_date = pd.Timestamp.today() - pd.Timedelta(days=days)
print(cutoff_date)

# "Lerntipps" events newer than the cutoff.
df1 = lerntipps_df[lerntipps_df['date'] > cutoff_date]
display(Markdown(f"### Aktivitäten in den letzten {days} Tagen"))

2021-04-23 14:10:05.431483


### Aktivitäten in den letzten 90 Tagen

In [17]:
# Timestamps of the ten most recent "Lerntipps" events, newest first.
lerntipps_df["date"].sort_values(ascending=False).head(10)

id
404800   2021-05-21 13:06:25
399127   2021-05-06 12:29:20
392223   2021-04-22 10:11:05
391466   2021-04-20 17:12:21
391465   2021-04-20 16:59:30
391459   2021-04-20 16:53:24
391457   2021-04-20 16:51:04
391455   2021-04-20 16:50:59
388138   2021-04-09 13:43:17
377277   2021-03-03 22:25:03
Name: date, dtype: datetime64[ns]

In [18]:
# The ten most recent events across all subjects, newest first.
event_log.sort_values(by="date", ascending=False).head(10)

Unnamed: 0_level_0,actor_id,date,username,uuid,typename,subject
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
419819,196775,2021-07-22 01:15:38,Corinna,214672,article,Nachhaltigkeit
419818,196775,2021-07-22 00:58:24,Corinna,116795,article,Nachhaltigkeit
419817,215665,2021-07-21 18:02:12,Carina_Faude,30056,text-solution,Mathe
419816,215665,2021-07-21 18:02:11,Carina_Faude,30044,text-exercise,Mathe
419815,215665,2021-07-21 17:30:52,Carina_Faude,220370,article,Deutsch
419814,211658,2021-07-21 17:17:49,MelMel,215858,text-solution,Prüfungsbereich Mathematik
419813,196775,2021-07-21 17:16:08,Corinna,203177,article,Nachhaltigkeit
419809,195449,2021-07-21 17:01:57,Mathes,220367,text-exercise,Sandkasten
419811,195449,2021-07-21 17:01:57,Mathes,220374,text-solution,Sandkasten
419798,195449,2021-07-21 16:25:54,Mathes,220365,text-exercise,Sandkasten


In [23]:
# Distinct entity uuids touched by "Lerntipps" events, in order of first appearance.
unique_entity_list = lerntipps_df["uuid"].drop_duplicates().tolist()

In [24]:
# "Lerntipps" entities that are also in the trashed-entity list.
deleted = set(unique_entity_list).intersection(deleted_entity_list)

In [49]:
# Number of "Lerntipps" entities that are not trashed. `deleted` is a subset
# of `unique_entity_list` by construction, so subtracting the sizes is exact.
len(unique_entity_list) - len(deleted)

105

In [26]:
# Distinct entity type names among the "Lerntipps" events. Despite the
# `_list` name, `.unique()` returns a NumPy array, not a Python list.
entity_type_list = lerntipps_df['typename'].unique()

In [28]:
# Show the distinct entity type names.
entity_type_list

array(['text-exercise', 'text-solution', 'article', 'course',
       'course-page'], dtype=object)

In [33]:
# Reduce the "Lerntipps" events to the (uuid, typename) pair of each touched entity.
unique_entity_df = lerntipps_df.loc[:, ["uuid", "typename"]]
unique_entity_df.head()

Unnamed: 0_level_0,uuid,typename
id,Unnamed: 1_level_1,Unnamed: 2_level_1
309153,119345,text-exercise
309163,119352,text-exercise
309185,119437,text-exercise
309199,119441,text-exercise
255885,119758,text-exercise


In [42]:
# One row per distinct (uuid, typename) pair, with trashed entities removed.
unique_entity_df = (
    unique_entity_df
    .drop_duplicates(ignore_index=True)
    .loc[lambda frame: ~frame["uuid"].isin(deleted)]
)

In [48]:
# Heading, then the total number of non-trashed "Lerntipps" entities.
display(Markdown("### Gesamte Anzahl der Inhalte"))
unique_entity_df.shape[0]

### Gesamte Anzahl der Inhalte

105

In [46]:
# Heading, then the number of non-trashed "Lerntipps" entities per entity type.
display(Markdown("### Typ der Inhalte mit Anzahl"))
unique_entity_df.groupby(by="typename").count()

### Typ der Inhalte mit Anzahl

Unnamed: 0_level_0,uuid
typename,Unnamed: 1_level_1
article,9
course,6
course-page,72
text-exercise,10
text-solution,8
