In [1]:
import pandas as pd
import mysql.connector

from IPython.display import display, Markdown, HTML

db = mysql.connector.connect(
    host="localhost",
    user="root",
    password="secret",
    port="3306",
    database="serlo"
)

def cached(func):
    cache = dict()
    
    def return_func(arg):
        if (arg in cache):
            return cache[arg]
        else:
            result = func(arg)
            cache[arg] = result
            return result
    
    return return_func

def query(sql):
    c = db.cursor()
    c.execute(sql)
    
    return c.fetchall()

def querySingleton(sql):
    return [ x[0] for x in query(sql) ]

@cached
def getParent(termId):
    return querySingleton("""
        select parent_id from term_taxonomy where id = %s;
    """ % termId)[0]

def getTermName(termId):
    return querySingleton("""
        select term.name from term_taxonomy
        join term on term.id = term_taxonomy.term_id
        where term_taxonomy.id = %s;
    """ % termId)[0]

@cached
def getSubject(termId):
    if int(termId) in [79733, 81317, 20852, 87814, 87827, 85477, 87860, 75049, 76750, 87496, 75678, 91252, 91253]:
        return "Prüfungsbereich Mathematik"
    if int(termId) in [106082]:
        return getTermName(termId)
    
    parent = getParent(termId)
    grandparent = getParent(parent)
    
    if (parent == 106081):
        return getTermName(termId)
    
    return getSubject(parent) if grandparent != None else getTermName(termId)

@cached
def getSubjectFromUuid(uuid):
    taxonomyTerms = querySingleton(f"""
        select term_taxonomy_id from term_taxonomy_entity
        where term_taxonomy_entity.entity_id  = {uuid};
    """)
    
    if len(taxonomyTerms) > 0:
        return getSubject(taxonomyTerms[0])

    parents = querySingleton(f"""
        select parent_id from entity_link
        where entity_link.child_id  = {uuid};
    """)
    
    if len(parents) > 0:
        return getSubjectFromUuid(parents[0])
    
    return None




In [2]:
df = pd.read_sql("""
    select entity_link.parent_id, entity_link.child_id, entity_link.type_id, uuid.id, uuid.discriminator, type.name from uuid
        left join entity_link on uuid.id = entity_link.parent_id
        join entity on entity.id = uuid.id
        join type on type.id = entity.type_id
    where uuid.discriminator = 'entity' and trashed = 0  and type.name = "text-exercise" or type.name = "grouped-exercise";
    """, db)

In [13]:
df["subject"] = df["id"].map(getSubjectFromUuid)

In [14]:
df.head()

Unnamed: 0,parent_id,child_id,type_id,id,discriminator,name,subject
0,2327.0,2329.0,9.0,2327,entity,text-exercise,Mathe
1,2365.0,2367.0,9.0,2365,entity,text-exercise,Mathe
2,2369.0,2371.0,9.0,2369,entity,text-exercise,Mathe
3,2585.0,2587.0,9.0,2585,entity,text-exercise,Mathe
4,2589.0,2591.0,9.0,2589,entity,text-exercise,Mathe


In [15]:
no_solution_df = df[df.isnull().any(axis=1)]

In [35]:
display(Markdown(f"### Liste der Ids ohne Lösungen"))
pd.set_option('display.max_rows', no_solution_df.shape[0]+1)
no_solution_df[["id", "name", "subject"]]

### Liste der Ids ohne Lösungen

Unnamed: 0,id,name,subject
546,9795,text-exercise,Mathe
641,11679,text-exercise,Mathe
651,11991,text-exercise,Mathe
680,13281,text-exercise,Mathe
681,13295,text-exercise,Mathe
716,13931,text-exercise,Mathe
717,13935,text-exercise,Mathe
745,14565,text-exercise,Mathe
746,14567,text-exercise,Mathe
747,14569,text-exercise,Mathe


In [37]:
display(Markdown(f"### Gesamte Anzahl der Ids ohne Lösungen"))
result = no_solution_df.groupby('subject').count()
result.id.sum()

### Gesamte Anzahl der Ids ohne Lösungen

987

In [36]:
display(Markdown(f"### Gruppierte Anzahl der Ids ohne Lösungen nach Fach"))
result["id"].sort_values(ascending=False)

### Gruppierte Anzahl der Ids ohne Lösungen nach Fach

subject
Mathe                                          463
Prüfungsbereich Mathematik                     203
Sandkasten                                      95
à®‡à®²à®•à¯à®•à®£à®®à¯                        46
Chemie                                          39
Nachhaltigkeit                                  24
Informatik                                      18
Deutsch als Fremdsprache                        17
Community                                       16
Biologie                                        14
Wirtschaft                                      12
Englisch                                        12
Physik                                           6
Medienbildung                                    5
Geographie                                       3
MatemÃ¡ticas                                     3
Deutsch als Zweit- und Fremdsprache              2
mathÃ©matiques                                   2
Testbereich                                      1
à®µà®°à®²à®¾à®±à¯     