In [1]:
import os
from datetime import date

import mysql.connector
import pandas as pd
from sqlalchemy import create_engine

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

from IPython.display import display, Markdown, HTML

# Notebook title, rendered as a Markdown heading.
display(Markdown("# Waisen Entitäten"))

# Waisen Entitäten

In [2]:
# Connection URL of the serlo MySQL database. It can be overridden with the
# SERLO_DATABASE_URL environment variable so that credentials do not have to be
# written into the notebook; the fallback is the local development URL that was
# previously hard-coded here.
DATABASE_URL = os.environ.get(
    "SERLO_DATABASE_URL",
    "mysql+mysqlconnector://root:secret@localhost:3306/serlo",
)

engine = create_engine(DATABASE_URL)

# Raw DB-API connection checked out from the engine's pool.
connection = engine.raw_connection()

In [3]:
def get_orphan_entities():
    """Return every entity that currently has no parent.

    Two queries are run through the module-level ``engine``:

    * entities whose ``type_id`` is one of (3, 4, 1, 6, 7, 40, 49, 50) and
      whose id does not occur as ``entity_id`` in ``term_taxonomy_entity``;
    * entities whose ``type_id`` is one of (5, 2, 8, 41, 42, 44, 43, 45, 47, 46)
      and whose id does not occur as ``child_id`` in ``entity_link``.

    Returns:
        pandas.DataFrame: columns ``id``, ``type`` (the type name) and
        ``trashed``; rows from the ``entity_link`` query come first. The index
        is a unique running number starting at 1.
    """
    orphan_entities_from_taxonomies = pd.read_sql(
        """
        SELECT entity.id, type.name AS type, trashed FROM entity
            JOIN uuid ON uuid.id = entity.id
            JOIN type ON type.id = type_id
            WHERE entity.id NOT IN (
                SELECT DISTINCT(entity_id) FROM term_taxonomy_entity
            ) 
            AND type_id IN (3, 4, 1, 6, 7, 40, 49, 50)
        """,
        con=engine,
    )

    orphan_entities_from_entities = pd.read_sql(
        """
        SELECT entity.id, type.name AS type, trashed FROM entity
            JOIN uuid ON uuid.id = entity.id
            JOIN type ON type.id = type_id
            WHERE entity.id NOT IN (
                SELECT DISTINCT(child_id) FROM entity_link
            ) 
            AND type_id IN (5, 2, 8, 41, 42, 44, 43, 45, 47, 46)
        """,
        con=engine,
    )

    # ignore_index=True renumbers the combined rows 0..n-1. Without it each
    # frame keeps its own 0-based index, so labels would repeat whenever both
    # queries return rows.
    orphan_entities = pd.concat(
        [orphan_entities_from_entities, orphan_entities_from_taxonomies],
        ignore_index=True,
    )

    # Show a 1-based row number instead of the 0-based default.
    orphan_entities.index += 1
    return orphan_entities


# Snapshot of the parentless entities as they are right now; kept in
# `orphan_entities_before` and shown as this cell's output.
display(Markdown("# Die folgenden Entitäten sind bereits jetzt ohne Eltern "))

orphan_entities_before = get_orphan_entities()
orphan_entities_before

# Die folgenden Entitäten sind bereits jetzt ohne Eltern 

Unnamed: 0,id,type,trashed
1,39391,text-exercise,0
2,48117,text-exercise,0
3,48130,text-exercise,0
4,57671,text-exercise,0
5,24976,article,0
6,27536,article,0
7,41720,article,0
8,48116,article,0
9,64118,article,0
10,78675,article,0


In [4]:
# DB-API cursor on the raw connection; get_number_of_uuids() below executes its query through it.
cursor = connection.cursor()


def get_number_of_uuids():
    """Return the number of rows currently in the ``uuid`` table.

    The count is queried through the module-level ``cursor``.
    """
    count_query = """
        SELECT count(*)
            FROM uuid
        """
    cursor.execute(count_query)
    rows = cursor.fetchall()
    # count(*) yields exactly one row with one column.
    return rows[0][0]


# Show the number of uuid rows before anything is deleted.
display(Markdown(f"# Aktuelle Zahl an Inhalten: {get_number_of_uuids()} "))

# Aktuelle Zahl an Inhalten: 276825 

In [5]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Cut-off date: only trash events logged before this moment are considered.
one_year_ago = datetime.now() - relativedelta(years=1)

# Collect the uuids that are trashed and have an event_log entry with
# event_id = 10 older than the cut-off.
# See https://github.com/serlo/db-migrations/blob/main/src/20231005150000-delete-uuids-trashed-more-than-1-year-ago.ts
cursor.execute(
    """
    SELECT uuid_id
        FROM event_log, uuid
        WHERE uuid.id = event_log.uuid_id
        AND event_log.date < %s
        AND event_log.event_id = 10
        AND uuid.trashed = 1
    """,
    (one_year_ago.isoformat(),),
)

entities_to_be_deleted = tuple(row[0] for row in cursor.fetchall())

if entities_to_be_deleted:
    # One %s placeholder per id, with the ids passed as bound parameters.
    # Interpolating str(tuple) instead would render a single id as "(id,)",
    # which is not valid SQL.
    placeholders = ", ".join(["%s"] * len(entities_to_be_deleted))
    cursor.execute(
        f"""
        DELETE FROM uuid 
        WHERE id IN ({placeholders})
        """,
        entities_to_be_deleted,
    )

display(
    Markdown(
        f"# Zahl an Inhalten nach dem Löschen von denen, die länger als ein Jahr getrashed wurden: {get_number_of_uuids()} "
    )
)

# NOTE(review): get_orphan_entities() reads through `engine`, while the DELETE
# above ran on `cursor` and is not committed in this cell. If the engine hands
# pandas a different pooled connection, that query may not see the uncommitted
# deletion, which would make the comparison below trivially empty — confirm.
orphan_entities_after = get_orphan_entities()

display(Markdown("# Die Entitäten ohne Eltern sind dieselben wie vor dem Löschen"))

# An empty result means both snapshots are identical. DataFrame.compare raises
# ValueError if the two frames do not have identical labels/shape, so a change
# in the number of orphans surfaces as an error rather than as a diff.
orphan_entities_before.compare(orphan_entities_after)

# Zahl an Inhalten nach dem Löschen von denen, die länger als ein Jahr getrashed wurden: 273085 

# Die Entitäten ohne Eltern sind dieselben wie vor dem Löschen