In [1]:
import os

from sqlalchemy import create_engine
from sqlalchemy import text

# Connection URL of the MySQL database this notebook analyses. It can be
# overridden through the SERLO_DATABASE_URL environment variable so that real
# credentials never have to be written into the notebook; the fallback is the
# URL that used to be hard-coded here.
DATABASE_URL = os.environ.get(
    "SERLO_DATABASE_URL",
    "mysql+mysqlconnector://root:secret@localhost:3306/serlo?charset=latin1",
)

engine = create_engine(DATABASE_URL)

In [3]:
def query(statement):
    """Execute a raw SQL string on ``engine`` and return all rows as a list.

    The rows are materialised before the connection is closed, so the
    returned list can be used after the call.
    """
    with engine.connect() as db:
        result = db.execute(text(statement))
        rows = list(result)
        return rows
    
# Fetch five (id, trashed) rows from the uuid table. As the last expression of
# the cell the returned list is shown as the cell output -- presumably a quick
# check that the connection and the query() helper work.
query("""
    select id, trashed from uuid limit 5;
""")

[(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]

# Taxonomy descriptions

In [14]:
import json

def is_edtrio(description):
    """Return True if ``description`` is a JSON object with a non-empty string "plugin".

    Parameters:
        description: the raw description text, or None.

    Returns:
        bool: True only when the text parses as a JSON object (dict) whose
        "plugin" entry is a non-empty string; False for None, for text that
        is not valid JSON and for any other JSON value.
    """
    # json.loads(None) raises TypeError rather than JSONDecodeError, so a
    # missing description has to be rejected before parsing.
    if description is None:
        return False

    try:
        content = json.loads(description)
    except json.JSONDecodeError:
        return False

    if not isinstance(content, dict):
        return False

    plugin = content.get("plugin")
    return isinstance(plugin, str) and len(plugin) > 0
    
# Spot checks, one printed result per sample: a valid plugin object, a
# non-string plugin, a list instead of an object, and truncated JSON.
for sample in (
    '{"plugin": "foo"}',
    '{"plugin": 2}',
    '[{"plugin": "foo"}]',
    '{"plugin": "foo"',
):
    print(is_edtrio(sample))

True
False
False
False


In [11]:
import json

def is_legacy(description):
    """Return True if ``description`` parses as a JSON array.

    A JSON array is what this notebook treats as the "legacy" description
    format.

    Parameters:
        description: the raw description text, or None.

    Returns:
        bool: True only when the text is valid JSON whose top-level value is
        a list; False for None, for invalid JSON and for any other JSON value.
    """
    # json.loads(None) raises TypeError rather than JSONDecodeError, so a
    # missing description has to be rejected before parsing.
    if description is None:
        return False

    try:
        content = json.loads(description)
    except json.JSONDecodeError:
        return False

    return isinstance(content, list)
    
# Spot checks, one printed result per sample: an empty JSON list, a JSON
# object, and truncated JSON.
for sample in ('[]', '{"plugin": 2}', '{"plugin": "foo"'):
    print(is_legacy(sample))

True
False
False


In [7]:
# Every taxonomy term with its description (which may be NULL).
taxonomies = query("""
    select id, description from term_taxonomy
""")
# Terms whose description starts with '[' -- the prefix match is done in SQL.
legacy_taxonomies = query("""
    select id, description from term_taxonomy where description LIKE '[%' 
""")

# Rows that do have a description, but one that is_edtrio() rejects.
not_edtrio_taxonomies = [
    row
    for row in taxonomies
    if not (row[1] is None or is_edtrio(row[1]))
]

print(f"Number of all taxonomies: {len(taxonomies)}")
print(f"Number of not edtrio taxonomies: {len(not_edtrio_taxonomies)}")
print(f"Number of legacy taxonomies: {len(legacy_taxonomies)}")

Number of all taxonomies: 5340
Number of not edtrio taxonomies: 2845
Number of legacy taxonomies: 724


In [8]:
# Keep only the id column (first element of each row) of both result lists so
# they can be compared as sets.
legacy_taxonomy_ids = {row[0] for row in legacy_taxonomies}
not_edtrio_taxonomies_ids = {row[0] for row in not_edtrio_taxonomies}

print(f"""Are all taxonomy descriptions starting with '[' a subset
of all taxonomy description which are not in the current edtr-io format?
  -> {legacy_taxonomy_ids <= not_edtrio_taxonomies_ids}""")

Are all taxonomy descriptions starting with '[' a subset
of all taxonomy description which are not in the current edtr-io format?
  -> True


```
for t in taxonomies:
    description = t[1]
    
    if description is not None and description != "" and not is_edtrio(description) and not is_legacy(description):
        print(t)
```

In [15]:
import json

def is_edtrio(description):
    """Tell whether ``description`` is a JSON object naming a plugin.

    Returns True only for text that parses as a JSON object whose "plugin"
    entry is a non-empty string. None, the empty string, invalid JSON and
    every other JSON value yield False.
    """
    if description is None or description == "":
        return False

    try:
        parsed = json.loads(description)
    except json.JSONDecodeError:
        return False

    if not isinstance(parsed, dict):
        return False

    plugin_name = parsed.get("plugin")
    return isinstance(plugin_name, str) and plugin_name != ""
        
def is_legacy(description):
    """Tell whether ``description`` parses as a JSON array.

    None, the empty string, invalid JSON and non-list JSON values all
    yield False.
    """
    if description is None or description == "":
        return False

    try:
        parsed = json.loads(description)
    except json.JSONDecodeError:
        return False

    return isinstance(parsed, list)
    
def is_json(description):
    """Return True if ``description`` is non-empty text that parses as JSON.

    Any valid JSON value counts (object, array, string, number, ``null`` ...).
    None, the empty string and text that ``json.loads`` rejects yield False.
    """
    if description is None or description == "":
        return False

    try:
        # Only the success of the parse matters; the decoded value is not needed.
        json.loads(description)
    except json.JSONDecodeError:
        return False
    else:
        return True
    
def is_markdown(description):
    """Classify ``description`` as markdown: non-empty text that is not JSON.

    No markdown syntax is inspected; anything that is neither None, empty
    nor accepted by ``is_json`` is counted as markdown.
    """
    if description is None or description == "":
        return False
    return not is_json(description)

In [20]:
def is_json_legacy_or_edtrio(descriptions):
    """Print the first JSON description that is neither legacy nor edtr-io.

    Iterates over ``descriptions`` and stops at the first entry accepted by
    ``is_json`` but rejected by both ``is_legacy`` and ``is_edtrio``, printing
    it. If no such entry exists, a confirmation message is printed instead.
    Nothing is returned.
    """
    for candidate in descriptions:
        if is_json(candidate) and not (is_legacy(candidate) or is_edtrio(candidate)):
            print(candidate)
            break
    else:
        # The loop finished without finding a counterexample.
        print("yeah, that's good")

# Run the check over the description (second element) of every taxonomy row.
is_json_legacy_or_edtrio(t[1] for t in taxonomies)

yeah, that's good


## List of taxonomy descriptions which also need to be converted

```
for t in not_edtrio_taxonomies_ids - legacy_taxonomy_ids:
    print(f"serlo.org/{t}")
```

# Which revision fields need to be changed?

In [22]:
# Show the column definitions of the entity_revision_field table.
query("""describe entity_revision_field;""")

[('id', b'int', 'NO', 'PRI', None, 'auto_increment'),
 ('field', b'varchar(255)', 'NO', 'PRI', None, ''),
 ('entity_revision_id', b'bigint', 'NO', 'MUL', None, ''),
 ('value', b'longtext', 'NO', 'MUL', None, '')]

```
mysql> select distinct type.name, entity_revision_field.field from type join entity on type.id = entity.type_id join entity_revision on entity.current_revision_id = entity_revision.id join entity_revision_field on entity_revision_field.entity_revision_id = entity_revision.id where value like '{%}' or value like '[%]';
+-----------------------------------------+-------------+
| name                                    | field       |
+-----------------------------------------+-------------+
| article                                 | content     |
| article                                 | reasoning   |
| text-exercise-group                     | content     |
| grouped-text-exercise                   | content     |
| text-solution                           | content     |
| text-exercise                           | content     |
| text-solution                           | hint        |
| course                                  | description |
| course-page                             | content     |
| video                                   | description |
| video                                   | reasoning   |
| course                                  | reasoning   |
| math-puzzle                             | content     |
| single-choice-right-answer              | content     |
| single-choice-right-answer              | feedback    |
| single-choice-wrong-answer              | content     |
| single-choice-wrong-answer              | feedback    |
| input-number-exact-match-challenge      | feedback    |
| input-string-normalized-match-challenge | feedback    |
| input-expression-equal-match-challenge  | feedback    |
| multiple-choice-right-answer            | content     |
| multiple-choice-wrong-answer            | content     |
| multiple-choice-wrong-answer            | feedback    |
| applet                                  | content     |
| applet                                  | reasoning   |
| event                                   | content     |
| input-string-normalized-match-challenge | solution    |
+-----------------------------------------+-------------+
27 rows in set (1.25 sec)
```

```
mysql> select distinct field from entity_revision_field where value like '[%]' or value like '{%}';
+-------------+
| field       |
+-------------+
| content     |
| hint        |
| reasoning   |
| description |
| feedback    |
| solution    |
+-------------+
6 rows in set (3.05 sec)
```