In [2]:
from getpass import getpass

# Ask for the staging password interactively instead of writing it into the notebook source.
STAGING_PASSWORD=getpass("Staging MySQL Password: ")

Staging MySQL Password: ········


In [5]:
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.engine import URL
from sqlalchemy.orm import sessionmaker

# Engine for the staging database reached through localhost:7777.
# The URL is assembled with URL.create rather than an f-string so that a
# password containing URL-reserved characters (@, /, :, %, ?) is passed
# through verbatim instead of corrupting the connection string.
engine_remote = create_engine(
    URL.create(
        "mysql+mysqlconnector",
        username="serlo",
        password=STAGING_PASSWORD,
        host="localhost",
        port=7777,
        database="serlo",
        query={"charset": "utf8mb4"},
    )
)

def query(statement, engine, **kwargs):
    """Run a SQL statement on ``engine`` and return all result rows as a list.

    Args:
        statement: SQL text; may contain ``:name`` placeholders.
        engine: SQLAlchemy engine to open the connection on.
        **kwargs: Values bound to the ``:name`` placeholders.

    Returns:
        A list with every row produced by the statement.
    """
    with engine.connect() as conn:
        result = conn.execute(text(statement), kwargs)
        # Materialise the rows while the connection is still open.
        return list(result)
    
def execute(statement, engine, **kwargs):
    """Run a data-modifying SQL statement on ``engine`` and commit it.

    Args:
        statement: SQL text; may contain ``:name`` placeholders.
        engine: SQLAlchemy engine the session is bound to.
        **kwargs: Values bound to the ``:name`` placeholders.

    Any exception raised while executing or committing propagates to the
    caller; the session is closed in every case.
    """
    Session = sessionmaker(bind=engine)
    # Using the session as a context manager guarantees close() even when
    # execute() or commit() raises, so failed statements no longer leak a
    # session/connection.
    with Session() as session:
        session.execute(text(statement), kwargs)
        session.commit()

# Smoke test: read a handful of uuid rows through the remote engine,
# passing the row limit as a bound parameter.
query("""
    select id, trashed from uuid limit :limit;
""", engine_remote, limit=5)

[(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]

In [6]:
# Engine for the local database on localhost:3306.
# charset=latin1 pairs with decode_text, which re-encodes fetched strings as
# latin1 and decodes the resulting bytes as UTF-8.
# NOTE(review): credentials are hard-coded; presumably a throwaway local dev
# database - confirm before sharing this notebook.
engine_local = create_engine(
    "mysql+mysqlconnector://root:secret@localhost:3306/serlo?charset=latin1"
)

# Smoke test: read the same handful of uuid rows through the local engine.
query("""
    select id, trashed from uuid limit 5;
""", engine_local)

[(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]

# Convert user DB

In [13]:
def get_column_type(table, column):
    """Look up the declared type of ``table.column`` on the remote database.

    Runs ``describe`` on the table and scans the result for the row whose
    first field equals ``column``.

    Args:
        table: Table name; interpolated into the SQL text as-is, so it must
            come from a trusted source.
        column: Column name to look for.

    Returns:
        The column type as ``str`` (e.g. ``'text'``), or ``None`` when the
        table has no column with that name.
    """
    column_specs = query(f"""
        describe {table};
    """, engine_remote)

    for column_spec in column_specs:
        if column_spec[0] == column:
            column_type = column_spec[1]
            # The type may arrive as bytes/bytearray or as str; decode only
            # bytes-like values so a str does not raise AttributeError.
            if isinstance(column_type, (bytes, bytearray)):
                return column_type.decode("utf8")
            return column_type

    return None

# Example lookup: declared type of user.description on the remote database.
get_column_type("user", "description")

'text'

In [69]:
def convert_to_utf8(table, column):
    """Switch ``table.column`` on the remote database to utf8mb4.

    Issues ``ALTER TABLE ... MODIFY`` that restates the column's current type
    with ``CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci``.

    Args:
        table: Table name; interpolated into the SQL text as-is.
        column: Column name; interpolated into the SQL text as-is.

    Raises:
        ValueError: if the column cannot be found, instead of sending an
            ``ALTER TABLE`` with the literal text ``None`` as the type.
    """
    column_type = get_column_type(table, column)
    if column_type is None:
        raise ValueError(
            f"Cannot convert {table}.{column}: column not found on the remote database"
        )

    # NOTE(review): MODIFY only restates the type and character set here.
    # MySQL does not carry forward attributes that are omitted from the new
    # definition (NOT NULL, DEFAULT, COMMENT, ...) - confirm this is acceptable
    # for every column this is run on.
    execute(f"""
        ALTER TABLE {table} MODIFY {column} {column_type} CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci
    """, engine_remote)

# Convert a single column on the remote database (this issues an ALTER TABLE).
convert_to_utf8("user", "description")

In [55]:
def is_not_utf8mb3_compatible(text):
    """Return True if ``text`` contains a code point above U+FFFF.

    Such characters lie outside the Basic Multilingual Plane and need four
    bytes in UTF-8. An empty string yields False.
    """
    return any(ord(symbol) > 0xFFFF for symbol in text)

def decode_text(content):
    """Reinterpret a latin1-decoded string as UTF-8.

    ``content`` is encoded back to its latin1 bytes and those bytes are then
    decoded as UTF-8. Byte sequences that are not valid UTF-8 are replaced
    with U+FFFD rather than raising.
    """
    raw_bytes = bytes(content, "latin1")
    return raw_bytes.decode("utf8", errors="replace")

def update_values(table, column):
    """Copy repaired values of ``table.column`` to the remote database.

    Reads every ``(id, column)`` pair through ``engine_local``, passes each
    non-NULL value through ``decode_text`` and keeps only those whose decoded
    form contains a code point above U+FFFF. Each kept value is written to the
    row with the same ``id`` through ``engine_remote``, one statement per row.
    Progress is printed to stdout.

    Args:
        table: Table name; interpolated into the SQL text as-is.
        column: Column name; interpolated into the SQL text as-is.
    """
    fetched = query(f"""
        select id, {column} from {table}
    """, engine_local)

    # Collect (id, decoded value) pairs that actually need rewriting.
    repairs = []
    for fetched_row in fetched:
        stored = fetched_row[1]
        if stored is None:
            continue
        decoded = decode_text(stored)
        if is_not_utf8mb3_compatible(decoded):
            repairs.append((fetched_row[0], decoded))

    print(f"INFO: {len(repairs)} items need to be updated in {table}.{column}")

    for column_id, value in repairs:
        execute(f"""
            update {table} set {column} = :value where id = :column_id
        """, engine_remote, value=value, column_id=column_id)

        print(f"INFO: Repair column {column} of {table} with id {column_id}")

# Repair a single column: reads from the local database, writes to the remote one.
update_values("term_taxonomy", "description")

INFO: 3 items need to be updated in term_taxonomy.description
INFO: Repair column description of term_taxonomy with id 78660
INFO: Repair column description of term_taxonomy with id 200093
INFO: Repair column description of term_taxonomy with id 266616


# Script for updating all values

In [68]:
def fix_serlo_db():
    """Convert and repair every utf8/utf8mb3 column of the ``serlo`` schema.

    Lists all columns of the schema from ``information_schema.COLUMNS`` on the
    remote database. For each column whose character set is ``utf8mb3`` or
    ``utf8`` it prints a progress line, then calls ``convert_to_utf8``
    followed by ``update_values``. Columns with any other character set
    (or none) are skipped.
    """
    legacy_charsets = ("utf8mb3", "utf8")

    column_rows = query("""
        SELECT TABLE_NAME, COLUMN_NAME, CHARACTER_SET_NAME, COLLATION_NAME
        FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = 'serlo';
    """, engine_remote)

    # The collation is selected but not needed for the decision below.
    for table, column, character_set, _collation in column_rows:
        if character_set not in legacy_charsets:
            continue

        print(f"Update {table}.{column}")
        convert_to_utf8(table, column)
        update_values(table, column)
    
# Run the full conversion; this alters tables and rewrites rows on the remote database.
fix_serlo_db()

Update url_alias.source


DataError: (mysql.connector.errors.DataError) 1292 (22007): Incorrect datetime value: '0000-00-00 00:00:00' for column 'timestamp' at row 1
[SQL: 
        ALTER TABLE url_alias MODIFY source varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci
    ]
(Background on this error at: https://sqlalche.me/e/20/9h9h)

# Fix timestamps