In [1]:
import csv

In [8]:

def parse_values(values, row_size=None):
    """
    Split the VALUES payload of a MySQL INSERT statement into rows.

    This is a generator: every completed row is yielded as a list of
    strings with the surrounding parentheses removed. Nothing is
    written to a file.

    Parameters
    ----------
    values : str
        Raw text such as ``"(1,'a',2),(3,'b',4);"``. It is parsed with
        ``csv.reader`` using single-quoted, backslash-escaped strings.
    row_size : int, optional
        Number of columns in each row. When given, a row is only
        closed once it holds exactly this many columns, so a string
        column that itself starts with "(" right after a column ending
        in ")" is not mistaken for the start of a new row. When
        omitted, any column starting with "(" that follows a column
        ending in ")" starts a new row.

    Yields
    ------
    list of str
        One list per row. Empty and NULL columns are represented by
        ``chr(0)``.

    Raises
    ------
    csv.Error
        If the text is malformed for the strict reader used here.

    Notes
    -----
    * The NULL/empty check runs before parentheses are stripped, so a
      NULL in the first or last column of a tuple is kept as text.
    * The final row is only emitted when the payload ends with ");".
    * Empty input yields nothing.
    """
    latest_row = []

    reader = csv.reader([values], delimiter=',',
                        doublequote=False,
                        escapechar='\\',
                        quotechar="'",
                        strict=True
    )

    for reader_row in reader:
        for column in reader_row:
            # Empty strings and NULL are both stored as a NUL character.
            if len(column) == 0 or column == 'NULL':
                latest_row.append(chr(0))
                continue
            # A leading open paren may mark the start of a new row.
            if column[0] == "(":
                if latest_row:
                    # The row in progress is complete when
                    #    1) it has the expected number of columns
                    #       (skipped when row_size is not given),
                    #    2) its last entry ended in a ), and
                    #    3) the current entry starts with a (
                    row_is_full = (
                        row_size is None or len(latest_row) == row_size
                    )
                    if row_is_full and latest_row[-1][-1] == ")":
                        # Remove the close paren.
                        latest_row[-1] = latest_row[-1][:-1]
                        yield latest_row
                        latest_row = []
                # If we're beginning a new row, eliminate the
                # opening parenthesis. Otherwise the paren belongs to
                # the column's own text and is kept.
                if not latest_row:
                    column = column[1:]
            # Add our column to the row we're working on.
            latest_row.append(column)
        # At the end of an INSERT statement the last column carries
        # the close paren and the semicolon; strip both. The emptiness
        # guard keeps empty input from raising IndexError.
        if latest_row and latest_row[-1][-2:] == ");":
            latest_row[-1] = latest_row[-1][:-2]
            yield latest_row
            latest_row = []


In [9]:
# Sample VALUES payload with seven columns per tuple. Several string
# columns contain commas, parentheses and an embedded newline, so the
# column count (7) is passed to keep row boundaries unambiguous.
value = "(1934182,'1840年代发现的物质','STILBENE, (E)-\n(E)-二苯基乙烯','2021-08-01 12:27:55','Stilbene, (E)-','uppercase','page'),(1934182,'CS1德语来源_(de)','(E)-二苯基乙烯','2021-08-01 14:58:21','','uppercase','page'),(1934182,'CS1法语来源_(fr)','(E)-二苯基乙烯','2021-08-01 14:58:21','','uppercase','page'),(1934182,'发光','STILBENE, (E)-\n(E)-二苯基乙烯','2012-04-05 08:33:13','Stilbene, (E)-','uppercase','page'),(1934182,'含有內容需登入查看的頁面','(E)-二苯基乙烯','2022-04-12 11:00:06','','uppercase','page');"
for row in parse_values(value, 7):
    print(row)

['1934182', '1840年代发现的物质', 'STILBENE, (E)-\n(E)-二苯基乙烯', '2021-08-01 12:27:55', 'Stilbene, (E)-', 'uppercase', 'page']
['1934182', 'CS1德语来源_(de)', '(E)-二苯基乙烯', '2021-08-01 14:58:21', '\x00', 'uppercase', 'page']
['1934182', 'CS1法语来源_(fr)', '(E)-二苯基乙烯', '2021-08-01 14:58:21', '\x00', 'uppercase', 'page']
['1934182', '发光', 'STILBENE, (E)-\n(E)-二苯基乙烯', '2012-04-05 08:33:13', 'Stilbene, (E)-', 'uppercase', 'page']
['1934182', '含有內容需登入查看的頁面', '(E)-二苯基乙烯', '2022-04-12 11:00:06', '\x00', 'uppercase', 'page']


In [4]:
# Sample VALUES payload with five columns per tuple. The column count
# is passed explicitly because parse_values takes it as its second
# argument.
value = "(1,'達拉斯',27,7,0),(2,'科索沃行政区划',10,0,0),(3,'期刊',0,0,0),(4,'杂志',38,27,0),(5,'科學小作品',121,12,0),(6,'化石',73,11,0),(7,'地质学',177,18,0),(9,'1780年代',36,25,0),(11,'香港模特兒',2,2,0),(12,'中華民國總統',22,11,0);"
for row in parse_values(value, 5):
    print(row)

['1', '達拉斯', '27', '7', '0']
['2', '科索沃行政区划', '10', '0', '0']
['3', '期刊', '0', '0', '0']
['4', '杂志', '38', '27', '0']
['5', '科學小作品', '121', '12', '0']
['6', '化石', '73', '11', '0']
['7', '地质学', '177', '18', '0']
['9', '1780年代', '36', '25', '0']
['11', '香港模特兒', '2', '2', '0']
['12', '中華民國總統', '22', '11', '0']


In [7]:

# Show the flat list of fields csv.reader produces for `value`, before
# any row splitting: the parentheses and the trailing ");" are still
# attached to the first and last column of each tuple.
reader = csv.reader(
    [value],
    delimiter=',',
    doublequote=False,
    escapechar='\\',
    quotechar="'",
    strict=True,
)

for reader_row in reader:
    print(reader_row)

['(1934182', '1840年代发现的物质', 'STILBENE, (E)-\n(E)-二苯基乙烯', '2021-08-01 12:27:55', 'Stilbene, (E)-', 'uppercase', 'page)', '(1934182', 'CS1德语来源_(de)', '(E)-二苯基乙烯', '2021-08-01 14:58:21', '', 'uppercase', 'page)', '(1934182', 'CS1法语来源_(fr)', '(E)-二苯基乙烯', '2021-08-01 14:58:21', '', 'uppercase', 'page)', '(1934182', '发光', 'STILBENE, (E)-\n(E)-二苯基乙烯', '2012-04-05 08:33:13', 'Stilbene, (E)-', 'uppercase', 'page)', '(1934182', '含有內容需登入查看的頁面', '(E)-二苯基乙烯', '2022-04-12 11:00:06', '', 'uppercase', 'page);']
