I downloaded a selection of `Posts.xml` files from the [Stack Exchange archives](https://archive.org/download/stackexchange) and saved each one as `<site>_Posts.xml` in the working directory.

In [1]:
import os
# Gather the names of all entries in the current working directory that end
# with 'xml'; these are the dumps processed by the cells below.
xml_files = [
    entry
    for entry in os.listdir()
    if entry.endswith('xml')
]
xml_files

['ai_Posts.xml',
 'bicycles_Posts.xml',
 'bioinformatics_Posts.xml',
 'biology_Posts.xml',
 'cogsci_Posts.xml',
 'datascience_Posts.xml',
 'devops_Posts.xml',
 'english_Posts.xml',
 'fitness_Posts.xml',
 'opensource_Posts.xml']

In [66]:
import xml.etree.ElementTree as ET
import html # for html.unescape()
import re
from bs4 import BeautifulSoup # for soup.get_text()
import pandas as pd

def safe_unescape(html_raw):
    """Decode HTML character references in *html_raw*, tolerating ``None``.

    Args:
        html_raw: A string that may contain HTML entities such as ``&lt;``,
            or ``None`` (for example the result of ``dict.get`` on a
            missing attribute).

    Returns:
        The string with entities replaced by their characters; an empty
        string when *html_raw* is ``None``.
    """
    # Identity check instead of comparing the textual repr of the type.
    if html_raw is None:
        return ''
    return html.unescape(html_raw)


def list_tags(tags_str):
    """Turn a tag string such as ``'<a><b><c>'`` into ``'a;b;c'``.

    The first and last characters of *tags_str* are dropped and every
    ``'><'`` boundary between them becomes a semicolon. An empty string
    yields an empty string.
    """
    inner = tags_str[1:-1]
    return inner.replace('><', ';')


def get_html_text(html_raw):
    """Return the text content of an HTML fragment with the markup removed.

    The input is first passed through ``safe_unescape`` (so ``None`` is
    handled as an empty string and entities are decoded), then parsed with
    BeautifulSoup's ``'html.parser'`` backend, and finally reduced to the
    text returned by ``get_text()``.
    """
    markup = safe_unescape(html_raw)
    return BeautifulSoup(markup, 'html.parser').get_text()


In [67]:
# Convert each selected Posts XML dump into a CSV file holding one record per
# <row> line. The [0:1] slice limits this run to the first entry of xml_files.
for xml_file in xml_files[0:1]:
    # Text before the first underscore is used as the topic,
    # e.g. 'ai_Posts.xml' -> 'ai'.
    topic = xml_file.split('_')[0]
    row_count = 0

    with open(xml_file, encoding='utf-8') as xml_file_handle:
        csv_file = xml_file.replace('.xml', '.csv')
        print(csv_file)
        data_rows = []
        for line in xml_file_handle:
            # Only lines starting with exactly two spaces followed by
            # '<row ' are treated as records; everything else is skipped.
            if not re.match('  <row ', line):
                continue

            row_count += 1
            # Each record line is parsed on its own as a standalone element.
            row_root = ET.fromstring(line)
            attrs = row_root.attrib

            # NOTE(review): ElementTree has already decoded XML entities in
            # these attribute values; get_html_text / safe_unescape decode
            # entities a second time -- confirm that is intended.
            title = get_html_text(attrs.get('Title'))
            body = get_html_text(attrs.get('Body'))
            tags = list_tags(safe_unescape(attrs.get('Tags')))
            last_activity_date = attrs.get('LastActivityDate')

            row_dict = dict(
                topic=topic,
                title=title,
                body=body,
                tags=tags,
                last_activity_date=last_activity_date,
            )
            data_rows.append(row_dict)

        # Write all collected records at once, without the index column.
        pd.DataFrame(data_rows).to_csv(csv_file, index=False)


ai_Posts.csv
