In [13]:
import re
import os
import csv
import pandas as pd
import subprocess
from pathlib import Path

class TexProcesser():
    def __init__(self, latex_dir_path, output_path):
        self.latex_folder = Path(latex_dir_path)
        self.output_path = output_path
        
    def pipeline(self,):
        self.processed_tex_dir = self.output_path + '/' + 'processed_tex'
        self._preprocess(input_dir=self.latex_folder, output_dir=self.processed_tex_dir)
        
        output_csv_path = self.output_path + '/latex_info.csv'
        self._extract_field_to_csv(output_csv_path=output_csv_path)
        
        # TODO:把文本批量转换为md文件
        self._postprocess()
        
    def _preprocess(self, input_dir, output_dir):
        '''
        删除一些额外的表格和公式等等
        '''

        for file_path in sorted(input_dir.glob('**/*.tex')):
        # latex = './NLP_LATEX_CORPUS/V04/V04N01-07.tex'
            file_name = os.path.basename(file_path)
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    latex_content = file.read()
                print(f'processing {file_name} ...')
                # 执行其他操作
            except UnicodeDecodeError:
                print(f"UnicodeDecodeError: Failed to decode {file_path}. Skipping this file.")
                continue  # 跳过当前文件的处理
            
            # 删除\begin{document}之前的所有内容
            latex_content = re.sub(r'[\s\S]*?(?=\\begin\{document\})', '', latex_content, count=1, flags=re.DOTALL)
                    
            # 删除bibliographystyle之后的所有文本
            # latex_content = re.sub(r'\\bibliographystyle\{.*?\}[\s\S]*', '', latex_content, flags=re.DOTALL)
            
            # 删除所有的图片
            latex_content = re.sub(r'\\begin{figure}.*?\\end{figure}', '', latex_content, flags=re.DOTALL)
            
            # 删除所有的表格
            latex_content = re.sub(r'\\begin{table}.*?\\end{table}', '', latex_content, flags=re.DOTALL)
            latex_content = re.sub(r'\\begin{tabular}.*?\\end{tabular}', '', latex_content, flags=re.DOTALL)
            
            # 替换引用\ref{...}文本为[...]
            latex_content = re.sub(r'\\ref\{([^{}]*)\}', r'[\1]', latex_content, flags=re.DOTALL)
            
            # 删除格式调整符号
            # latex_content = re.sub(r'\\hspace\*\{[^{}]*\}', '', latex_content, flags=re.DOTALL)
            # latex_content = re.sub(r'\\vspace\*\{[^{}]*\}', '', latex_content, flags=re.DOTALL)        
        
            # 删除所有的标签\label{...}
            latex_content = re.sub(r'\\label\{.*?\}', '', latex_content, flags=re.DOTALL)
            
            # 替换掉文本中有\underline{...}的内容            
            # latex_content = re.sub(r'\\underline\{([^{}]*)\}', r'\1', latex_content, flags=re.DOTALL)            
                                    
            # 替换掉所有文献引用为@xcite
            latex_content = re.sub(r'\\cite\{.*?\}', '@xcite', latex_content, flags=re.DOTALL)
            
            # 将所有的公式都替换为@xmath0, @xmath1, @xmath2, ...
            formula_count = 0
            def replace_formula(match):
                nonlocal formula_count
                new_formula = f'@xmath{formula_count}'
                formula_count += 1
                return new_formula
            
            latex_content = re.sub(r'\$.*?\$', replace_formula, latex_content, flags=re.DOTALL)

            # 删除所有脚注
            # NOTE: 必须在替换公式之后在进行删除，否则会引发冲突
            # latex_content = re.sub(r'\\footnote\{.*?\}', '', latex_content, flags=re.DOTALL)
            # latex_content = re.sub(r'(?s)\\footnote\{.*?\}', '', latex_content)
            # latex_content = re.sub  (r'\\footnotemark', '', latex_content, flags=re.DOTALL)
            # latex_content = re.sub(r'\\footnotetext\{.*?\}', '', latex_content, flags=re.DOTALL)       
            
            # # 使用正则表达式找到最后一个 \section{} 的位置并删除其后的内容
            # match_sections = [m.end() for m in re.finditer(r'\\section\{.*?\}', latex_content)]  # 查找所有 \section{} 的位置
            # if match_sections:
            #     last_section_index = match_sections[-1]  
            #     content_after_section = latex_content[last_section_index:]  # 获取最后一个 \section{} 之后的内容

            #     match_next_unknown = re.search(r'\n\\[a-zA-Z]+\{.*?\}', content_after_section)  # 寻找下一个未知标记
            #     if match_next_unknown:
            #         next_unknown_index = last_section_index + match_next_unknown.start()  # 获取下一个未知标记的位置
            #         latex_content = latex_content[:next_unknown_index]  # 保留标记之前的内容
            
            # # 在文章的末尾添加\end{document}
            # latex_content += '\n\\end{document}'
            
            # 删除\acknowledgment
            # latex_content = re.sub(r'\\acknowledgment.*?(?=\\end\{document\})', '', latex_content, flags=re.DOTALL)
            
            # 将处理后的内容保存到新文件中
            output_file = output_dir + '/' + file_name
            with open(output_file, 'w', encoding='utf-8') as file:
                file.write(latex_content)    
                 
    def _extract_field_to_csv(self, output_csv_path=None):
        '''
        extract title, etitle, jabstract, eabstract ... from latex files
        '''
        
        # 定义csv文件的表头
        output_csv_path = Path(output_csv_path)
        csv_header = ['file_name', 'title', 'etitle', 'jabstract', 'eabstract', 'section_names',
                      'sec_intro', 'sec_method', 'sec_result', 'sec_conclusion',
                      'abs_intro', 'abs_method', 'abs_result', 'abs_conclusion']
        
        with open(output_csv_path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=csv_header)
            writer.writeheader()
            
            for file_path in sorted(self.latex_folder.glob('**/*.tex')):
                try:
                    with open(file_path, 'r', encoding='utf-8') as file:
                        latex_content = file.read()
                    print(f'processing {file_path} ...')
                except UnicodeDecodeError:
                    print(f"UnicodeDecodeError: Failed to decode {file_path}. Skipping this file.")
                    continue
                    
                matches = self._extract_info(latex_content=latex_content)
                matches['file_name'] = file_path.name
                
                extra_columns = [
                    'sec_intro', 'sec_method', 'sec_result', 'sec_conclusion',
                    'abs_intro', 'abs_method', 'abs_result', 'abs_conclusion'
                ]
                for column in extra_columns:
                    matches[column] = ''
                
                writer.writerow(matches)
        
    def _extract_info(self, latex_content=None):
        
        patterns = {
            # TODO: title 和 jtitle 有时候会同时出现，需要处理
            'title': re.compile(r'\\title\{(.*?)\}', re.DOTALL),
            'etitle': re.compile(r'\\etitle\{(.*?)\}', re.DOTALL),
            'jabstract': re.compile(r'\\jabstract\{(.*?)\}', re.DOTALL),
            'eabstract': re.compile(r'\\eabstract\{(.*?)\}', re.DOTALL),     
            'section_names': re.compile(r'\\section\{(.*?)\}', re.DOTALL),                 
        }
        
        matches = {key: '' if key != 'section_names' else [] for key in patterns.keys()}
        
        for key, pattern in patterns.items():
            matches[key] = pattern.findall(latex_content)
            
            if key == 'section_names':
                matches[key] = [value.strip() for value in matches[key]]  # 移除首尾空白并存储在列表中
                matches[key] = list(filter(None, matches[key]))  # 移除空字符串
            
            if key != 'section_names' and matches[key]:
                matches[key] = re.sub(r'\s*\\\\\n\s*', ' ', matches[key][0]).strip()
            elif key != 'section_names':
                matches[key] = ''

        return matches
    
    def _postprocess(self):
        """
        convert all processed tex files to markdown files
        """
        markdown_output_dir = Path(self.output_path) / 'markdown'
        markdown_output_dir.mkdir(parents=True, exist_ok=True)
        
        for file_path in sorted(Path(self.processed_tex_dir).glob('*.tex')):
            md_output_path = markdown_output_dir / file_path.name.replace('.tex', '.txt')

            try:
                subprocess.run(["pandoc", str(file_path), "-o", str(md_output_path)])
                print(f"Converted {file_path} to Markdown: {md_output_path}")
            except Exception as e:
                print(f"Conversion failed for {file_path}: {e}")

In [46]:
tex_processer = TexProcesser(latex_dir_path='./data/NLP_LATEX_CORPUS/', output_path='./data')
tex_processer.pipeline()

processing V01N01-01.tex ...
processing V01N01-02.tex ...
processing V01N01-03.tex ...
processing V01N01-04.tex ...
processing V02N01-01.tex ...
processing V02N01-02.tex ...
processing V02N01-03.tex ...
processing V02N01-04.tex ...
processing V02N02-01.tex ...
processing V02N02-02.tex ...
processing V02N02-03.tex ...
processing V02N02-04.tex ...
processing V02N03-01.tex ...
processing V02N03-02.tex ...
processing V02N03-03.tex ...
processing V02N03-04.tex ...
processing V02N04-01.tex ...
processing V02N04-02.tex ...
processing V02N04-03.tex ...
processing V02N04-04.tex ...
processing V03N01-01.tex ...
processing V03N01-02.tex ...
processing V03N01-03.tex ...
processing V03N01-04.tex ...
processing V03N02-01.tex ...
processing V03N02-02.tex ...
processing V03N02-03.tex ...
processing V03N02-04.tex ...
processing V03N02-05.tex ...
processing V03N03-01.tex ...
processing V03N03-02.tex ...
processing V03N03-03.tex ...
processing V03N03-04.tex ...
processing V03N03-05.tex ...
processing V03

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V01N01-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V01N01-02.tex to Markdown: data/txt/V01N01-02.txt
Converted data/processed_tex/V01N01-03.tex to Markdown: data/txt/V01N01-03.txt
Converted data/processed_tex/V01N01-04.tex to Markdown: data/txt/V01N01-04.txt
Converted data/processed_tex/V02N01-01.tex to Markdown: data/txt/V02N01-01.txt
Converted data/processed_tex/V02N01-02.tex to Markdown: data/txt/V02N01-02.txt
Converted data/processed_tex/V02N01-03.tex to Markdown: data/txt/V02N01-03.txt
Converted data/processed_tex/V02N01-04.tex to Markdown: data/txt/V02N01-04.txt
Converted data/processed_tex/V02N02-01.tex to Markdown: data/txt/V02N02-01.txt
Converted data/processed_tex/V02N02-02.tex to Markdown: data/txt/V02N02-02.txt
Converted data/processed_tex/V02N02-03.tex to Markdown: data/txt/V02N02-03.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V02N02-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V02N02-04.tex to Markdown: data/txt/V02N02-04.txt
Converted data/processed_tex/V02N03-01.tex to Markdown: data/txt/V02N03-01.txt
Converted data/processed_tex/V02N03-02.tex to Markdown: data/txt/V02N03-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V02N03-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input
Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V02N04-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V02N03-03.tex to Markdown: data/txt/V02N03-03.txt
Converted data/processed_tex/V02N03-04.tex to Markdown: data/txt/V02N03-04.txt
Converted data/processed_tex/V02N04-01.tex to Markdown: data/txt/V02N04-01.txt
Converted data/processed_tex/V02N04-02.tex to Markdown: data/txt/V02N04-02.txt
Converted data/processed_tex/V02N04-03.tex to Markdown: data/txt/V02N04-03.txt
Converted data/processed_tex/V02N04-04.tex to Markdown: data/txt/V02N04-04.txt
Converted data/processed_tex/V03N01-01.tex to Markdown: data/txt/V03N01-01.txt
Converted data/processed_tex/V03N01-02.tex to Markdown: data/txt/V03N01-02.txt
Converted data/processed_tex/V03N01-03.tex to Markdown: data/txt/V03N01-03.txt
Converted data/processed_tex/V03N01-04.tex to Markdown: data/txt/V03N01-04.txt
Converted data/processed_tex/V03N02-01.tex to Markdown: data/txt/V03N02-01.txt
Converted data/processed_tex/V03N02-02.tex to Markdown: data/txt/V03N02-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V03N02-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V03N02-03.tex to Markdown: data/txt/V03N02-03.txt
Converted data/processed_tex/V03N02-04.tex to Markdown: data/txt/V03N02-04.txt
Converted data/processed_tex/V03N02-05.tex to Markdown: data/txt/V03N02-05.txt
Converted data/processed_tex/V03N03-01.tex to Markdown: data/txt/V03N03-01.txt
Converted data/processed_tex/V03N03-02.tex to Markdown: data/txt/V03N03-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V03N03-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V03N03-03.tex to Markdown: data/txt/V03N03-03.txt
Converted data/processed_tex/V03N03-04.tex to Markdown: data/txt/V03N03-04.txt
Converted data/processed_tex/V03N03-05.tex to Markdown: data/txt/V03N03-05.txt
Converted data/processed_tex/V03N04-01.tex to Markdown: data/txt/V03N04-01.txt
Converted data/processed_tex/V03N04-02.tex to Markdown: data/txt/V03N04-02.txt
Converted data/processed_tex/V03N04-03.tex to Markdown: data/txt/V03N04-03.txt
Converted data/processed_tex/V03N04-04.tex to Markdown: data/txt/V03N04-04.txt
Converted data/processed_tex/V03N04-05.tex to Markdown: data/txt/V03N04-05.txt
Converted data/processed_tex/V03N04-06.tex to Markdown: data/txt/V03N04-06.txt
Converted data/processed_tex/V03N04-07.tex to Markdown: data/txt/V03N04-07.txt
Converted data/processed_tex/V03N04-08.tex to Markdown: data/txt/V03N04-08.txt
Converted data/processed_tex/V04N01-01.tex to Markdown: data/txt/V04N01-01.txt
Converted data/processed_tex/V04N01-02.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V05N02-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V05N02-04.tex to Markdown: data/txt/V05N02-04.txt
Converted data/processed_tex/V05N03-01.tex to Markdown: data/txt/V05N03-01.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V05N03-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V05N03-02.tex to Markdown: data/txt/V05N03-02.txt
Converted data/processed_tex/V05N03-05.tex to Markdown: data/txt/V05N03-05.txt
Converted data/processed_tex/V05N04-01.tex to Markdown: data/txt/V05N04-01.txt
Converted data/processed_tex/V05N04-02.tex to Markdown: data/txt/V05N04-02.txt
Converted data/processed_tex/V05N04-03.tex to Markdown: data/txt/V05N04-03.txt
Converted data/processed_tex/V05N04-04.tex to Markdown: data/txt/V05N04-04.txt
Converted data/processed_tex/V05N04-05.tex to Markdown: data/txt/V05N04-05.txt
Converted data/processed_tex/V05N04-06.tex to Markdown: data/txt/V05N04-06.txt
Converted data/processed_tex/V05N04-07.tex to Markdown: data/txt/V05N04-07.txt
Converted data/processed_tex/V05N04-08.tex to Markdown: data/txt/V05N04-08.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V06N01-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V06N01-01.tex to Markdown: data/txt/V06N01-01.txt
Converted data/processed_tex/V06N01-02.tex to Markdown: data/txt/V06N01-02.txt
Converted data/processed_tex/V06N01-03.tex to Markdown: data/txt/V06N01-03.txt
Converted data/processed_tex/V06N01-04.tex to Markdown: data/txt/V06N01-04.txt
Converted data/processed_tex/V06N02-01.tex to Markdown: data/txt/V06N02-01.txt
Converted data/processed_tex/V06N02-02.tex to Markdown: data/txt/V06N02-02.txt
Converted data/processed_tex/V06N02-03.tex to Markdown: data/txt/V06N02-03.txt
Converted data/processed_tex/V06N02-04.tex to Markdown: data/txt/V06N02-04.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V06N02-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V06N02-05.tex to Markdown: data/txt/V06N02-05.txt
Converted data/processed_tex/V06N02-06.tex to Markdown: data/txt/V06N02-06.txt
Converted data/processed_tex/V06N02-07.tex to Markdown: data/txt/V06N02-07.txt
Converted data/processed_tex/V06N03-01.tex to Markdown: data/txt/V06N03-01.txt
Converted data/processed_tex/V06N03-03.tex to Markdown: data/txt/V06N03-03.txt
Converted data/processed_tex/V06N03-04.tex to Markdown: data/txt/V06N03-04.txt
Converted data/processed_tex/V06N03-06.tex to Markdown: data/txt/V06N03-06.txt
Converted data/processed_tex/V06N04-01.tex to Markdown: data/txt/V06N04-01.txt
Converted data/processed_tex/V06N04-02.tex to Markdown: data/txt/V06N04-02.txt
Converted data/processed_tex/V06N04-03.tex to Markdown: data/txt/V06N04-03.txt
Converted data/processed_tex/V06N04-04.tex to Markdown: data/txt/V06N04-04.txt
Converted data/processed_tex/V06N04-05.tex to Markdown: data/txt/V06N04-05.txt
Converted data/processed_tex/V06N04-06.tex to Markdo



Converted data/processed_tex/V06N07-04.tex to Markdown: data/txt/V06N07-04.txt
Converted data/processed_tex/V06N07-05.tex to Markdown: data/txt/V06N07-05.txt
Converted data/processed_tex/V06N07-06.tex to Markdown: data/txt/V06N07-06.txt
Converted data/processed_tex/V07N01-01.tex to Markdown: data/txt/V07N01-01.txt
Converted data/processed_tex/V07N01-02.tex to Markdown: data/txt/V07N01-02.txt
Converted data/processed_tex/V07N01-03.tex to Markdown: data/txt/V07N01-03.txt
Converted data/processed_tex/V07N01-04.tex to Markdown: data/txt/V07N01-04.txt
Converted data/processed_tex/V07N02-01.tex to Markdown: data/txt/V07N02-01.txt
Converted data/processed_tex/V07N02-02.tex to Markdown: data/txt/V07N02-02.txt
Converted data/processed_tex/V07N02-03.tex to Markdown: data/txt/V07N02-03.txt
Converted data/processed_tex/V07N02-04.tex to Markdown: data/txt/V07N02-04.txt




Converted data/processed_tex/V07N02-05.tex to Markdown: data/txt/V07N02-05.txt
Converted data/processed_tex/V07N02-06.tex to Markdown: data/txt/V07N02-06.txt
Converted data/processed_tex/V07N02-07.tex to Markdown: data/txt/V07N02-07.txt
Converted data/processed_tex/V07N03-01.tex to Markdown: data/txt/V07N03-01.txt
Converted data/processed_tex/V07N03-02.tex to Markdown: data/txt/V07N03-02.txt
Converted data/processed_tex/V07N03-03.tex to Markdown: data/txt/V07N03-03.txt
Converted data/processed_tex/V07N03-04.tex to Markdown: data/txt/V07N03-04.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V07N03-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V07N03-05.tex to Markdown: data/txt/V07N03-05.txt
Converted data/processed_tex/V07N04-01.tex to Markdown: data/txt/V07N04-01.txt
Converted data/processed_tex/V07N04-02.tex to Markdown: data/txt/V07N04-02.txt
Converted data/processed_tex/V07N04-03.tex to Markdown: data/txt/V07N04-03.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V07N04-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input
Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V07N04-05.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V07N04-04.tex to Markdown: data/txt/V07N04-04.txt
Converted data/processed_tex/V07N04-05.tex to Markdown: data/txt/V07N04-05.txt
Converted data/processed_tex/V07N04-06.tex to Markdown: data/txt/V07N04-06.txt
Converted data/processed_tex/V07N04-07.tex to Markdown: data/txt/V07N04-07.txt
Converted data/processed_tex/V07N04-08.tex to Markdown: data/txt/V07N04-08.txt
Converted data/processed_tex/V07N04-09.tex to Markdown: data/txt/V07N04-09.txt
Converted data/processed_tex/V07N04-10.tex to Markdown: data/txt/V07N04-10.txt
Converted data/processed_tex/V07N04-11.tex to Markdown: data/txt/V07N04-11.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V07N04-09.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V07N04-12.tex to Markdown: data/txt/V07N04-12.txt
Converted data/processed_tex/V07N04-13.tex to Markdown: data/txt/V07N04-13.txt
Converted data/processed_tex/V07N05-01.tex to Markdown: data/txt/V07N05-01.txt
Converted data/processed_tex/V07N05-02.tex to Markdown: data/txt/V07N05-02.txt
Converted data/processed_tex/V07N05-03.tex to Markdown: data/txt/V07N05-03.txt
Converted data/processed_tex/V07N05-04.tex to Markdown: data/txt/V07N05-04.txt
Converted data/processed_tex/V07N05-05.tex to Markdown: data/txt/V07N05-05.txt
Converted data/processed_tex/V08N01-01.tex to Markdown: data/txt/V08N01-01.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V07N05-05.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V08N01-02.tex to Markdown: data/txt/V08N01-02.txt
Converted data/processed_tex/V08N01-03.tex to Markdown: data/txt/V08N01-03.txt
Converted data/processed_tex/V08N01-04.tex to Markdown: data/txt/V08N01-04.txt
Converted data/processed_tex/V08N01-05.tex to Markdown: data/txt/V08N01-05.txt
Converted data/processed_tex/V08N01-06.tex to Markdown: data/txt/V08N01-06.txt
Converted data/processed_tex/V08N01-07.tex to Markdown: data/txt/V08N01-07.txt
Converted data/processed_tex/V08N01-08.tex to Markdown: data/txt/V08N01-08.txt
Converted data/processed_tex/V08N02-01.tex to Markdown: data/txt/V08N02-01.txt
Converted data/processed_tex/V08N02-02.tex to Markdown: data/txt/V08N02-02.txt
Converted data/processed_tex/V08N02-03.tex to Markdown: data/txt/V08N02-03.txt
Converted data/processed_tex/V08N03-01.tex to Markdown: data/txt/V08N03-01.txt
Converted data/processed_tex/V08N03-02.tex to Markdown: data/txt/V08N03-02.txt
Converted data/processed_tex/V08N03-03.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V08N03-06.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V08N04-01.tex to Markdown: data/txt/V08N04-01.txt
Converted data/processed_tex/V08N04-02.tex to Markdown: data/txt/V08N04-02.txt
Converted data/processed_tex/V08N04-03.tex to Markdown: data/txt/V08N04-03.txt
Converted data/processed_tex/V08N04-04.tex to Markdown: data/txt/V08N04-04.txt
Converted data/processed_tex/V08N04-05.tex to Markdown: data/txt/V08N04-05.txt
Converted data/processed_tex/V09N01-01.tex to Markdown: data/txt/V09N01-01.txt
Converted data/processed_tex/V09N01-02.tex to Markdown: data/txt/V09N01-02.txt
Converted data/processed_tex/V09N01-03.tex to Markdown: data/txt/V09N01-03.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V09N01-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V09N01-04.tex to Markdown: data/txt/V09N01-04.txt
Converted data/processed_tex/V09N01-05.tex to Markdown: data/txt/V09N01-05.txt
Converted data/processed_tex/V09N01-06.tex to Markdown: data/txt/V09N01-06.txt
Converted data/processed_tex/V09N02-01.tex to Markdown: data/txt/V09N02-01.txt
Converted data/processed_tex/V09N02-02.tex to Markdown: data/txt/V09N02-02.txt
Converted data/processed_tex/V09N02-03.tex to Markdown: data/txt/V09N02-03.txt
Converted data/processed_tex/V09N02-04.tex to Markdown: data/txt/V09N02-04.txt
Converted data/processed_tex/V09N02-05.tex to Markdown: data/txt/V09N02-05.txt
Converted data/processed_tex/V09N03-01.tex to Markdown: data/txt/V09N03-01.txt
Converted data/processed_tex/V09N03-02.tex to Markdown: data/txt/V09N03-02.txt
Converted data/processed_tex/V09N03-03.tex to Markdown: data/txt/V09N03-03.txt
Converted data/processed_tex/V09N03-04.tex to Markdown: data/txt/V09N03-04.txt
Converted data/processed_tex/V09N03-05.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V09N05-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input
Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V09N05-04.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V09N05-07.tex to Markdown: data/txt/V09N05-07.txt
Converted data/processed_tex/V10N01-01.tex to Markdown: data/txt/V10N01-01.txt
Converted data/processed_tex/V10N01-02.tex to Markdown: data/txt/V10N01-02.txt
Converted data/processed_tex/V10N01-03.tex to Markdown: data/txt/V10N01-03.txt
Converted data/processed_tex/V10N01-04.tex to Markdown: data/txt/V10N01-04.txt
Converted data/processed_tex/V10N01-05.tex to Markdown: data/txt/V10N01-05.txt
Converted data/processed_tex/V10N01-06.tex to Markdown: data/txt/V10N01-06.txt
Converted data/processed_tex/V10N02-01.tex to Markdown: data/txt/V10N02-01.txt
Converted data/processed_tex/V10N02-02.tex to Markdown: data/txt/V10N02-02.txt
Converted data/processed_tex/V10N02-03.tex to Markdown: data/txt/V10N02-03.txt
Converted data/processed_tex/V10N02-04.tex to Markdown: data/txt/V10N02-04.txt
Converted data/processed_tex/V10N02-05.tex to Markdown: data/txt/V10N02-05.txt
Converted data/processed_tex/V10N02-06.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V10N04-05.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V10N04-07.tex to Markdown: data/txt/V10N04-07.txt
Converted data/processed_tex/V10N04-08.tex to Markdown: data/txt/V10N04-08.txt
Converted data/processed_tex/V10N04-09.tex to Markdown: data/txt/V10N04-09.txt
Converted data/processed_tex/V10N04-10.tex to Markdown: data/txt/V10N04-10.txt
Converted data/processed_tex/V10N05-01.tex to Markdown: data/txt/V10N05-01.txt
Converted data/processed_tex/V10N05-02.tex to Markdown: data/txt/V10N05-02.txt
Converted data/processed_tex/V10N05-03.tex to Markdown: data/txt/V10N05-03.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V10N05-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V10N05-04.tex to Markdown: data/txt/V10N05-04.txt
Converted data/processed_tex/V10N05-05.tex to Markdown: data/txt/V10N05-05.txt
Converted data/processed_tex/V10N05-06.tex to Markdown: data/txt/V10N05-06.txt
Converted data/processed_tex/V10N05-07.tex to Markdown: data/txt/V10N05-07.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V10N05-07.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V10N05-08.tex to Markdown: data/txt/V10N05-08.txt
Converted data/processed_tex/V11N01-01.tex to Markdown: data/txt/V11N01-01.txt
Converted data/processed_tex/V11N01-04.tex to Markdown: data/txt/V11N01-04.txt
Converted data/processed_tex/V11N01-05.tex to Markdown: data/txt/V11N01-05.txt
Converted data/processed_tex/V11N02-03.tex to Markdown: data/txt/V11N02-03.txt
Converted data/processed_tex/V11N02-04.tex to Markdown: data/txt/V11N02-04.txt
Converted data/processed_tex/V11N02-05.tex to Markdown: data/txt/V11N02-05.txt
Converted data/processed_tex/V11N03-01.tex to Markdown: data/txt/V11N03-01.txt
Converted data/processed_tex/V11N03-04.tex to Markdown: data/txt/V11N03-04.txt
Converted data/processed_tex/V11N04-04.tex to Markdown: data/txt/V11N04-04.txt
Converted data/processed_tex/V11N05-02.tex to Markdown: data/txt/V11N05-02.txt
Converted data/processed_tex/V11N05-03.tex to Markdown: data/txt/V11N05-03.txt
Converted data/processed_tex/V11N05-05.tex to Markdo



Converted data/processed_tex/V12N03-09.tex to Markdown: data/txt/V12N03-09.txt
Converted data/processed_tex/V12N04-03.tex to Markdown: data/txt/V12N04-03.txt
Converted data/processed_tex/V12N04-06.tex to Markdown: data/txt/V12N04-06.txt
Converted data/processed_tex/V12N05-02.tex to Markdown: data/txt/V12N05-02.txt
Converted data/processed_tex/V12N05-03.tex to Markdown: data/txt/V12N05-03.txt
Converted data/processed_tex/V12N05-04.tex to Markdown: data/txt/V12N05-04.txt
Converted data/processed_tex/V12N05-05.tex to Markdown: data/txt/V12N05-05.txt
Converted data/processed_tex/V12N05-07.tex to Markdown: data/txt/V12N05-07.txt
Converted data/processed_tex/V12N05-08.tex to Markdown: data/txt/V12N05-08.txt
Converted data/processed_tex/V12N06-02.tex to Markdown: data/txt/V12N06-02.txt
Converted data/processed_tex/V13N01-01.tex to Markdown: data/txt/V13N01-01.txt
Converted data/processed_tex/V13N01-03.tex to Markdown: data/txt/V13N01-03.txt
Converted data/processed_tex/V13N01-05.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V13N03-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V13N03-03.tex to Markdown: data/txt/V13N03-03.txt
Converted data/processed_tex/V13N03-04.tex to Markdown: data/txt/V13N03-04.txt
Converted data/processed_tex/V13N03-05.tex to Markdown: data/txt/V13N03-05.txt
Converted data/processed_tex/V13N03-06.tex to Markdown: data/txt/V13N03-06.txt
Converted data/processed_tex/V13N03-07.tex to Markdown: data/txt/V13N03-07.txt
Converted data/processed_tex/V13N03-08.tex to Markdown: data/txt/V13N03-08.txt
Converted data/processed_tex/V13N03-09.tex to Markdown: data/txt/V13N03-09.txt
Converted data/processed_tex/V13N03-10.tex to Markdown: data/txt/V13N03-10.txt
Converted data/processed_tex/V13N04-01.tex to Markdown: data/txt/V13N04-01.txt
Converted data/processed_tex/V13N04-02.tex to Markdown: data/txt/V13N04-02.txt
Converted data/processed_tex/V13N04-03.tex to Markdown: data/txt/V13N04-03.txt
Converted data/processed_tex/V14N01-01.tex to Markdown: data/txt/V14N01-01.txt
Converted data/processed_tex/V14N01-02.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V14N03-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V14N03-05.tex to Markdown: data/txt/V14N03-05.txt
Converted data/processed_tex/V14N03-06.tex to Markdown: data/txt/V14N03-06.txt
Converted data/processed_tex/V14N03-07.tex to Markdown: data/txt/V14N03-07.txt
Converted data/processed_tex/V14N03-08.tex to Markdown: data/txt/V14N03-08.txt




Converted data/processed_tex/V14N03-09.tex to Markdown: data/txt/V14N03-09.txt
Converted data/processed_tex/V14N03-10.tex to Markdown: data/txt/V14N03-10.txt
Converted data/processed_tex/V14N03-11.tex to Markdown: data/txt/V14N03-11.txt
Converted data/processed_tex/V14N03-12.tex to Markdown: data/txt/V14N03-12.txt
Converted data/processed_tex/V14N03-13.tex to Markdown: data/txt/V14N03-13.txt
Converted data/processed_tex/V14N03-14.tex to Markdown: data/txt/V14N03-14.txt
Converted data/processed_tex/V14N03-15.tex to Markdown: data/txt/V14N03-15.txt
Converted data/processed_tex/V14N04-01.tex to Markdown: data/txt/V14N04-01.txt
Converted data/processed_tex/V14N04-02.tex to Markdown: data/txt/V14N04-02.txt
Converted data/processed_tex/V14N04-03.tex to Markdown: data/txt/V14N04-03.txt
Converted data/processed_tex/V14N04-04.tex to Markdown: data/txt/V14N04-04.txt
Converted data/processed_tex/V14N04-05.tex to Markdown: data/txt/V14N04-05.txt
Converted data/processed_tex/V14N05-01.tex to Markdo



Converted data/processed_tex/V14N05-04.tex to Markdown: data/txt/V14N05-04.txt
Converted data/processed_tex/V14N05-05.tex to Markdown: data/txt/V14N05-05.txt
Converted data/processed_tex/V14N05-06.tex to Markdown: data/txt/V14N05-06.txt
Converted data/processed_tex/V14N05-07.tex to Markdown: data/txt/V14N05-07.txt
Converted data/processed_tex/V14N05-08.tex to Markdown: data/txt/V14N05-08.txt
Converted data/processed_tex/V15N01-01.tex to Markdown: data/txt/V15N01-01.txt
Converted data/processed_tex/V15N01-02.tex to Markdown: data/txt/V15N01-02.txt
Converted data/processed_tex/V15N01-03.tex to Markdown: data/txt/V15N01-03.txt
Converted data/processed_tex/V15N01-04.tex to Markdown: data/txt/V15N01-04.txt
Converted data/processed_tex/V15N02-01.tex to Markdown: data/txt/V15N02-01.txt
Converted data/processed_tex/V15N02-02.tex to Markdown: data/txt/V15N02-02.txt
Converted data/processed_tex/V15N02-03.tex to Markdown: data/txt/V15N02-03.txt
Converted data/processed_tex/V15N02-04.tex to Markdo



Converted data/processed_tex/V15N02-06.tex to Markdown: data/txt/V15N02-06.txt
Converted data/processed_tex/V15N03-01.tex to Markdown: data/txt/V15N03-01.txt
Converted data/processed_tex/V15N03-02.tex to Markdown: data/txt/V15N03-02.txt
Converted data/processed_tex/V15N03-03.tex to Markdown: data/txt/V15N03-03.txt
Converted data/processed_tex/V15N03-04.tex to Markdown: data/txt/V15N03-04.txt
Converted data/processed_tex/V15N03-05.tex to Markdown: data/txt/V15N03-05.txt
Converted data/processed_tex/V15N03-06.tex to Markdown: data/txt/V15N03-06.txt
Converted data/processed_tex/V15N04-01.tex to Markdown: data/txt/V15N04-01.txt
Converted data/processed_tex/V15N04-02.tex to Markdown: data/txt/V15N04-02.txt
Converted data/processed_tex/V15N04-03.tex to Markdown: data/txt/V15N04-03.txt
Converted data/processed_tex/V15N04-04.tex to Markdown: data/txt/V15N04-04.txt
Converted data/processed_tex/V15N05-01.tex to Markdown: data/txt/V15N05-01.txt
Converted data/processed_tex/V15N05-02.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V16N01-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V16N01-05.tex to Markdown: data/txt/V16N01-05.txt
Converted data/processed_tex/V16N02-01.tex to Markdown: data/txt/V16N02-01.txt
Converted data/processed_tex/V16N02-02.tex to Markdown: data/txt/V16N02-02.txt




Converted data/processed_tex/V16N02-03.tex to Markdown: data/txt/V16N02-03.txt
Converted data/processed_tex/V16N03-01.tex to Markdown: data/txt/V16N03-01.txt
Converted data/processed_tex/V16N03-02.tex to Markdown: data/txt/V16N03-02.txt
Converted data/processed_tex/V16N03-03.tex to Markdown: data/txt/V16N03-03.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V16N03-04.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V16N03-04.tex to Markdown: data/txt/V16N03-04.txt
Converted data/processed_tex/V16N04-01.tex to Markdown: data/txt/V16N04-01.txt
Converted data/processed_tex/V16N04-02.tex to Markdown: data/txt/V16N04-02.txt
Converted data/processed_tex/V16N04-03.tex to Markdown: data/txt/V16N04-03.txt
Converted data/processed_tex/V16N04-04.tex to Markdown: data/txt/V16N04-04.txt
Converted data/processed_tex/V16N04-05.tex to Markdown: data/txt/V16N04-05.txt
Converted data/processed_tex/V16N05-01.tex to Markdown: data/txt/V16N05-01.txt
Converted data/processed_tex/V16N05-02.tex to Markdown: data/txt/V16N05-02.txt
Converted data/processed_tex/V16N05-03.tex to Markdown: data/txt/V16N05-03.txt
Converted data/processed_tex/V16N05-04.tex to Markdown: data/txt/V16N05-04.txt
Converted data/processed_tex/V17N01-01.tex to Markdown: data/txt/V17N01-01.txt
Converted data/processed_tex/V17N01-02.tex to Markdown: data/txt/V17N01-02.txt
Converted data/processed_tex/V17N01-03.tex to Markdo

Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V17N02-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V17N02-02.tex to Markdown: data/txt/V17N02-02.txt
Converted data/processed_tex/V17N02-03.tex to Markdown: data/txt/V17N02-03.txt
Converted data/processed_tex/V17N03-01.tex to Markdown: data/txt/V17N03-01.txt
Converted data/processed_tex/V17N03-02.tex to Markdown: data/txt/V17N03-02.txt
Converted data/processed_tex/V17N03-03.tex to Markdown: data/txt/V17N03-03.txt
Converted data/processed_tex/V17N03-04.tex to Markdown: data/txt/V17N03-04.txt
Converted data/processed_tex/V17N03-05.tex to Markdown: data/txt/V17N03-05.txt
Converted data/processed_tex/V17N03-06.tex to Markdown: data/txt/V17N03-06.txt
Converted data/processed_tex/V17N04-01.tex to Markdown: data/txt/V17N04-01.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V17N04-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input
Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V17N04-04.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V17N04-02.tex to Markdown: data/txt/V17N04-02.txt
Converted data/processed_tex/V17N04-03.tex to Markdown: data/txt/V17N04-03.txt
Converted data/processed_tex/V17N04-04.tex to Markdown: data/txt/V17N04-04.txt
Converted data/processed_tex/V17N04-05.tex to Markdown: data/txt/V17N04-05.txt
Converted data/processed_tex/V17N04-06.tex to Markdown: data/txt/V17N04-06.txt
Converted data/processed_tex/V17N04-07.tex to Markdown: data/txt/V17N04-07.txt
Converted data/processed_tex/V17N04-08.tex to Markdown: data/txt/V17N04-08.txt
Converted data/processed_tex/V17N05-01.tex to Markdown: data/txt/V17N05-01.txt
Converted data/processed_tex/V17N05-02.tex to Markdown: data/txt/V17N05-02.txt
Converted data/processed_tex/V17N05-03.tex to Markdown: data/txt/V17N05-03.txt




Converted data/processed_tex/V18N01-01.tex to Markdown: data/txt/V18N01-01.txt
Converted data/processed_tex/V18N01-02.tex to Markdown: data/txt/V18N01-02.txt
Converted data/processed_tex/V18N02-01.tex to Markdown: data/txt/V18N02-01.txt
Converted data/processed_tex/V18N02-02.tex to Markdown: data/txt/V18N02-02.txt
Converted data/processed_tex/V18N02-03.tex to Markdown: data/txt/V18N02-03.txt
Converted data/processed_tex/V18N02-04.tex to Markdown: data/txt/V18N02-04.txt
Converted data/processed_tex/V18N02-05.tex to Markdown: data/txt/V18N02-05.txt
Converted data/processed_tex/V18N02-06.tex to Markdown: data/txt/V18N02-06.txt
Converted data/processed_tex/V18N03-01.tex to Markdown: data/txt/V18N03-01.txt
Converted data/processed_tex/V18N03-02.tex to Markdown: data/txt/V18N03-02.txt
Converted data/processed_tex/V18N03-03.tex to Markdown: data/txt/V18N03-03.txt
Converted data/processed_tex/V18N03-04.tex to Markdown: data/txt/V18N03-04.txt
Converted data/processed_tex/V18N04-01.tex to Markdo



Converted data/processed_tex/V20N02-09.tex to Markdown: data/txt/V20N02-09.txt
Converted data/processed_tex/V20N03-01.tex to Markdown: data/txt/V20N03-01.txt
Converted data/processed_tex/V20N03-02.tex to Markdown: data/txt/V20N03-02.txt
Converted data/processed_tex/V20N03-03.tex to Markdown: data/txt/V20N03-03.txt
Converted data/processed_tex/V20N03-04.tex to Markdown: data/txt/V20N03-04.txt
Converted data/processed_tex/V20N03-05.tex to Markdown: data/txt/V20N03-05.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V20N03-05.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V20N03-06.tex to Markdown: data/txt/V20N03-06.txt
Converted data/processed_tex/V20N03-07.tex to Markdown: data/txt/V20N03-07.txt
Converted data/processed_tex/V20N03-08.tex to Markdown: data/txt/V20N03-08.txt
Converted data/processed_tex/V20N04-01.tex to Markdown: data/txt/V20N04-01.txt
Converted data/processed_tex/V20N04-02.tex to Markdown: data/txt/V20N04-02.txt
Converted data/processed_tex/V20N04-03.tex to Markdown: data/txt/V20N04-03.txt
Converted data/processed_tex/V20N05-01.tex to Markdown: data/txt/V20N05-01.txt
Converted data/processed_tex/V20N05-02.tex to Markdown: data/txt/V20N05-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V20N05-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V20N05-03.tex to Markdown: data/txt/V20N05-03.txt
Converted data/processed_tex/V20N05-04.tex to Markdown: data/txt/V20N05-04.txt
Converted data/processed_tex/V20N05-05.tex to Markdown: data/txt/V20N05-05.txt
Converted data/processed_tex/V21N01-01.tex to Markdown: data/txt/V21N01-01.txt
Converted data/processed_tex/V21N01-02.tex to Markdown: data/txt/V21N01-02.txt
Converted data/processed_tex/V21N01-03.tex to Markdown: data/txt/V21N01-03.txt
Converted data/processed_tex/V21N01-04.tex to Markdown: data/txt/V21N01-04.txt
Converted data/processed_tex/V21N02-01.tex to Markdown: data/txt/V21N02-01.txt
Converted data/processed_tex/V21N02-02.tex to Markdown: data/txt/V21N02-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V21N02-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V21N02-03.tex to Markdown: data/txt/V21N02-03.txt
Converted data/processed_tex/V21N02-04.tex to Markdown: data/txt/V21N02-04.txt
Converted data/processed_tex/V21N02-05.tex to Markdown: data/txt/V21N02-05.txt
Converted data/processed_tex/V21N02-06.tex to Markdown: data/txt/V21N02-06.txt
Converted data/processed_tex/V21N02-07.tex to Markdown: data/txt/V21N02-07.txt
Converted data/processed_tex/V21N02-08.tex to Markdown: data/txt/V21N02-08.txt
Converted data/processed_tex/V21N02-09.tex to Markdown: data/txt/V21N02-09.txt
Converted data/processed_tex/V21N03-01.tex to Markdown: data/txt/V21N03-01.txt
Converted data/processed_tex/V21N03-02.tex to Markdown: data/txt/V21N03-02.txt
Converted data/processed_tex/V21N03-03.tex to Markdown: data/txt/V21N03-03.txt
Converted data/processed_tex/V21N03-04.tex to Markdown: data/txt/V21N03-04.txt
Converted data/processed_tex/V21N03-05.tex to Markdown: data/txt/V21N03-05.txt




Converted data/processed_tex/V21N03-06.tex to Markdown: data/txt/V21N03-06.txt
Converted data/processed_tex/V21N03-07.tex to Markdown: data/txt/V21N03-07.txt
Converted data/processed_tex/V21N04-01.tex to Markdown: data/txt/V21N04-01.txt
Converted data/processed_tex/V21N04-02.tex to Markdown: data/txt/V21N04-02.txt
Converted data/processed_tex/V21N04-03.tex to Markdown: data/txt/V21N04-03.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V21N04-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V21N04-04.tex to Markdown: data/txt/V21N04-04.txt
Converted data/processed_tex/V21N04-05.tex to Markdown: data/txt/V21N04-05.txt
Converted data/processed_tex/V21N04-06.tex to Markdown: data/txt/V21N04-06.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V21N04-05.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V21N04-07.tex to Markdown: data/txt/V21N04-07.txt
Converted data/processed_tex/V21N04-08.tex to Markdown: data/txt/V21N04-08.txt
Converted data/processed_tex/V21N04-09.tex to Markdown: data/txt/V21N04-09.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V21N04-08.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V21N04-10.tex to Markdown: data/txt/V21N04-10.txt
Converted data/processed_tex/V21N04-11.tex to Markdown: data/txt/V21N04-11.txt
Converted data/processed_tex/V21N04-12.tex to Markdown: data/txt/V21N04-12.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V21N04-11.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V21N05-01.tex to Markdown: data/txt/V21N05-01.txt
Converted data/processed_tex/V21N05-02.tex to Markdown: data/txt/V21N05-02.txt
Converted data/processed_tex/V21N05-03.tex to Markdown: data/txt/V21N05-03.txt
Converted data/processed_tex/V21N05-04.tex to Markdown: data/txt/V21N05-04.txt
Converted data/processed_tex/V21N06-01.tex to Markdown: data/txt/V21N06-01.txt
Converted data/processed_tex/V21N06-02.tex to Markdown: data/txt/V21N06-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V21N06-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V21N06-03.tex to Markdown: data/txt/V21N06-03.txt
Converted data/processed_tex/V21N06-04.tex to Markdown: data/txt/V21N06-04.txt
Converted data/processed_tex/V21N06-05.tex to Markdown: data/txt/V21N06-05.txt
Converted data/processed_tex/V22N01-01.tex to Markdown: data/txt/V22N01-01.txt
Converted data/processed_tex/V22N01-02.tex to Markdown: data/txt/V22N01-02.txt
Converted data/processed_tex/V22N02-01.tex to Markdown: data/txt/V22N02-01.txt
Converted data/processed_tex/V22N02-02.tex to Markdown: data/txt/V22N02-02.txt
Converted data/processed_tex/V22N03-01.tex to Markdown: data/txt/V22N03-01.txt
Converted data/processed_tex/V22N03-02.tex to Markdown: data/txt/V22N03-02.txt
Converted data/processed_tex/V22N03-03.tex to Markdown: data/txt/V22N03-03.txt
Converted data/processed_tex/V22N04-01.tex to Markdown: data/txt/V22N04-01.txt
Converted data/processed_tex/V22N04-02.tex to Markdown: data/txt/V22N04-02.txt
Converted data/processed_tex/V22N04-03.tex to Markdo



Converted data/processed_tex/V22N05-03.tex to Markdown: data/txt/V22N05-03.txt
Converted data/processed_tex/V22N05-04.tex to Markdown: data/txt/V22N05-04.txt
Converted data/processed_tex/V23N01-01.tex to Markdown: data/txt/V23N01-01.txt
Converted data/processed_tex/V23N01-02.tex to Markdown: data/txt/V23N01-02.txt
Converted data/processed_tex/V23N01-03.tex to Markdown: data/txt/V23N01-03.txt
Converted data/processed_tex/V23N01-04.tex to Markdown: data/txt/V23N01-04.txt
Converted data/processed_tex/V23N01-05.tex to Markdown: data/txt/V23N01-05.txt
Converted data/processed_tex/V23N02-01.tex to Markdown: data/txt/V23N02-01.txt
Converted data/processed_tex/V23N02-02.tex to Markdown: data/txt/V23N02-02.txt




Converted data/processed_tex/V23N03-01.tex to Markdown: data/txt/V23N03-01.txt
Converted data/processed_tex/V23N03-02.tex to Markdown: data/txt/V23N03-02.txt
Converted data/processed_tex/V23N03-03.tex to Markdown: data/txt/V23N03-03.txt
Converted data/processed_tex/V23N04-01.tex to Markdown: data/txt/V23N04-01.txt
Converted data/processed_tex/V23N04-02.tex to Markdown: data/txt/V23N04-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V23N04-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V23N05-01.tex to Markdown: data/txt/V23N05-01.txt
Converted data/processed_tex/V23N05-02.tex to Markdown: data/txt/V23N05-02.txt
Converted data/processed_tex/V23N05-03.tex to Markdown: data/txt/V23N05-03.txt
Converted data/processed_tex/V23N05-04.tex to Markdown: data/txt/V23N05-04.txt
Converted data/processed_tex/V23N05-05.tex to Markdown: data/txt/V23N05-05.txt
Converted data/processed_tex/V24N01-01.tex to Markdown: data/txt/V24N01-01.txt
Converted data/processed_tex/V24N01-02.tex to Markdown: data/txt/V24N01-02.txt
Converted data/processed_tex/V24N01-03.tex to Markdown: data/txt/V24N01-03.txt
Converted data/processed_tex/V24N01-04.tex to Markdown: data/txt/V24N01-04.txt




Converted data/processed_tex/V24N01-05.tex to Markdown: data/txt/V24N01-05.txt
Converted data/processed_tex/V24N01-06.tex to Markdown: data/txt/V24N01-06.txt
Converted data/processed_tex/V24N02-01.tex to Markdown: data/txt/V24N02-01.txt
Converted data/processed_tex/V24N02-02.tex to Markdown: data/txt/V24N02-02.txt
Converted data/processed_tex/V24N02-03.tex to Markdown: data/txt/V24N02-03.txt




Converted data/processed_tex/V24N02-04.tex to Markdown: data/txt/V24N02-04.txt
Converted data/processed_tex/V24N03-01.tex to Markdown: data/txt/V24N03-01.txt
Converted data/processed_tex/V24N03-02.tex to Markdown: data/txt/V24N03-02.txt
Converted data/processed_tex/V24N03-03.tex to Markdown: data/txt/V24N03-03.txt




Converted data/processed_tex/V24N03-04.tex to Markdown: data/txt/V24N03-04.txt
Converted data/processed_tex/V24N03-05.tex to Markdown: data/txt/V24N03-05.txt
Converted data/processed_tex/V24N03-06.tex to Markdown: data/txt/V24N03-06.txt
Converted data/processed_tex/V24N03-07.tex to Markdown: data/txt/V24N03-07.txt
Converted data/processed_tex/V24N03-08.tex to Markdown: data/txt/V24N03-08.txt
Converted data/processed_tex/V24N04-01.tex to Markdown: data/txt/V24N04-01.txt
Converted data/processed_tex/V24N04-02.tex to Markdown: data/txt/V24N04-02.txt
Converted data/processed_tex/V24N04-03.tex to Markdown: data/txt/V24N04-03.txt
Converted data/processed_tex/V24N04-04.tex to Markdown: data/txt/V24N04-04.txt




Converted data/processed_tex/V24N04-05.tex to Markdown: data/txt/V24N04-05.txt
Converted data/processed_tex/V24N05-01.tex to Markdown: data/txt/V24N05-01.txt
Converted data/processed_tex/V24N05-02.tex to Markdown: data/txt/V24N05-02.txt
Converted data/processed_tex/V24N05-03.tex to Markdown: data/txt/V24N05-03.txt
Converted data/processed_tex/V24N05-04.tex to Markdown: data/txt/V24N05-04.txt
Converted data/processed_tex/V25N01-01.tex to Markdown: data/txt/V25N01-01.txt
Converted data/processed_tex/V25N01-02.tex to Markdown: data/txt/V25N01-02.txt
Converted data/processed_tex/V25N01-03.tex to Markdown: data/txt/V25N01-03.txt
Converted data/processed_tex/V25N01-04.tex to Markdown: data/txt/V25N01-04.txt
Converted data/processed_tex/V25N01-05.tex to Markdown: data/txt/V25N01-05.txt




Converted data/processed_tex/V25N02-01.tex to Markdown: data/txt/V25N02-01.txt
Converted data/processed_tex/V25N02-02.tex to Markdown: data/txt/V25N02-02.txt
Converted data/processed_tex/V25N02-03.tex to Markdown: data/txt/V25N02-03.txt
Converted data/processed_tex/V25N03-01.tex to Markdown: data/txt/V25N03-01.txt




Converted data/processed_tex/V25N03-02.tex to Markdown: data/txt/V25N03-02.txt
Converted data/processed_tex/V25N04-01.tex to Markdown: data/txt/V25N04-01.txt
Converted data/processed_tex/V25N04-02.tex to Markdown: data/txt/V25N04-02.txt
Converted data/processed_tex/V25N04-03.tex to Markdown: data/txt/V25N04-03.txt
Converted data/processed_tex/V25N04-04.tex to Markdown: data/txt/V25N04-04.txt
Converted data/processed_tex/V25N04-05.tex to Markdown: data/txt/V25N04-05.txt
Converted data/processed_tex/V25N04-06.tex to Markdown: data/txt/V25N04-06.txt
Converted data/processed_tex/V25N05-01.tex to Markdown: data/txt/V25N05-01.txt
Converted data/processed_tex/V25N05-02.tex to Markdown: data/txt/V25N05-02.txt
Converted data/processed_tex/V25N05-03.tex to Markdown: data/txt/V25N05-03.txt
Converted data/processed_tex/V25N05-04.tex to Markdown: data/txt/V25N05-04.txt
Converted data/processed_tex/V25N05-05.tex to Markdown: data/txt/V25N05-05.txt
Converted data/processed_tex/V25N05-06.tex to Markdo



Converted data/processed_tex/V26N01-04.tex to Markdown: data/txt/V26N01-04.txt
Converted data/processed_tex/V26N01-05.tex to Markdown: data/txt/V26N01-05.txt
Converted data/processed_tex/V26N01-06.tex to Markdown: data/txt/V26N01-06.txt




Converted data/processed_tex/V26N01-07.tex to Markdown: data/txt/V26N01-07.txt
Converted data/processed_tex/V26N01-08.tex to Markdown: data/txt/V26N01-08.txt
Converted data/processed_tex/V26N01-09.tex to Markdown: data/txt/V26N01-09.txt
Converted data/processed_tex/V26N02-01.tex to Markdown: data/txt/V26N02-01.txt
Converted data/processed_tex/V26N02-02.tex to Markdown: data/txt/V26N02-02.txt
Converted data/processed_tex/V26N02-03.tex to Markdown: data/txt/V26N02-03.txt
Converted data/processed_tex/V26N02-04.tex to Markdown: data/txt/V26N02-04.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V26N02-03.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V26N02-05.tex to Markdown: data/txt/V26N02-05.txt
Converted data/processed_tex/V26N02-06.tex to Markdown: data/txt/V26N02-06.txt




Converted data/processed_tex/V26N02-07.tex to Markdown: data/txt/V26N02-07.txt
Converted data/processed_tex/V26N02-08.tex to Markdown: data/txt/V26N02-08.txt
Converted data/processed_tex/V26N02-09.tex to Markdown: data/txt/V26N02-09.txt




Converted data/processed_tex/V26N03-01.tex to Markdown: data/txt/V26N03-01.txt
Converted data/processed_tex/V26N03-02.tex to Markdown: data/txt/V26N03-02.txt
Converted data/processed_tex/V26N03-03.tex to Markdown: data/txt/V26N03-03.txt
Converted data/processed_tex/V26N03-04.tex to Markdown: data/txt/V26N03-04.txt
Converted data/processed_tex/V26N04-01.tex to Markdown: data/txt/V26N04-01.txt
Converted data/processed_tex/V26N04-02.tex to Markdown: data/txt/V26N04-02.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V27N01-01.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input
Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V27N01-02.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


Converted data/processed_tex/V26N04-03.tex to Markdown: data/txt/V26N04-03.txt
Converted data/processed_tex/V27N01-01.tex to Markdown: data/txt/V27N01-01.txt
Converted data/processed_tex/V27N01-02.tex to Markdown: data/txt/V27N01-02.txt
Converted data/processed_tex/V27N01-05.tex to Markdown: data/txt/V27N01-05.txt


Error at "source" (line 1, column 1):
unexpected Tok "data/processed_tex/V27N01-05.tex" (line 1, column 1) (CtrlSeq "begin") "\\begin"
expecting end of input


In [7]:
import pandas as pd

df = pd.read_csv('./data/latex_info.csv')
df.head()

Unnamed: 0,file_name,title,etitle,jabstract,eabstract,section_names,sec_intro,sec_method,sec_result,sec_conclusion,abs_intro,abs_method,abs_result,abs_conclusion
0,V01N01-01.tex,表層表現中の情報に基づく文章構造の自動抽出,Automatic Detection of Discourse Structure by ...,テキストや談話を理解するためには，まずその文章構造を理解する必要があ\nる．文章構造に関する...,"To understand a text or dialogue, one must tra...","['はじめに', '文章構造のモデルと結束関係', '文章構造の自動抽出', '実験と考察'...",,,,,,,,
1,V01N01-02.tex,,A Comparative Study of Automatic Extraction of...,,While corpus-based studies are now becoming a ...,"['Introduction', 'Importance of Collocational ...",,,,,,,,
2,V01N01-03.tex,並列構造の検出に基づく長い日本語文の構文解析,A Syntactic Analysis Method of Long Japanese S...,従来の構文解析法は十分な精度の解析結果を得ることができず，とくに\n長い文の解析が困難であっ...,Conventional parsing methods can not analyze l...,"['はじめに', '並列構造の検出と文の簡単化', '係り受け解析', '文解析の結果とその...",,,,,,,,
3,V01N01-04.tex,,A System for Finding Translation Patterns by C...,,When the example-based approach is used for ma...,"['Introduction', 'System Overview', 'Example-B...",,,,,,,,
4,V02N01-01.tex,日英機械翻訳における利用者登録語の意味属性の自動推定,Automatic Determination of Semantic Attributes...,"機械翻訳システムを使用して現実の文書を翻訳する場合, 通常, \n翻訳対象文書に合った利用者...",User dictionaries are important for practical ...,"['はじめに', 'システム辞書と利用者辞書', '意味属性推定の方法', '意味属性推定精...",,,,,,,,


In [8]:
df.to_csv('NLP_JP_CORPUS.csv', index=False, encoding='utf-8-sig')

In [19]:
df['etitle'][3]

'A System for Finding Translation Patterns by Comparing an MT Result and Its Correction'

In [44]:
import pandas as pd

pd.read_csv('./test.csv')

Unnamed: 0,file_name,language,title,etitle,jabstract,eabstract,section_names,sec_intro,sec_method,sec_result,sec_conclusion,abs_intro,abs_method,abs_result,abs_conclusion
0,V01N01-01.tex,jp,表層表現中の情報に基づく文章構造の自動抽出,Automatic Detection of Discourse Structure by ...,テキストや談話を理解するためには，まずその文章構造を理解する必要があ\nる．文章構造に関する...,"To understand a text or dialogue, one must tra...","['はじめに', '文章構造のモデルと結束関係', '文章構造の自動抽出', '実験と考察'...",,,,,,,,
1,V01N01-02.tex,en,,A Comparative Study of Automatic Extraction of...,,While corpus-based studies are now becoming a ...,"['Introduction', 'Importance of Collocational ...",,,,,,,,
2,V01N01-03.tex,jp,並列構造の検出に基づく長い日本語文の構文解析,A Syntactic Analysis Method of Long Japanese S...,従来の構文解析法は十分な精度の解析結果を得ることができず，とくに長い文の解析が困難であった．...,Conventional parsing methods can not analyze l...,"['はじめに', '並列構造の検出と文の簡単化', '係り受け解析', '文解析の結果とその...",,,,,,,,
3,V01N01-04.tex,en,,A System for Finding Translation Patterns by C...,,When the example-based approach is used for ma...,"['Introduction', 'System Overview', 'Example-B...",,,,,,,,
4,V02N01-01.tex,jp,日英機械翻訳における利用者登録語の意味属性の自動推定,Automatic Determination of Semantic Attributes...,"機械翻訳システムを使用して現実の文書を翻訳する場合, 通常, \n翻訳対象文書に合った利用者...",User dictionaries are important for practical ...,"['はじめに', 'システム辞書と利用者辞書', '意味属性推定の方法', '意味属性推定精...",,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624,V26N03-03.tex,jp,事前学習された文の分散表現を用いた機械翻訳の自動評価,Metric for Automatic Machine Translation Evalu...,本稿では，参照文を用いた文単位での機械翻訳自動評価手法について述べる．現在のデファクトスタン...,This study describes a segment-level metric fo...,"['はじめに', '関連研究', '事前学習された文の分散表現を用いた機械翻訳の自動評価',...",,,,,,,,
625,V26N03-04.tex,jp,単語埋め込みに基づくサプライザル,Surprisal through Word Embeddings,ヒトの文処理のモデル化として Hale によりサプライザルが提案されている．サプライザルは文...,The concept of surprisal was proposed by Hale ...,"['はじめに', '前提', '分析手法', '結果と考察', 'おわりに', '分析結果（...",,,,,,,,
626,V26N04-01.tex,jp,複単語表現を考慮した依存構造コーパスの構築と解析,Construction and Analysis of Multiword Express...,複単語表現 (MWE) は統語的または意味的な非構成性を有する複数の単語からなるまとまりであ...,Multiword expressions (MWEs) consist of multip...,"['はじめに', 'MWEを考慮した依存構造コーパスの構築', '連続MWEを考慮した依存構...",,,,,,,,
627,V26N04-02.tex,jp,多義語分散表現の文脈化,Contextualized Multi-Sense Word Embedding,近年，多くの自然言語処理タスクにおいて単語分散表現が利用されている．しかし，各単語に 1 つ...,\footnote[0]{本論文の内容の一部は，The 32nd Pacific Asia ...,"['はじめに', '関連研究', '提案手法', '実験設定', '文脈中での単語間の意味的...",,,,,,,,
