# Module Import

In [None]:
from chemdataextractor import Document
from chemdataextractor.reader.elsevier import ElsevierXmlReader
from chemdataextractor.parse import R, I, join, AutoSentenceParser, AutoTableParser
from chemdataextractor.model.units import TemperatureModel
from chemdataextractor.model import StringType, ModelType, Compound

import os
import pandas as pd

# Property Model

In [None]:
class MeltingPoint(TemperatureModel):
    """A melting point measurement.

    Extends ``TemperatureModel`` with a specifier that recognises the
    phrases and abbreviations listed in ``specifier_expression`` and with a
    required link to the ``Compound`` the measurement belongs to.
    """

    # Grammar for the text that introduces a melting point. Alternatives:
    #   * two-token phrases "melting point(s)" / "melting temperature(s)",
    #     with either case for the leading "M";
    #   * the abbreviations Tm, mp, m.p, m.p. and their upper-case spellings.
    # NOTE(review): `I` is presumably a case-insensitive word match, which
    # would make the upper-case alternatives redundant -- confirm against the
    # chemdataextractor.parse documentation before removing any of them.
    specifier_expression = (
        R("[Mm]elting") + R("point(s?)")
        | R("[Mm]elting") + R("temperature(s?)")
        | I("Tm")
        | I("mp")
        | I("m.p")
        | I("m.p.")
        | I("MP")
        | I("M.P")
        | I("M.P.")
    ).add_action(join)  # `join` is applied to the matched tokens of a hit

    # The specifier is mandatory for this model.
    specifier = StringType(parse_expression=specifier_expression, required=True)
    # The associated compound is mandatory and flagged as contextual.
    compound = ModelType(Compound, contextual=True, required=True)
    # Use the automatic parsers for both sentences and tables.
    parsers = [AutoSentenceParser(),  AutoTableParser()]

# Auto Extraction

In [None]:
# Directory containing the article files to process.
folder = 'metadata/Article'
# os.listdir returns entries in arbitrary order; sort them so the articles
# are processed in the same order on every run.
file_list = sorted(os.listdir(folder))

In [None]:
# Accumulates one dict per extracted (non-Compound) record across all articles.
records = []

# The same model is used for every article, so bind it once outside the loop.
extraction_model = MeltingPoint

for article in file_list:
    print(article)

    try:
        # The context manager closes the file handle even when reading or
        # parsing the article fails.
        with open(os.path.join(folder, article), 'rb') as f:
            doc = Document.from_file(f, readers=[ElsevierXmlReader()])

        doc.models = [extraction_model]

        # Metadata is optional: fall back to a placeholder when it cannot be
        # serialized instead of discarding the whole article.
        try:
            metadata = doc.metadata.serialize()
        except Exception:
            metadata = "Not Found"

        parsed_information = doc.records.serialize()
    except Exception as exc:
        # Best effort: report the failure and carry on with the next article.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long run can still be interrupted.
        print('Unable to read document')
        print('  reason: {!r}'.format(exc))
    else:
        for record in parsed_information:
            # Skip records that contain a top-level "Compound" entry; only
            # the remaining records are kept.
            if "Compound" in record:
                continue
            record["Article Metadata"] = metadata
            # NOTE: this stores the model class object itself (not its name).
            record["Extraction Model"] = extraction_model
            records.append(record)

In [None]:
# Build a table from the collected record dicts (one row per record).
Auto_Extraction_Records = pd.DataFrame(data=records)

# Write the table to disk; the "utf-8-sig" codec emits a UTF-8 byte-order
# mark at the start of the file.
Auto_Extraction_Records.to_csv(
    path_or_buf="elsevier_joc.csv",
    encoding='utf-8-sig',
)