In [2]:
import requests
import bs4
import os
import sympy
from sympy import symbols, integrate
import google.generativeai as ai

class EquationScraper:
    """Fetch an article from the Elsevier content API and collect its equations.

    Intended call order: ``make_request()`` -> ``make_soup()`` ->
    ``find_equations()``, then ``mathml_to_python()`` / ``append_dataset()``
    for each entry of ``mathml_dict``.

    Attributes set by the methods:
        full_text: raw response body (set by ``make_request``).
        soup: parsed document (set by ``make_soup``).
        equation: every ``ce:formula`` tag found (set by ``find_equations``).
        mathml_dict: ``{int label: mml:math tag}`` (set by ``find_equations``).
        dataset: list of ``{"mathml_input", "python_output"}`` dicts.
    """

    def __init__(self, doi, api_key):
        """Store the article DOI and the Elsevier API key; no I/O happens here."""
        self.doi = doi
        self.api_key = api_key
        self.dataset = []
        # Generative model, created lazily on the first mathml_to_python() call.
        self._model = None

    def make_request(self, timeout=30):
        """Download the article and keep the response body in ``self.full_text``.

        Args:
            timeout: seconds to wait for the server before giving up.

        Raises:
            ValueError: if no API key was supplied.
            Exception: if the server answers with a status other than 200.
        """
        if not self.api_key:
            # Without this check the request goes out with "APIKey=None" and
            # only fails later with an opaque 401.
            raise ValueError("An Elsevier API key is required to make the request")
        response = requests.get(
            f"https://api.elsevier.com/content/article/doi/{self.doi}",
            params={"APIKey": self.api_key},
            timeout=timeout,
        )
        if response.status_code == 200:
            print("Request successful \n")
            self.full_text = response.text
        else:
            raise Exception(f"Request failed with status code: {response.status_code} \n")

    def make_soup(self):
        """Parse ``self.full_text`` with the lxml parser into ``self.soup``."""
        self.soup = bs4.BeautifulSoup(self.full_text, "lxml")

    @staticmethod
    def _parse_label(label_text):
        """Return the integer inside a label such as ``"(12)"``.

        Returns ``None`` when the text between the parentheses is not a plain
        decimal number (for example ``"(A.1)"`` or an empty label).
        """
        digits = label_text.strip().strip("()").strip()
        return int(digits) if digits.isdecimal() else None

    def find_equations(self):
        """Collect numbered equations into ``self.mathml_dict``.

        Every ``ce:formula`` tag is kept in ``self.equation``. A formula is
        added to ``self.mathml_dict`` only when it has both a ``ce:label``
        holding an integer and an ``mml:math`` child; other formulas are
        skipped instead of aborting the whole scan.
        """
        self.equation = self.soup.find_all("ce:formula")
        equation_dict = {}
        for eq in self.equation:
            label = eq.find("ce:label")
            math = eq.find("mml:math")
            if label is None or math is None:
                continue
            number = self._parse_label(label.text)
            if number is None:
                continue
            equation_dict[number] = math
        self.mathml_dict = equation_dict

    def mathml_to_python(self, mathml):
        """Ask the Gemini model to translate one MathML equation into Python.

        The client is configured from the ``API_KEY`` environment variable.
        The model object is created once and reused for later calls.

        Args:
            mathml: the equation markup; it is interpolated into the prompt.

        Returns:
            The response object from ``generate_content`` (callers read ``.text``).
        """
        if self._model is None:
            ai.configure(api_key=os.getenv("API_KEY"))
            self._model = ai.GenerativeModel("gemini-1.5-flash")
        query = f"""You are a programming expert and have been tasked with converting an equation from MathML to Python.
        Return the equation in well formatted Python code, using the sympy library if necessary for complex equations. Do not return
        anything other than the equation and do not include the word Python.
        The equation you need to convert is: {mathml}"""
        response = self._model.generate_content(query)
        return response

    def append_dataset(self, mathml, python):
        """Append one ``{"mathml_input", "python_output"}`` record to ``self.dataset``."""
        self.dataset.append({"mathml_input": mathml, "python_output": python})


def extract_code(reply_text):
    """Return the code contained in a model reply.

    If the reply contains a Markdown fence (```), the text after the first
    fence marker is used and a leading language tag line (``python``,
    ``python3`` or ``py``) is dropped. A reply without any fence is returned
    whole. Surrounding whitespace is stripped in both cases.
    """
    pieces = reply_text.split("```")
    if len(pieces) < 2:
        # No fence at all: treat the whole reply as the code.
        return reply_text.strip()
    code = pieces[1].strip()
    first_line, newline, remainder = code.partition("\n")
    # The fence's info string is not part of the code.
    if newline and first_line.strip().lower() in ("python", "python3", "py"):
        code = remainder.strip()
    return code


def main():
    """Scrape one article's equations, convert each to Python, and save the pairs.

    Reads the Elsevier key from ``ELSEVIER_API_KEY``, prints the collected
    dataset, and writes it to ``equations.txt``.
    """
    doi = "10.1016/j.triboint.2021.106934"
    api_key = os.getenv("ELSEVIER_API_KEY")
    scraper = EquationScraper(doi, api_key)
    scraper.make_request()
    scraper.make_soup()
    scraper.find_equations()
    # Walk the labels that actually exist (in numeric order) rather than
    # assuming they run 1..N without gaps.
    for label in sorted(scraper.mathml_dict):
        mathml = scraper.mathml_dict[label]
        reply = scraper.mathml_to_python(mathml)
        scraper.append_dataset(str(mathml), extract_code(reply.text))
    print(scraper.dataset)
    save_path = "equations.txt"
    # MathML commonly carries non-ASCII symbols, so do not rely on the
    # platform's default encoding.
    with open(save_path, "w", encoding="utf-8") as f:
        for item in scraper.dataset:
            f.write(f"MathML: {item['mathml_input']}\nPython: {item['python_output']}\n\n")

# Run the scraping pipeline only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


Exception: Request failed with status code: 401 


In [None]:
# Sample <ce:formula> element kept for reference: a <ce:label> of "(1)"
# followed by the <mml:math> markup for h = h_g + h_c. This is the tag
# structure EquationScraper.find_equations() searches for.
"""<ce:formula id="eq0010">
<ce:label>(1)</ce:label>
<mml:math altimg="si4.gif" overflow="scroll">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</ce:formula>"""