In [236]:
from bs4 import BeautifulSoup
import requests
import pandas as pd


In [237]:
def parse_lipid_from_charmm_gui() -> pd.core.frame.DataFrame:
    """Parses lipid information from the CHARMM-GUI website and saves it as a CSV file.

    Returns
    -------
        pd.core.frame.DataFrame
            A DataFrame containing the lipid information with columns:
                - "Name": The name of the lipid.
                - "Alias": The alias of the lipid extracted from the download link.
                - "Category": The category under which the lipid is listed.
                - "Link": The download link for the lipid file.
    """

    url = "https://www.charmm-gui.org/?doc=archive&lib=lipid"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    
    base_url = "https://www.charmm-gui.org"
    col_names = ["Name", "Alias", "Category", "Link"]
    recordings = []
        
    rows = soup.find_all('div', id='main')
    for row in rows:
        for div in row.find_all("div"):
            for title in div.find_all("b"):
                for ul in div.find_all("ul"):
                    for li in ul.find_all('li'):                    
                        sterol_name = li.find("span").get_text().split()[0]
                        
                        download_link = base_url + li.find("a", href=True)["href"]
                        alias = download_link.split("/")[-1]
                        alias = alias.split('.')[0]
                        
                        recording = {"Name": sterol_name, 
                                     "Alias": alias, 
                                     "Category": title.get_text().strip(),
                                     "Link": download_link }
                        recordings.append(recording)
        
    df = pd.DataFrame(recordings)
    df.to_csv('lipid_CHARMM_GUI.csv', sep=';', index=False, header=True, columns=col_names)
    return df

In [238]:
lipid_list = parse_lipid_from_charmm_gui()
lipid_list

Unnamed: 0,Name,Alias,Category,Link
0,CHOLESTEROL,chl1,Sterols,https://www.charmm-gui.orgarchive/lipid/chl1.t...
1,ERGOSTEROL,erg,Sterols,https://www.charmm-gui.orgarchive/lipid/erg.ta...
2,DPOP,dpop,Sterols,https://www.charmm-gui.orgarchive/lipid/dpop.t...
3,β-SITOSTEROL,sito,Sterols,https://www.charmm-gui.orgarchive/lipid/sito.t...
4,STIGMASTEROL,stig,Sterols,https://www.charmm-gui.orgarchive/lipid/stig.t...
...,...,...,...,...
746,BDTM,bdtm,Thio Maltosides,https://www.charmm-gui.orgarchive/lipid/bdtm.t...
747,AUDTM,audtm,Thio Maltosides,https://www.charmm-gui.orgarchive/lipid/audtm....
748,BUDTM,budtm,Thio Maltosides,https://www.charmm-gui.orgarchive/lipid/budtm....
749,ADDTM,addtm,Thio Maltosides,https://www.charmm-gui.orgarchive/lipid/addtm....
