**Spring Boot**

In [None]:
import requests
from bs4 import BeautifulSoup


url = "https://docs.spring.io/spring-boot/appendix/application-properties/index.html"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/spring_boot.properties"
properties = []

# Iterate over all tables containing properties
for table in soup.select('tbody'):
    for row in table.select('tr'):
        columns = row.find_all('td')
        if len(columns) >= 2:
            option = columns[0].get_text(strip=True)
            default = columns[2].get_text(strip=True)
            # Skip entries that are empty or malformed
            if option:
                properties.append((option, default))


with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        sanitized_default = default.replace('\u00A0', ' ').strip()
        f.write(f"{option}={sanitized_default}\n" if sanitized_default else f"{option}=\n")

**Cypress**

In [None]:
import requests
from bs4 import BeautifulSoup


url = "https://docs.cypress.io/app/references/configuration"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/cypress.properties"
properties = []

# Iterate over all tables containing properties
for table in soup.select('tbody'):
    for row in table.select('tr'):
        columns = row.find_all('td')
        if len(columns) >= 2:
            option = columns[0].get_text(strip=True)
            default = columns[1].get_text(strip=True)
            # Skip entries that are empty or malformed
            if option:
                properties.append((option, default))


with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**MySQL**

In [None]:
import requests
from bs4 import BeautifulSoup


url = "https://dev.mysql.com/doc/refman/8.4/en/server-system-variables.html"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/mysql.properties"
properties = []

properties = []
for list in soup.select("ul"):  # Skip header
    for li in list.select("li"):
        option = li.find("code", class_="literal")
        if option:
            option_name = option.get_text(strip=True)
            default_value= ""

            table = li.find("table")
            if table:
                for row in table.select("tr"):
                    th = row.find("th")
                    td = row.find("td")
                    if th and td and th.get_text(strip=True) == "Default Value":
                        code = td.find("code", class_="literal")
                        default_value = code.get_text(strip=True) if code else td.get_text(strip=True)

                properties.append((option_name, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**GitHub Action**

In [None]:
import requests
from bs4 import BeautifulSoup

url = "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/github_action.properties"
properties = []

list = soup.find_all("ul", class_="List__ListBox-sc-1x7olzq-0 hgjakc")[1]
for entry in list.find_all("li"):
    option = entry.get_text(strip=True)
    default_value = ""
    properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**Gradle**

In [None]:
import requests
from bs4 import BeautifulSoup

url = "https://docs.gradle.org/current/userguide/build_environment.html"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/gradle.properties"
properties = []

for dlist in soup.find_all("div", class_="dlist"):
    dl = dlist.find("dl")
    for dt in dl.find_all("dt"):
        option = dt.get_text(strip=True)
        if "=" in option:
            option = option.split("=")[0].strip()
            properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**TSconfig**

In [None]:
import requests
from bs4 import BeautifulSoup

url = "https://www.typescriptlang.org/tsconfig"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/tsconfig.properties"
properties = []

# All config option links point to IDs on the page
for compiler_option in soup.find_all("section", class_="compiler-option"):
    option = compiler_option.find("code").get_text(strip=True)
    default_section = compiler_option.find("ul", class_="compiler-option-md")
    if default_section:
        text = default_section.get_text(strip=True)
        if "Default:" in text:
            default_value = default_section.find("code")
            if default_value:
                default_value = default_value.get_text(strip=True)
        else:
            default_value = ""
    else:
        default_value = ""
    
    properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**MongoDB**

In [52]:
import requests
from bs4 import BeautifulSoup
import re

url = "https://www.mongodb.com/docs/manual/reference/configuration-options/"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/mongodb.properties"
properties = []

# All config option links point to IDs on the page
for settings in soup.find_all("dl", class_="setting"):
    dt = settings.find("dt")
    dd = settings.find("dd")
    option = dt.find("code").get_text(strip=True)
    default_value = ""

    dd_text = dd.get_text(strip=True)
    for p in dd.find_all("p"):
        p_text = p.get_text(strip=True)
        if "Default:" in p.get_text():
            default_value = p_text.split("Default:")[-1].strip()
        
    
    properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")
 

**Alluxio**

In [7]:
import requests
from bs4 import BeautifulSoup
import re

url = "https://docs.alluxio.io/os/user/stable/en/reference/Properties-List.html"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/alluxio.properties"
properties = []

for table_row in soup.find_all("tr"):
    values = table_row.find_all("td")
    
    if len(values) == 3:
        option = values[0].get_text(strip=True)
        default_value = values[1].get_text(strip=True)
        properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**Angular**

In [13]:
import requests
from bs4 import BeautifulSoup
import re

url = "https://angular.dev/reference/configs/workspace-config"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/angular.properties"
properties = []

for table_row in soup.find_all("tr"):
    values = table_row.find_all("td")

    if len(values) == 2:
        option = values[0].get_text(strip=True)
        default_value = ""
        properties.append((option, default_value))

    if len(values) == 4:
        option = values[0].get_text(strip=True)
        default_value = values[3].get_text(strip=True)
        properties.append((option, default_value))


with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**Circle CI**

In [32]:
import requests
from bs4 import BeautifulSoup
import re

url = "https://circleci.com/docs/configuration-reference/"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/circleci.properties"
properties = []

for table_data in soup.find_all("td"):
    option = table_data.find("p")
    if option:
        option_name = option.get_text(strip=True)
        default_value = ""
        properties.append((option_name, default_value))


with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**Elasticsearch**

In [17]:


import requests
from bs4 import BeautifulSoup
import re

urls = [
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/circuit-breaker-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/auding-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/enrich-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/miscellaneous-cluster-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/cross-cluster-replication-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/discovery-cluster-formation-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/field-data-cache-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/health-diagnostic-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/index-lifecycle-management-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/data-stream-lifecycle-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/index-management-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/index-recovery-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/indexing-buffer-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/license-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/local-gateway",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/machine-learning-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/inference-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/monitoring-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/node-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/networking-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/node-query-cache-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/search-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/security-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/shard-request-cache-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/snapshot-restore-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/transforms-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/thread-pool-settings",
    "https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/watcher-settings",
]

output_file = "../data/technology/elastisearch.properties"
properties = []

for url in urls:
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    dl_blocks = soup.find_all("dl")
    for dl in dl_blocks:
        dt_blocks= dl.find_all("dt")
        for dt in dt_blocks:
            code_blocks = dt.find_all("code")
            for code_block in code_blocks:
                option = code_block.get_text().strip()
                if "." in option and not option.startswith("http"):
                    properties.append((option, ""))


with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**Haddop Common**

In [50]:
import requests
import xml.etree.ElementTree as ET

url = "https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/core-default.xml"
output_file = "../data/technology/hadoop_common.properties"

# Download XML
response = requests.get(url)
response.raise_for_status()
root = ET.fromstring(response.content)

# Parse and write properties safely
with open(output_file, "w") as f:
    for prop in root.findall("property"):
        name_elem = prop.find("name")
        value_elem = prop.find("value")

        name = name_elem.text.strip() if name_elem is not None and name_elem.text else None
        value = value_elem.text.strip() if value_elem is not None and value_elem.text else None

        if name is not None and value is not None:
            f.write(f"{name}={value}\n")

        if name is not None and value is None:
            f.write(f"{name}=""\n")

print(f"Properties saved to {output_file}")



Properties saved to ../data/technology/hadoop_common.properties


**Hadoop HDFS**

In [51]:
import requests
import xml.etree.ElementTree as ET

url = "https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"
output_file = "../data/technology/hadoop_hdfs.properties"

# Download XML
response = requests.get(url)
response.raise_for_status()
root = ET.fromstring(response.content)

# Parse and write properties safely
with open(output_file, "w") as f:
    for prop in root.findall("property"):
        name_elem = prop.find("name")
        value_elem = prop.find("value")

        name = name_elem.text.strip() if name_elem is not None and name_elem.text else None
        value = value_elem.text.strip() if value_elem is not None and value_elem.text else None

        if name is not None and value is not None:
            f.write(f"{name}={value}\n")

        if name is not None and value is None:
            f.write(f"{name}=""\n")

print(f"Properties saved to {output_file}")


Properties saved to ../data/technology/hadoop_hdfs.properties


**MapReduce**

In [52]:
import requests
import xml.etree.ElementTree as ET

url = "https://hadoop.apache.org/docs/r2.7.1/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"
output_file = "../data/technology/mapreduce.properties"

# Download XML
response = requests.get(url)
response.raise_for_status()
root = ET.fromstring(response.content)

# Parse and write properties safely
with open(output_file, "w") as f:
    for prop in root.findall("property"):
        name_elem = prop.find("name")
        value_elem = prop.find("value")

        name = name_elem.text.strip() if name_elem is not None and name_elem.text else None
        value = value_elem.text.strip() if value_elem is not None and value_elem.text else None

        if name is not None and value is not None:
            f.write(f"{name}={value}\n")

        if name is not None and value is None:
            f.write(f"{name}=""\n")

print(f"Properties saved to {output_file}")

Properties saved to ../data/technology/mapreduce.properties


**PHP**

In [54]:

import requests
from bs4 import BeautifulSoup
import re

url = "https://www.php.net/manual/en/ini.list.php"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/php.properties"
properties = []

table_body = soup.find("tbody", class_="tbody")  

for table_row in table_body.find_all("tr")[1:]:
    table_row_data = table_row.find_all("td")
    option = table_row_data[0].get_text(strip=True)
    default_value = table_row_data[1].get_text(strip=True).replace('"', '')
    properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**PostgreSQL**

Parsing https://www.elastic.co
Extracted 0 parameters.


**RabbitMQ**

In [62]:

import requests
from bs4 import BeautifulSoup
import re

url = "https://www.rabbitmq.com/docs/configure"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

output_file = "../data/technology/rabbitmq.properties"
properties = []

tables = soup.find_all("table", class_="name-description")  

for table in tables:
    for table_row in table.find_all("tr"):
        table_row_data= table_row.find_all("td")
        if len(table_row_data) == 2:
            option = table_row_data[0].get_text(strip=True)
            default_value = ""
            properties.append((option, default_value))
        


        #option = table_row_data[0].get_text(strip=True)
        #default_value = ""
        #properties.append((option, default_value))

with open(output_file, 'w', encoding='utf-8') as f:
    for option, default in properties:
        f.write(f"{option}={default}\n")

**YARN**

In [63]:
import requests
import xml.etree.ElementTree as ET

url = "https://hadoop.apache.org/docs/r2.7.3/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"
output_file = "../data/technology/yarn.properties"

# Download XML
response = requests.get(url)
response.raise_for_status()
root = ET.fromstring(response.content)

# Parse and write properties safely
with open(output_file, "w") as f:
    for prop in root.findall("property"):
        name_elem = prop.find("name")
        value_elem = prop.find("value")

        name = name_elem.text.strip() if name_elem is not None and name_elem.text else None
        value = value_elem.text.strip() if value_elem is not None and value_elem.text else None

        if name is not None and value is not None:
            f.write(f"{name}={value}\n")

        if name is not None and value is None:
            f.write(f"{name}=""\n")

print(f"Properties saved to {output_file}")

Properties saved to ../data/technology/yarn.properties


Techologies processed:

- spring boot (not complete)
- cypress (complete)
- mysql (only server options)
- github action (complete)
- gradle 
- tsconfig (complete)
- mongodb (complete)
- alluxio (complete)
- android (complete, but manually)
- angular 
- circle ci (complete)
- elasticsearch (complete)
- hadoop common (complete)
- hadoop hdfs (complete)
- mapreduce (complete)
- php (complete)
- postgresql (complete)
- rabbitmq (complete)
- yarn (complete)
- ansible (complete)
- kubernetes (not complete)
- maven (complete)
- apache webserver (complete)

Technologies no processed yet:

- zookeeper
- ansible playbook
- gradle wrapper
- maven wrapper
- hadoop hbase
- kafka
- netlify
- nginx
- redis

**Kubernetes**

In [9]:
import requests, re
from bs4 import BeautifulSoup
from urllib.parse import urljoin

INDEX_URL = "https://kubernetes.io/docs/reference/config-api/"

def get_config_api_urls():
    r = requests.get(INDEX_URL)
    soup = BeautifulSoup(r.text, "html.parser")
    return { urljoin(INDEX_URL, a['href'])
             for a in soup.select("ul li a[href*='config-api/']") }

def extract_fields(url):
    print("Parsing", url)
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    properties = []
    table_rows = soup.find_all("tr")
    for row in table_rows:
        cells = row.find_all("td")
        if cells:
            option_cell = cells[0]
            option_text = option_cell.find("code").get_text(strip=True)
            if option_text:
                properties.append((option_text, ""))

    return properties


urls = get_config_api_urls()
all_properties = []
for u in urls:
    all_properties.extend(extract_fields(u))

with open("../data/technology/kubernetes.properties", 'w', encoding='utf-8') as f:
    for option, default in set(all_properties):
        f.write(f"{option}={default}\n")

Parsing https://kubernetes.io/docs/reference/config-api/kubelet-config.v1alpha1/
Parsing https://kubernetes.io/docs/reference/config-api/
Parsing https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta3/
Parsing https://kubernetes.io/zh-cn/docs/reference/config-api/
Parsing https://kubernetes.io/docs/reference/config-api/kubelet-config.v1/
Parsing https://kubernetes.io/docs/reference/config-api/kube-scheduler-config.v1/
Parsing https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta4/
Parsing https://kubernetes.io/docs/reference/config-api/apiserver-config.v1/
Parsing https://kubernetes.io/docs/reference/config-api/apiserver-webhookadmission.v1/
Parsing https://v1-29.docs.kubernetes.io/docs/reference/config-api/
Parsing https://v1-31.docs.kubernetes.io/docs/reference/config-api/
Parsing https://kubernetes.io/docs/reference/config-api/imagepolicy.v1alpha1/
Parsing https://kubernetes.io/docs/reference/config-api/kuberc.v1alpha1/
Parsing https://v1-32.docs.kube

**Ansible**

In [28]:
import requests
from bs4 import BeautifulSoup
import re

URL = "https://docs.ansible.com/ansible/latest/reference_appendices/config.html#common-options"
properties = []

resp = requests.get(URL)
soup = BeautifulSoup(resp.text, "html.parser")


common_options = soup.find("section", {"id": "common-options"})
sections = common_options.find_all("section")
for section in sections:
    option_name = section.find("h3").get_text(strip=True).split("")[0]
    print("Option: ", option_name)
    default_dt = next((dt for dt in section.find_all("dt") if "Default" in dt.get_text()), None)
    #print("Default DT:", default_dt)
    if default_dt:
        dd = default_dt.find_next_sibling("dd")
        print("DD:", dd)
        if dd:
            default_value = dd.get_text(strip=True)
        else:
            default_value = ""

        print("Default Value:", default_value)


    properties.append((option_name, default_value))

with open("../data/technology/ansible.properties", "w") as f:
    for option, default in set(properties):
        f.write(f"{option}={default}\n")

DD: <dd class="field-odd"><p><code class="docutils literal notranslate"><span class="pre">True</span></code></p>
</dd>
Default Value: True
Option:  AGNOSTIC_BECOME_PROMPT
DD: <dd class="field-odd"><p><code class="docutils literal notranslate"><span class="pre">True</span></code></p>
</dd>
Default Value: True
Option:  ANSIBLE_CONNECTION_PATH
DD: <dd class="field-odd"><p><code class="docutils literal notranslate"><span class="pre">None</span></code></p>
</dd>
Default Value: None
Option:  ANSIBLE_COW_ACCEPTLIST
DD: <dd class="field-odd"><p><code class="docutils literal notranslate"><span class="pre">['bud-frogs',</span> <span class="pre">'bunny',</span> <span class="pre">'cheese',</span> <span class="pre">'daemon',</span> <span class="pre">'default',</span> <span class="pre">'dragon',</span> <span class="pre">'elephant-in-snake',</span> <span class="pre">'elephant',</span> <span class="pre">'eyes',</span> <span class="pre">'hellokitty',</span> <span class="pre">'kitty',</span> <span class

**Apache Webserver**

In [57]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import time

BASE = "https://httpd.apache.org/docs/current/mod/directives.html"

def get_directive_links():
    res = requests.get(BASE)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    directive_links = []
    directive_list = soup.find("div", {"id": "directive-list"})
    for element in directive_list.find_all("a", href=True):
        href = element["href"]
        full_url = urljoin("https://httpd.apache.org/docs/current/mod/", href)
        directive_links.append(full_url)

    return sorted(set(directive_links))


def extract_default(url):
    res = requests.get(url)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    properties = []

    # Get directive name from <h1>
    sections = soup.find_all("div", class_="directive-section")
    for section in sections:
        option_header= section.find("h2")
        option_name = option_header.find("a").get_text(strip=True)
        if option_name.startswith("<") and option_name.endswith(">"):
            option_name = option_name[1:-1].strip()

        default_value = ""
        table = section.find("table", class_="directive")
        for row in table.find_all("tr"):
            header = row.find("th")
            if "Default" in header.get_text():
                cell = row.find("td")
                if cell:
                    parts = cell.get_text(strip=True).split(" ")  # 
                    default_value = " ".join(parts[1:])  # Join parts to handle multi-word defaults

        properties.append((option_name, default_value))

    
    return properties


links = get_directive_links()
print(f"Found {len(links)} directive pages.")

all_properties = []
for url in links:
    print(url)
    all_properties.extend(extract_default(url))

with open("../data/technology/apache_webserver.properties", "w", encoding="utf-8") as f:
    for option, default in set(all_properties):
        f.write(f"{option}={default}\n")


Found 720 directive pages.
https://httpd.apache.org/docs/current/mod/core.html#acceptfilter
https://httpd.apache.org/docs/current/mod/core.html#acceptpathinfo
https://httpd.apache.org/docs/current/mod/core.html#accessfilename
https://httpd.apache.org/docs/current/mod/core.html#adddefaultcharset
https://httpd.apache.org/docs/current/mod/core.html#allowencodedslashes
https://httpd.apache.org/docs/current/mod/core.html#allowoverride
https://httpd.apache.org/docs/current/mod/core.html#allowoverridelist
https://httpd.apache.org/docs/current/mod/core.html#cgimapextension
https://httpd.apache.org/docs/current/mod/core.html#cgipassauth
https://httpd.apache.org/docs/current/mod/core.html#cgivar
https://httpd.apache.org/docs/current/mod/core.html#contentdigest
https://httpd.apache.org/docs/current/mod/core.html#defaultruntimedir
https://httpd.apache.org/docs/current/mod/core.html#defaulttype
https://httpd.apache.org/docs/current/mod/core.html#define
https://httpd.apache.org/docs/current/mod/core