## CARRIER

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import urllib3

# Disable SSL warnings since verify=False is used
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

BASE_URL = "https://www.carrier.com"
START_URL = "https://www.carrier.com/residential/en/us/products/heating-cooling/"
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Step 1: Get product line names, image URLs, and detail page links
res = requests.get(START_URL, headers=HEADERS, verify=False)
soup = BeautifulSoup(res.content, "html.parser")

product_lines = []

for block in soup.select("div.col-12.col-md-6.col-xl-4"):
    a_tag = block.find("a", href=True)
    img_tag = block.find("img")
    name_tag = block.find("h3")

    if a_tag and img_tag and name_tag:
        line_url = BASE_URL + a_tag["href"].strip()
        line_name = name_tag.get_text(strip=True)
        line_img = img_tag.get("data-src") or img_tag.get("src")

        product_lines.append({
            "product_line_name": line_name,
            "product_line_url": line_url,
            "product_line_image": line_img
        })

# Step 2: Scrape models from each product line page
results = []

for line in product_lines:
    print(f"Scraping product line: {line['product_line_name']}")

    try:
        res = requests.get(line["product_line_url"], headers=HEADERS, verify=False)
        page = BeautifulSoup(res.content, "html.parser")

        model_blocks = page.find_all("div", {"itemtype": "http://schema.org/ListItem"})

        for block in model_blocks:
            meta_name = block.find("meta", itemprop="name")
            title_tag = block.select_one("div.card-title a")
            # Get image tag (more flexible)
            image_tag = block.select_one("img.card-img-top")

            model_image = image_tag["data-src"].strip() if image_tag and image_tag.has_attr("data-src") else (
                image_tag["src"].strip() if image_tag and image_tag.has_attr("src") else ""
            )

            if meta_name and title_tag:
                name_part = meta_name.get("content", "").strip()
                title_part = title_tag.get_text(strip=True)
                combined_model_name = f"{name_part} - {title_part}"

                # Get model page URL
                model_page_rel_url = title_tag["href"]
                model_page_url = BASE_URL + model_page_rel_url

                # Step 3: Get COOLING SEER2 value from individual model page
                try:
                    model_res = requests.get(model_page_url, headers=HEADERS, verify=False)
                    model_soup = BeautifulSoup(model_res.content, "html.parser")

                    seer_label = model_soup.find("td", string=lambda text: text and "COOLING SEER2" in text.upper())
                    if seer_label and seer_label.find_next_sibling("td"):
                        seer_value = seer_label.find_next_sibling("td").get_text(strip=True)
                    else:
                        seer_value = ""

                except Exception as e:
                    print(f"Error fetching SEER2 from {model_page_url}: {e}")
                    seer_value = ""

                # Save result
                results.append({
                    "product_line_name": line["product_line_name"],
                    "product_line_image": line["product_line_image"],
                    "model_name": combined_model_name,
                    "model_image": model_image,
                    "efficiency": seer_value
                })

        time.sleep(1)

    except Exception as e:
        print(f"Error scraping {line['product_line_name']}: {e}")

# Step 4: Save all data to CSV
df = pd.DataFrame(results)
df.to_csv("carrier_hvac_models.csv", index=False)
print(df.head())


Scraping product line: Air Conditioners
Scraping product line: Furnaces
Scraping product line: Heat Pumps
Scraping product line: Fan Coils
Scraping product line: Ductless Systems
Scraping product line: Boilers
Scraping product line: Crossover Solutions
Scraping product line: Evaporator Coils
Scraping product line: Geothermal Heat Pumps
Scraping product line: Packaged Products
Scraping product line: Carrier Opti-V
Scraping product line: Toshiba Carrier Opti-V
Scraping product line: VRF Controls
  product_line_name                                 product_line_image  \
0  Air Conditioners  https://images.carriercms.com/image/upload/w_a...   
1  Air Conditioners  https://images.carriercms.com/image/upload/w_a...   
2  Air Conditioners  https://images.carriercms.com/image/upload/w_a...   
3  Air Conditioners  https://images.carriercms.com/image/upload/w_a...   
4  Air Conditioners  https://images.carriercms.com/image/upload/w_a...   

                                          model_name  \


In [26]:
import pandas as pd

# Load the CSV
df = pd.read_csv("carrier_hvac_models.csv")

# Append ' SEER2' if there's a value
df["efficiency"] = df["efficiency"].apply(
    lambda x: f"{x} SEER2" if pd.notna(x) and str(x).strip() != "" else x
)

# Save updated CSV
df.to_csv("carrier_hvac_models.csv", index=False)

# Preview
print(df[["model_name", "efficiency"]].head())

                                          model_name        efficiency
0  26VNA1 - Infinity®Variable-Speed Central Air C...    Up to 21 SEER2
1      26TPA8 - Performance™ 2-Stage Air Conditioner    Up to 18 SEER2
2  26TPA8***C - Performance™ 2-Stage Coastal Air ...    Up to 18 SEER2
3  26SPA6 - Performance™ Air Conditioner with Int...  Up to 16.5 SEER2
4                  26SCA5 - Comfort™ Air Conditioner  Up to 16.5 SEER2


## LENNOX

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import urllib3
import urllib.parse

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

BASE_URL = "https://www.lennox.com"
START_URL = "https://www.lennox.com/residential/products/heating-cooling"
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Step 1: Get all product lines
res = requests.get(START_URL, headers=HEADERS, verify=False)
soup = BeautifulSoup(res.content, "html.parser")

product_lines = []

for tile in soup.select("div.lnx-container.lnx-grid.products.lnx-grid-three-up a"):
    href = tile.get("href")
    if href and href.startswith("/residential/products/heating-cooling/"):
        product_line_name = href.strip().split("/")[-1]
        product_line_url = BASE_URL + href.strip()
        product_lines.append({
            "product_line_name": product_line_name,
            "product_line_url": product_line_url
        })

# Step 2: Scrape models from each product line
results = []

for line in product_lines:
    print(f"Scraping: {line['product_line_name']}")
    try:
        res = requests.get(line["product_line_url"], headers=HEADERS, verify=False)
        page = BeautifulSoup(res.content, "html.parser")
        models = page.select("div.lnx-product-tile")

        for m in models:
            name_tag = m.select_one("h2.lnx-product-title span")
            image_tag = m.select_one("div.lnx-product-image img")
            link_tag = m.select_one("h2.lnx-product-title a")

            if name_tag and image_tag and link_tag:
                model_name = name_tag.get_text(strip=True)
                model_image = urllib.parse.urljoin(BASE_URL, image_tag.get("src").strip())
                model_url = BASE_URL + link_tag.get("href").strip()

                # Step 3: Visit model page and extract SEER2
                try:
                    model_res = requests.get(model_url, headers=HEADERS, verify=False)
                    model_soup = BeautifulSoup(model_res.content, "html.parser")

                    rating_tag = model_soup.find("div", class_="lnx-efficiency-rating", itemtype="http://schema.org/Rating")
                    rating_value_tag = rating_tag.find("span", itemprop="ratingValue") if rating_tag else None
                    efficiency = rating_value_tag.get_text(strip=True) + " SEER2" if rating_value_tag else ""

                except Exception as e:
                    print(f"Error fetching SEER2 from {model_url}: {e}")
                    efficiency = ""

                results.append({
                    "product_line_name": line["product_line_name"],
                    "product_line_image": "",  # Not available
                    "model_name": model_name,
                    "model_image": model_image,
                    "efficiency": efficiency
                })

        time.sleep(1)

    except Exception as e:
        print(f"Error scraping {line['product_line_name']}: {e}")

# Step 4: Save results
df = pd.DataFrame(results)
df.to_csv("lennox_hvac_models.csv", index=False)
print(df.head())

Scraping: furnaces
Scraping: air-conditioners
Scraping: heat-pumps
Scraping: packaged-units
Scraping: air-handlers
Scraping: boilers
Scraping: garage-heaters
Scraping: mini-split-systems
  product_line_name product_line_image  \
0          furnaces                      
1          furnaces                      
2          furnaces                      
3          furnaces                      
4          furnaces                      

                                          model_name  \
0               SLP99V Variable-Capacity Gas Furnace   
1  SL297NV Variable-Speed, Ultra-Low Emissions Ga...   
2                  SL280V Variable-Speed Gas Furnace   
3  SL280NV Variable-Speed, Ultra-Low Emissions Ga...   
4                                 EL297V Gas Furnace   

                                         model_image efficiency  
0  https://www.lennox.com/dA/4e5724a728/slp99v-pr...   99 SEER2  
1  https://www.lennox.com/dA/e90670413a/sl297nv-p...   97 SEER2  
2  https://www.lennox.com

## Web Scraping template

In [6]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# ------------------------------
# 1. Initialize Selenium Driver
# ------------------------------
def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # run in background
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# ------------------------------
# 2. Extract Product Lines
# ------------------------------
def extract_product_lines(driver, start_url, product_line_selector, name_fn, url_fn, image_fn):
    driver.get(start_url)
    time.sleep(2)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    product_lines = []
    for tag in soup.select(product_line_selector):
        try:
            name = name_fn(tag)
            url = url_fn(tag)
            image = image_fn(tag)
            if name and url:
                product_lines.append({
                    "product_line_name": name,
                    "product_line_url": url,
                    "product_line_image": image
                })
        except:
            continue

    return product_lines

# ------------------------------
# 3. Extract Models from Line
# ------------------------------
def extract_models_from_line(driver, product_line, model_selector, name_fn, image_fn, efficiency_fn=None):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    for block in soup.select(model_selector):
        try:
            name = name_fn(block)
            image = image_fn(block)
            efficiency = efficiency_fn(block) if efficiency_fn else ""
            if name:
                models.append({
                    "model_name": name,
                    "model_image": image,
                    "efficiency": efficiency
                })
        except:
            continue

    return models

# ------------------------------
# 4. Scrape Full Brand
# ------------------------------
def scrape_brand(driver, start_url, line_cfg, model_cfg):
    product_lines = extract_product_lines(
        driver=driver,
        start_url=start_url,
        product_line_selector=line_cfg["selector"],
        name_fn=line_cfg["name_fn"],
        url_fn=line_cfg["url_fn"],
        image_fn=line_cfg["image_fn"]
    )

    all_results = []

    for line in product_lines:
        print(f"Scraping: {line['product_line_name']}")
        try:
            models = extract_models_from_line(
                driver=driver,
                product_line=line,
                model_selector=model_cfg["selector"],
                name_fn=model_cfg["name_fn"],
                image_fn=model_cfg["image_fn"],
                efficiency_fn=model_cfg.get("efficiency_fn")
            )
            for model in models:
                all_results.append({
                    "product_line_name": line["product_line_name"],
                    "product_line_image": line["product_line_image"],
                    "model_name": model["model_name"],
                    "model_image": model["model_image"],
                    "efficiency": model.get("efficiency", "")
                })
        except Exception as e:
            print(f"Error scraping {line['product_line_name']}: {e}")

    return pd.DataFrame(all_results)

# ------------------------------
# 5. Run Scraper for a Brand
# ------------------------------
def run_brand_scraper(start_url, line_cfg, model_cfg, output_file):
    driver = init_driver()
    try:
        df = scrape_brand(driver, start_url, line_cfg, model_cfg)
        df.to_csv(output_file, index=False)
        print(f"Scraping complete. Saved to {output_file}")
    finally:
        driver.quit()


## Rheem

In [7]:
# Rheem-specific CSS selectors and logic
rheem_line_cfg = {
    "selector": "ul.explorelist a[href]",
    "name_fn": lambda tag: tag.get_text(strip=True),
    "url_fn": lambda tag: tag.get("href").strip(),
    "image_fn": lambda tag: tag.find("img")["src"].strip() if tag.find("img") else ""
}

rheem_model_cfg = {
    "selector": "div.productcard",
    "name_fn": lambda tag: tag.select_one("div.producttitle h3.product-name").get_text(strip=True),
    "image_fn": lambda tag: (
        tag.select_one("div.productimage img")["srcset"].split()[0]
        if tag.select_one("div.productimage img") and tag.select_one("div.productimage img").has_attr("srcset")
        else tag.select_one("div.productimage img")["src"].strip()
        if tag.select_one("div.productimage img") and tag.select_one("div.productimage img").has_attr("src")
        else ""
    ),
    "efficiency_fn": lambda tag: tag.select_one("div.product-desc ul li").get_text(strip=True) if tag.select_one("div.product-desc ul li") else ""
}

# Run for Rheem
run_brand_scraper(
    start_url="https://www.rheem.com/products/residential/heating-and-cooling/",
    line_cfg=rheem_line_cfg,
    model_cfg=rheem_model_cfg,
    output_file="rheem_hvac_models.csv"
)


Scraping: Furnaces
Scraping: Air Conditioners
Scraping: Cooling Coils
Scraping: Air Handlers
Scraping: Heat Pumps
Scraping: Mini-Splits
Scraping: Oil Furnaces
Scraping complete. Saved to rheem_hvac_models.csv


## american standard companies

In [8]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

BASE_URL = "https://www.americanstandardair.com"

def extract_product_lines(driver):
    driver.get(BASE_URL + "/products/#heating-cooling")
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    product_lines = []
    blocks = soup.select("div.m-6.inline-block")

    for block in blocks:
        a_tag = block.find("a", href=True)
        if a_tag and a_tag["href"].startswith("/products/"):
            line_name = a_tag.get_text(strip=True)
            line_url = urllib.parse.urljoin(BASE_URL, a_tag["href"])
            product_lines.append({
                "product_line_name": line_name,
                "product_line_url": line_url,
                "product_line_image": ""  # not available at this level
            })

    return product_lines

def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    for h3 in soup.select("h3.mb-2.text-base"):
        a_tag = h3.find("a", href=True)
        if a_tag:
            model_name = a_tag.get_text(strip=True)
            model_url = urllib.parse.urljoin(BASE_URL, a_tag["href"])

            # Find image by matching <a> href
            model_image = ""
            img_container = soup.find("a", href=a_tag["href"])
            if img_container:
                img_tag = img_container.find("img")
                if img_tag and img_tag.get("src"):
                    model_image = urllib.parse.urljoin(BASE_URL, img_tag["src"])

            efficiency = extract_efficiency(driver, model_url)

            models.append({
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": efficiency
            })

    return models

def extract_efficiency(driver, url):
    try:
        driver.get(url)
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        wrapper = soup.find("div", class_="mb-4 max-w-[40rem] lg:hidden")
        if not wrapper:
            return ""

        grids = wrapper.find_all("div", class_=lambda x: x and x.startswith("grid"))
        for grid in grids:
            label = grid.find("span", class_="ml-3")
            value_div = grid.find("div", class_="flex items-center rounded-r-[1rem]")
            if label and value_div and "SEER2" in label.get_text(strip=True).upper():
                return value_div.get_text(strip=True) + " SEER2"

        return ""
    except Exception as e:
        print(f"Error extracting SEER2 from {url}: {e}")
        return ""

def scrape_american_standard():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                for model in models:
                    all_results.append({
                        "product_line_name": line["product_line_name"],
                        "product_line_image": line["product_line_url"],
                        "model_name": model["model_name"],
                        "model_image": model["model_image"],
                        "efficiency": model["efficiency"]
                    })
            except Exception as e:
                print(f"Error scraping models in {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)
        df.to_csv("american_standard_hvac_models.csv", index=False)
        print("Scraping complete. Saved to american_standard_hvac_models.csv")
    finally:
        driver.quit()

# Run the scraper
scrape_american_standard()


Scraping: Air conditioners
Scraping: Furnace
Scraping: Heat pumps
Scraping: Air handlers
Scraping: Coils
Scraping: Gas & electric packaged
Scraping: Heat pump packaged
Scraping: Air conditioner packaged
Scraping: Hybrid Dual Fuel Systems
Scraping: Single-zone ductless
Scraping: Multi-zone ductless
Scraping: Ventilation
Scraping: Air purification
Scraping: Humidity control
Scraping: Smart thermostats
Scraping: Traditional thermostats
Scraping: Zoning
Scraping: Light Commercial Products
Scraping complete. Saved to american_standard_hvac_models.csv


## Goodman

In [46]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

BASE_URL = "https://www.goodmanmfg.com"

# Initialize Selenium driver
def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Extract product lines
def extract_product_lines(driver):
    driver.get(BASE_URL + "/products")
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    lines = []
    for item in soup.select("div.gm-product-item"):
        a_tag = item.find("a", href=True)
        img_tag = item.find("img")
        name_tag = item.select_one("div.gm-category span")

        if a_tag and name_tag:
            line_name = name_tag.get_text(strip=True)
            line_url = urllib.parse.urljoin(BASE_URL, a_tag["href"])
            line_img = img_tag["src"].strip() if img_tag else ""
            lines.append({
                "product_line_name": line_name,
                "product_line_url": line_url,
                "product_line_image": line_img
            })

    return lines

# Extract models
def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    for block in soup.select("div.gm-product.primary-list"):
        a_tag = block.find("a", href=True, title=True)
        img_tag = block.find("img")

        if a_tag:
            model_name = a_tag.get_text(strip=True)
            model_url = urllib.parse.urljoin(BASE_URL, a_tag["href"])
            model_image = img_tag["src"].strip() if img_tag else ""
            efficiency = extract_efficiency(driver, model_url)

            models.append({
                "product_line_name": product_line["product_line_name"],
                "product_line_image": product_line["product_line_image"],
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": efficiency
            })

    return models

# Extract efficiency
def extract_efficiency(driver, url):
    try:
        driver.get(url)
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        block = soup.find("div", class_="dimension-left")
        if block:
            for li in block.find_all("li"):
                spans = li.find_all("span")
                if len(spans) >= 2 and "EFFICIENCY" in spans[0].get_text(strip=True).upper():
                    return spans[1].get_text(strip=True)
        return ""
    except Exception as e:
        print(f"Error extracting SEER2 from {url}: {e}")
        return ""

# Main scraper
def scrape_goodman():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                all_results.extend(models)
            except Exception as e:
                print(f"Error scraping models from {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)

        # Enforce column order
        df = df[["product_line_name", "product_line_image", "model_name", "model_image", "efficiency"]]

        df.to_csv("goodman_hvac_models.csv", index=False)
        print("Scraping complete. Saved to goodman_hvac_models.csv")
    finally:
        driver.quit()

# Run it
scrape_goodman()


Scraping: Air Conditioners
Scraping: Heat Pumps
Scraping: Gas Furnaces
Scraping: Packaged Units
Scraping: Air Handlers and Coils
Scraping: Indoor Air Essentials
Scraping: Temperature Control
Scraping: Ductless Systems
Scraping complete. Saved to goodman_hvac_models.csv


## York

In [48]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

BASE_URL = "https://www.york.com"

# 1. Initialize driver
def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# 2. Extract product lines
def extract_product_lines(driver):
    driver.get(BASE_URL + "/residential-products#HeatingandCooling")
    time.sleep(4)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    product_lines = []
    blocks = soup.select("div.productcategories")

    for block in blocks:
        a_tag = block.find("a", href=True)
        img_tag = block.find("img")
        h2_tag = block.find("h2")

        if a_tag and img_tag and h2_tag:
            name = h2_tag.get_text(strip=True)
            url = urllib.parse.urljoin(BASE_URL, a_tag["href"])
            image = urllib.parse.urljoin(BASE_URL, img_tag["src"])
            product_lines.append({
                "product_line_name": name,
                "product_line_url": url,
                "product_line_image": image
            })

    return product_lines

# 3. Extract models from product line
def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    blocks = soup.select("div.col-sm-6.col-md-4")

    for block in blocks:
        h2_tag = block.find("h2", class_="title")
        img_tag = block.find("img")

        if h2_tag and img_tag:
            model_name = h2_tag.get_text(strip=True)
            model_image = img_tag["src"]
            model_image = urllib.parse.urljoin(BASE_URL, model_image)

            models.append({
                "product_line_name": product_line["product_line_name"],
                "product_line_image": product_line["product_line_image"],
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": model_name  # per your instruction
            })

    return models

# 4. Main scraper
def scrape_york():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                all_results.extend(models)
            except Exception as e:
                print(f"Error scraping models for {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)
        df = df[["product_line_name", "product_line_image", "model_name", "model_image", "efficiency"]]
        df.to_csv("york_hvac_models.csv", index=False)
        print("Scraping complete. Saved to york_hvac_models.csv")
    finally:
        driver.quit()

# Run it
scrape_york()


Scraping: Furnaces
Scraping: Heat Pumps
Scraping: Mini-Split Systems
Scraping: Packaged Units
Scraping: Air Conditioners
Scraping: Air Handlers
Scraping: Evaporator Coils
Scraping: Furnaces
Scraping: Heat Pumps
Scraping: Mini-Split Systems
Scraping: Packaged Units
Scraping: Air Conditioners
Scraping: Air Handlers
Scraping: Evaporator Coils
Scraping: Furnaces
Scraping: Heat Pumps
Scraping: Mini-Split Systems
Scraping: Packaged Units
Scraping: Ultraviolet Air Treatment System
Scraping: Whole-House Bypass Humidifier
Scraping: Whole-House Dehumidifier
Scraping: Whole-House Fan-Powered Humidifier
Scraping: Energy Recovery Ventilator
Scraping: Hybrid Electronic Air Cleaner
Scraping: Media Air Cleaners
Scraping: Steam Humidifier
Scraping: Ultraviolet Air Treatment System
Scraping: Whole-House Bypass Humidifier
Scraping: Whole-House Dehumidifier
Scraping: Whole-House Fan-Powered Humidifier
Scraping: Energy Recovery Ventilator
Scraping: Hybrid Electronic Air Cleaner
Scraping: Media Air Cleaners

## bryant

In [49]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

BASE_URL = "https://www.bryant.com"

# 1. Set up Selenium driver
def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# 2. Extract product lines
def extract_product_lines(driver):
    driver.get(BASE_URL + "/en/us/products/")
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    product_lines = []
    items = soup.select("ul.ct-menusub-basic-header li.nav-item.link-text-indent")

    for li in items:
        a_tag = li.find("a", href=True)
        if a_tag:
            name = a_tag.get_text(strip=True)
            url = urllib.parse.urljoin(BASE_URL, a_tag["href"])
            product_lines.append({
                "product_line_name": name,
                "product_line_url": url,
                "product_line_image": ""  # no image at product line level
            })

    return product_lines

# 3. Extract models
def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    blocks = soup.select("div.col-lg-4.col-md-6.pb-4.item-start")

    for block in blocks:
        # Model name
        name_tag = block.select_one("div.card-title a")
        model_name = name_tag.get_text(strip=True) if name_tag else ""

        # Model image
        img_tag = block.select_one("a img")
        model_image = img_tag["data-src"] if img_tag and img_tag.has_attr("data-src") else (
            img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
        )
        model_image = urllib.parse.urljoin(BASE_URL, model_image)

        # SEER2 efficiency
        eff_tag = block.select_one("span.COOLING.EFFICIENCY")
        efficiency = eff_tag.get_text(strip=True) if eff_tag else ""

        if model_name:
            models.append({
                "product_line_name": product_line["product_line_name"],
                "product_line_image": product_line["product_line_image"],
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": efficiency
            })

    return models

# 4. Main scraper
def scrape_bryant():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                all_results.extend(models)
            except Exception as e:
                print(f"Error scraping models from {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)
        df = df[["product_line_name", "product_line_image", "model_name", "model_image", "efficiency"]]
        df.to_csv("bryant_hvac_models.csv", index=False)
        print("Scraping complete. Saved to bryant_hvac_models.csv")
    finally:
        driver.quit()

# Run it
scrape_bryant()


Scraping: Air Conditioners
Scraping: Boilers
Scraping: Crossover Solutions
Scraping: Ductless Systems
Scraping: Evaporator Coils
Scraping: Fan Coils
Scraping: Gas Furnaces
Scraping: Geothermal Heat Pumps
Scraping: Heat Pumps
Scraping: Oil Furnaces
Scraping: Packaged Products
Scraping: Evolution™ Connex™
Scraping: ecobee Smart Thermostats
Scraping: Bryant Smart Thermostat
Scraping: Air Purifiers
Scraping: CO Alarms
Scraping: Dehumidifiers
Scraping: Humidifiers
Scraping: UV Lamps
Scraping: Ventilators
Scraping: Outdoor Units
Scraping: Indoor Units
Scraping: Controls & Accessories
Scraping: Rooftop
Scraping: Split Systems
Scraping: About Our Dealers
Scraping: Federal Tax Credits
Scraping: Financing
Scraping: Minimum Efficiency Standards
Scraping: Rebates
Scraping: Repair or Replace?
Scraping: System Types
Scraping: Evolution™ System
Scraping: Ductless Crossover
Scraping: Ductless Systems
Scraping: Geothermal Systems
Scraping: Heat Pumps
Scraping: Indoor Air Quality
Scraping: InteliSense T

## Amana

In [50]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

BASE_URL = "https://www.amana-hac.com"

# 1. Initialize driver
def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# 2. Extract product lines
def extract_product_lines(driver):
    driver.get(BASE_URL + "/products")
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    lines = []
    for block in soup.select("div.product"):
        a_tag = block.find("a", href=True)
        h3_tag = block.find("h3")
        img_tag = block.select_one("div.product-img img")

        if a_tag and h3_tag and img_tag:
            name = h3_tag.get_text(strip=True)
            url = urllib.parse.urljoin(BASE_URL, a_tag["href"])
            img = urllib.parse.urljoin(BASE_URL, img_tag["src"])
            lines.append({
                "product_line_name": name,
                "product_line_url": url,
                "product_line_image": img
            })

    return lines

# 3. Extract models from product line
def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    for block in soup.select("div.pitem"):
        name_tag = block.select_one("div.min-height-box h3")
        img_tag = block.select_one("div.product-img img")
        desc = block.select_one("div.product-desc")

        # Get model name
        model_name = name_tag.get_text(strip=True) if name_tag else ""

        # Get image
        model_image = img_tag["src"] if img_tag and img_tag.has_attr("src") else ""
        model_image = urllib.parse.urljoin(BASE_URL, model_image)

        # Get SEER2 efficiency
        efficiency = ""
        if desc:
            for li in desc.select("li"):
                text = li.get_text(strip=True)
                if "SEER2" in text:
                    efficiency = text
                    break

        if model_name:
            models.append({
                "product_line_name": product_line["product_line_name"],
                "product_line_image": product_line["product_line_image"],
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": efficiency
            })

    return models

# 4. Main scraper
def scrape_amana():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                all_results.extend(models)
            except Exception as e:
                print(f"Error scraping models from {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)
        df = df[["product_line_name", "product_line_image", "model_name", "model_image", "efficiency"]]
        df.to_csv("amana_hvac_models.csv", index=False)
        print("Scraping complete. Saved to amana_hvac_models.csv")
    finally:
        driver.quit()

# Run it
scrape_amana()


Scraping: Amana Cloud Services
Scraping: Air Conditioners
Scraping: Gas Furnaces
Scraping: Heat Pumps
Scraping: Air Handlers and Coils
Scraping: Temperature Controls
Scraping: Packaged Units
Scraping: Indoor Air Essentials
Scraping: Ductless Systems
Scraping complete. Saved to amana_hvac_models.csv


## TRANE

In [51]:
import time
import re
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

BASE_URL = "https://www.trane.com"

# 1. Set up Selenium
def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# 2. Extract product lines
def extract_product_lines(driver):
    driver.get(BASE_URL + "/residential/en/products/")
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    lines = []
    for block in soup.select("div.border.hover\\:shadow-md.p-4.flex.flex-col"):
        name_tag = block.select_one("div.min-h-\\[140px\\] a")
        img_tag = block.select_one("a img")

        if name_tag and img_tag:
            name = name_tag.get_text(strip=True)
            url = urllib.parse.urljoin(BASE_URL, name_tag["href"])
            image = urllib.parse.urljoin(BASE_URL, img_tag["src"])
            lines.append({
                "product_line_name": name,
                "product_line_url": url,
                "product_line_image": image
            })

    return lines

# 3. Extract models from product line
def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    for card in soup.select("div.product-card.border.flex.flex-col"):
        model_tag = card.select_one("h3.text-base a")
        img_tag = card.select_one("a img")

        if model_tag and img_tag:
            model_name = model_tag.get_text(strip=True)
            model_url = urllib.parse.urljoin(BASE_URL, model_tag["href"])
            model_image = urllib.parse.urljoin(BASE_URL, img_tag["src"])
            efficiency = extract_efficiency(driver, model_url)

            models.append({
                "product_line_name": product_line["product_line_name"],
                "product_line_image": product_line["product_line_image"],
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": efficiency
            })

    return models

# 4. Extract SEER2 efficiency from model detail page
def extract_efficiency(driver, url):
    try:
        driver.get(url)
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        para = soup.find("p", class_="text-white font-medium text-22")
        if para:
            text = para.get_text(strip=True)
            match = re.search(r"SEER2\s+of\s+up\s+to\s+\d+(\.\d+)?", text, re.IGNORECASE)
            if match:
                return match.group(0)
        return ""
    except Exception as e:
        print(f"Error extracting SEER2 from {url}: {e}")
        return ""

# 5. Run the scraper
def scrape_trane():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                all_results.extend(models)
            except Exception as e:
                print(f"Error scraping models from {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)
        df = df[["product_line_name", "product_line_image", "model_name", "model_image", "efficiency"]]
        df.to_csv("trane_hvac_models.csv", index=False)
        print("Scraping complete. Saved to trane_hvac_models.csv")
    finally:
        driver.quit()

# Run it
scrape_trane()


Scraping: Air conditioners
Scraping: Furnaces
Scraping: Heat pumps
Scraping: Air handlers
Scraping: Smart thermostats
Scraping: Traditional thermostats
Scraping: Zoning
Scraping: Gas & electric packaged
Scraping: Heat pump packaged
Scraping: Earthwise hybrid dual fuel packaged
Scraping: Air conditioner packaged
Scraping: Single-zone ductless
Scraping: Multi-zone ductless
Scraping: Filters
Scraping: Air purification
Scraping: Humidity control
Scraping: Ventilation
Scraping: Trane Home App
Scraping: Coils
Scraping complete. Saved to trane_hvac_models.csv


## Daikin

In [52]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

BASE_URL = "https://daikincomfort.com"

def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Step 1: Extract product lines
def extract_product_lines(driver):
    driver.get(BASE_URL + "/products/heating-cooling/whole-home-systems")
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    lines = []
    blocks = soup.select("div.row.hoverTab__preview__item")

    for block in blocks:
        h6_tag = block.select_one("h6")
        img_tag = block.select_one("picture img")
        a_tag = block.select_one("a.Link[href]")

        if h6_tag and img_tag and a_tag:
            name = h6_tag.get_text(strip=True)
            image = urllib.parse.urljoin(BASE_URL, img_tag["src"])
            url = urllib.parse.urljoin(BASE_URL, a_tag["href"])

            lines.append({
                "product_line_name": name,
                "product_line_image": image,
                "product_line_url": url
            })

    return lines

# Step 2: Extract models
def extract_models(driver, product_line):
    driver.get(product_line["product_line_url"])
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    models = []
    for card in soup.select("div.productCard"):
        name_tag = card.select_one("h6.productCard__title")
        img_tag = card.select_one("a.prodImageLink img")
        eff_block = card.select_one("div.productCard__content__info__each")

        model_name = name_tag.get_text(strip=True) if name_tag else ""

        model_image = ""
        if img_tag and img_tag.has_attr("src"):
            model_image = urllib.parse.urljoin(BASE_URL, img_tag["src"])

        efficiency = ""
        if eff_block:
            label = eff_block.find("h6")
            value = eff_block.find("p")
            if label and value and "SEER2" in label.get_text(strip=True).upper():
                efficiency = value.get_text(strip=True) + " " + label.get_text(strip=True)

        if model_name:
            models.append({
                "product_line_name": product_line["product_line_name"],
                "product_line_image": product_line["product_line_image"],
                "model_name": model_name,
                "model_image": model_image,
                "efficiency": efficiency
            })

    return models

# Step 3: Run full scraper
def scrape_daikin():
    driver = init_driver()
    try:
        product_lines = extract_product_lines(driver)
        all_results = []

        for line in product_lines:
            print(f"Scraping: {line['product_line_name']}")
            try:
                models = extract_models(driver, line)
                all_results.extend(models)
            except Exception as e:
                print(f"Error scraping models from {line['product_line_name']}: {e}")

        df = pd.DataFrame(all_results)
        df = df[["product_line_name", "product_line_image", "model_name", "model_image", "efficiency"]]
        df.to_csv("daikin_hvac_models.csv", index=False)
        print("Scraping complete. Saved to daikin_hvac_models.csv")
    finally:
        driver.quit()

# Run it
scrape_daikin()


Scraping: Whole Home Heat Pumps
Scraping: Whole Home Air Conditioners
Scraping: Air Handlers and Coils
Scraping: Gas Furnaces
Scraping: Packaged Products
Scraping complete. Saved to daikin_hvac_models.csv
