In [None]:
%pip install pandas numpy requests beautifulsoup4

In [2]:
import pandas as pd
import requests
from urllib import response
from bs4 import BeautifulSoup

In [34]:
# Load the listing addresses from CSV (path is relative to the working directory)
# and preview the first rows.
listing_csv_path = 'data/listing_address.csv'
df = pd.read_csv(listing_csv_path)
df.head()

Unnamed: 0,Sale Date,Address,City,Zip
0,11/12/2025,2811 VILLAGE BLVD 404,WEST PALM BEACH,33409
1,11/12/2025,915 SW 3RD ST,BOYNTON BEACH,33435
2,11/12/2025,5619 KINGSMILL CT,LAKE WORTH,33463
3,11/12/2025,22171 SW 62ND AVE,BOCA RATON,33428
4,11/12/2025,1026 10TH LN,GREENACRES,33463


In [35]:
# Clean the column names: drop leading/trailing whitespace from each header
# so later lookups such as df["Address"] work.
df.columns = [column_name.strip() for column_name in df.columns]

In [37]:
# Build the search string "<Address>, <City> <Zip>" for every listing.
# NOTE(review): the previous comment here said the zip code is NOT included
# (because some properties have a range of zip codes), but the active code does
# append it. The zip-less variant would be Address + ", " + City. Confirm which
# form the address search returns in "searchTerm", since PCN matching is an
# exact comparison.
street_part = df["Address"].str.strip()
city_part = df["City"].str.strip()
zip_part = df["Zip"].astype(str)
df["full_address"] = street_part + ", " + city_part + " " + zip_part
df["full_address"].head()


0    2811 VILLAGE BLVD 404, WEST PALM BEACH 33409
1              915 SW 3RD ST, BOYNTON BEACH 33435
2             5619 KINGSMILL CT, LAKE WORTH 33463
3             22171 SW 62ND AVE, BOCA RATON 33428
4                  1026 10TH LN, GREENACRES 33463
Name: full_address, dtype: object

In [38]:
# Helper: decide whether an address value is unusable for a lookup.
def is_empty_address(address):
    """Return True when ``address`` cannot be used as a search string.

    Args:
        address: Value taken from the address column. Expected to be a string,
            but missing cells may arrive as ``None``, float NaN or another
            missing-value marker.

    Returns:
        True for ``None``, for any non-string value (float NaN, ``pd.NA``,
        numbers, ...) and for strings that are empty or whitespace-only;
        False for every other string.
    """
    # Only strings have .strip(); checking the type first keeps non-float
    # missing markers (and any other stray type) from raising AttributeError.
    if not isinstance(address, str):
        return True
    return address.strip() == ""


In [39]:
# Payload builder for the address-search API call.
def build_payload(search_address):
    """Return the JSON body for one address search.

    Every field is a fixed value except ``searchText``, which carries
    ``search_address`` unchanged.

    NOTE(review): ``uID`` is hard-coded; presumably a client/session identifier
    for the search service -- confirm it is not a secret and does not expire.
    """
    payload = dict(
        inputName="addresssearch",
        searchLimit="20",
        uID="89540f28-8b9a-4aed-b609-72529f86a3ca",
        version=2,
        removeZip=True,
        papaVersion=True,
        removeChar="_",
        removeSpace=True,
        papaVariance=False,
    )
    # Added last so the key order matches the original literal.
    payload["searchText"] = search_address
    return payload
    

In [None]:
# Call the address-search API and return its decoded JSON response.
def api_call(address, timeout=30):
    """POST an address search to the maps.pbc.gov ``anysearch`` endpoint.

    Args:
        address: Search text; sent as the payload built by ``build_payload``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` when the request fails (connection error, timeout), the status
        is not 200, or the body is not valid JSON. Each failure is reported
        with ``print`` so a batch of lookups can keep going.
    """
    url = "https://maps.pbc.gov/giswebapi/anysearch"
    try:
        response = requests.post(url, json=build_payload(address), timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: request failed for {address!r}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # JSON decode errors from requests are ValueError subclasses.
        print(f"Error: invalid JSON in response for {address!r}: {exc}")
        return None

In [41]:
# Helper: normalize text by upper-casing it and collapsing whitespace.
def normalize(text):
    """Return ``text`` in upper case with exactly one space between words.

    Leading and trailing whitespace is dropped and every run of whitespace
    inside the text becomes a single space.
    """
    uppercased = text.upper()
    words = uppercased.split()
    return " ".join(words)

In [42]:
# Find the PCN in the API response data for the given address.
def get_pcn_number(address, data):
    """Return the ``PCN`` of the first result whose search term equals ``address``.

    The comparison is an exact match after ``normalize`` is applied to both
    sides (upper case, single spaces); partial matches are ignored.

    Args:
        address: Address string that was searched for.
        data: Iterable of result dicts, each expected to carry ``searchTerm``
            and ``PCN`` keys. ``None`` or an empty iterable yields ``None``.

    Returns:
        The matching item's ``PCN`` value, or ``None`` when nothing matches.
    """
    address_norm = normalize(address)
    for item in data or []:
        # Skip malformed entries instead of crashing the whole lookup.
        if not isinstance(item, dict):
            continue
        search_term = item.get("searchTerm")
        # A missing or null searchTerm cannot be normalized (and must not match).
        if not isinstance(search_term, str):
            continue
        if normalize(search_term) == address_norm:
            pcn = item.get("PCN")
            print("get_pcn_number :" + str(pcn))
            return pcn

    return None

In [43]:
# Call the API and extract the PCN for the given address.
def get_pcn(address):
    """Look up the PCN for ``address``.

    Returns ``None`` without calling the API when ``is_empty_address`` rejects
    the address, and ``None`` when ``api_call`` returns nothing usable;
    otherwise returns whatever ``get_pcn_number`` finds in the response.
    """
    if not is_empty_address(address):
        data = api_call(address)
        if data:
            return get_pcn_number(address, data)
    return None

In [None]:
# Look up the PCN for every listing and collect the results in row order
# (one entry per row, None when no PCN was found).
pcn_results = []
for idx, address in df["full_address"].items():
    print(f"Processing row {idx}: {address}")
    pcn_results.append(get_pcn(address))
    

In [14]:
# Store the looked-up PCNs in a new "PCN" column.
# pcn_results received exactly one entry per row while iterating df in order,
# so the list lines up positionally with the DataFrame rows.
df["PCN"] = pcn_results
df["PCN"].head()

0    None
1    None
2    None
3    None
4    None
Name: PCN, dtype: object

In [15]:
# Fetch the property-details page for a PCN.
def get_property_details(pcn, timeout=30):
    """Download the pbcpao.gov ``Property/MapDetails`` page for ``pcn``.

    Args:
        pcn: Parcel identifier, sent as the ``parcelId`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The response body as text on HTTP 200. ``None`` when ``pcn`` is
        missing (``None``, float NaN, blank), when the request fails, or when
        the status is not 200; failures are reported with ``print``.
    """
    # NaN is the only float not equal to itself; without this check it would
    # be stringified to "nan" and sent to the server as a parcel id.
    pcn_is_nan = isinstance(pcn, float) and pcn != pcn
    if pcn is None or pcn_is_nan or str(pcn).strip() == "":
        return None

    url = "https://pbcpao.gov/Property/MapDetails"
    params = {"parcelId": pcn}

    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching details for PCN {pcn}: {exc}")
        return None

    if response.status_code == 200:
        return response.text

    print(f"Error fetching details for PCN {pcn}: {response.status_code}")
    return None

In [16]:
# Extract the owner names from the property-details page.
def get_owners(soup):
    """Return the owners listed in the ``div.map-owners`` section.

    The text of every non-empty ``td`` inside that section is collected
    (inner text pieces joined by a space) and the cells are joined with "; ".
    Returns ``None`` when the section is absent or holds no non-empty cell.
    """
    section = soup.find("div", class_="map-owners")
    if not section:
        return None

    names = []
    for cell in section.find_all("td"):
        if cell.get_text(strip=True):
            names.append(cell.get_text(" ", strip=True))

    if not names:
        return None
    return "; ".join(names)


In [17]:
# Extract the mailing address from the property-details page.
def get_mailing_address(soup):
    """Return the mailing address as one comma-separated string.

    Scans the table rows for the first one whose ``td.label`` cell reads
    exactly "Mailing Address", then joins the non-empty ``label`` texts found
    in that row's ``td.value`` cell with ", ". Returns ``None`` when there is
    no such row, the row has no value cell, or the value cell has no text.
    """
    for row in soup.find_all("tr"):
        heading = row.find("td", class_="label")
        if not heading or heading.get_text(strip=True) != "Mailing Address":
            continue

        value_cell = row.find("td", class_="value")
        if not value_cell:
            return None

        parts = []
        for line_label in value_cell.find_all("label"):
            line_text = line_label.get_text(strip=True)
            if line_text:
                parts.append(line_text)

        return ", ".join(parts) if parts else None

    return None


In [18]:
def get_location(soup):
    """Return the property location text from the details page.

    Scans the table rows for the first one whose ``td.label`` cell reads
    exactly "Location" and returns the stripped text of the
    ``label#lblLocation`` element inside that row's ``td.value`` cell.
    Returns ``None`` when the row, the value cell or the label is missing.
    """
    for row in soup.find_all("tr"):
        heading = row.find("td", class_="label")
        is_location_row = bool(heading) and heading.get_text(strip=True) == "Location"
        if not is_location_row:
            continue

        # Only the first "Location" row is consulted; the outcome is returned
        # even when it is None.
        value_cell = row.find("td", class_="value")
        location_label = value_cell.find("label", id="lblLocation") if value_cell else None
        return location_label.get_text(strip=True) if location_label else None

    return None

In [19]:
# Parse the property-details HTML into a dictionary of fields.
def parse_property_html(html):
    """Parse ``html`` and return the extracted property fields.

    The page is parsed with BeautifulSoup's "html.parser" and handed to the
    three extractor functions. The returned dict has the keys ``Owner_Name``,
    ``Mailing_Address`` and ``Location`` (each value may be ``None``).
    """
    soup = BeautifulSoup(html, "html.parser")
    extractors = (
        ("Owner_Name", get_owners),
        ("Mailing_Address", get_mailing_address),
        ("Location", get_location),
    )
    return {field: extract(soup) for field, extract in extractors}



In [20]:
# Create empty Owner_Name / Mailing_Address / Location columns, then fill them
# row by row from the property-details page of each PCN.
DETAIL_COLUMNS = ("Owner_Name", "Mailing_Address", "Location")
for detail_column in DETAIL_COLUMNS:
    df[detail_column] = None

for idx, pcn in df["PCN"].items():
    pcn_missing = pd.isna(pcn) or not str(pcn).strip()
    if pcn_missing:
        print(f"Skipping row {idx} due to missing PCN")
        continue

    # Any failure for one PCN is printed and the loop moves on, so a single
    # bad page does not stop the whole batch.
    try:
        html = get_property_details(pcn)
        if html:
            parsed = parse_property_html(html)
            for detail_column in DETAIL_COLUMNS:
                df.at[idx, detail_column] = parsed.get(detail_column)
            print(f"Processed PCN {pcn}")
        else:
            print(f"No HTML returned for PCN {pcn}")
    except Exception as e:
        print(f"Failed PCN {pcn}: {e}")


Skipping row 0 due to missing PCN
Skipping row 1 due to missing PCN
Skipping row 2 due to missing PCN
Skipping row 3 due to missing PCN
Skipping row 4 due to missing PCN
Skipping row 5 due to missing PCN
Skipping row 6 due to missing PCN
Skipping row 7 due to missing PCN
Skipping row 8 due to missing PCN
Skipping row 9 due to missing PCN
Skipping row 10 due to missing PCN
Skipping row 11 due to missing PCN
Skipping row 12 due to missing PCN
Skipping row 13 due to missing PCN
Skipping row 14 due to missing PCN
Skipping row 15 due to missing PCN
Skipping row 16 due to missing PCN
Skipping row 17 due to missing PCN
Skipping row 18 due to missing PCN
Skipping row 19 due to missing PCN
Skipping row 20 due to missing PCN
Skipping row 21 due to missing PCN
Skipping row 22 due to missing PCN
Skipping row 23 due to missing PCN
Skipping row 24 due to missing PCN
Skipping row 25 due to missing PCN
Skipping row 26 due to missing PCN
Skipping row 27 due to missing PCN
Skipping row 28 due to missing

In [None]:
# Save the final DataFrame to a new CSV file (separate from the input file);
# the DataFrame index is not written.
output_csv_path = "data/listing_address_final.csv"
df.to_csv(output_csv_path, index=False)
