##### Reading property address instead of listing address.
###### Need to clean the address column


In [None]:
%pip install pandas numpy requests beautifulsoup4 re

In [7]:
import pandas as pd
import requests
from urllib import response
from bs4 import BeautifulSoup
import re

In [3]:
# Load the property export and preview the first rows.
df = pd.read_csv(filepath_or_buffer="data/property_address.csv")
df.head(n=5)

Unnamed: 0,First_Name_1,Last_Name_1,First_Name_2,Last_Name_2,companyName,Property_Street_Address,Property_Street_Address_2,Property_City,Property_State,Property_Zip_Code,...,lastSaleInfo.saleDate,lastSaleInfo.ownershipDuration,lastSaleInfo.saleAmount,mls.status,mls.price,mls.listingType,agentInfo.name,agentInfo.phone,agentInfo.email,InLead
0,,,,,,412 Sw Seventh Ct,,Boynton Beach,FL,33435,...,,,,,,,,,,No
1,,,,,,805 Sw Third St,,Boynton Beach,FL,33435,...,,,,,,,,,,No
2,,,,,,801 Sw Second St,,Boynton Beach,FL,33435,...,,,,,,,,,,No
3,,,,,,720 Sw Second St,,Boynton Beach,FL,33435,...,,,,,,,,,,No
4,,,,,,223 Sw Seventh Ave,,Boynton Beach,FL,33435,...,,,,,,,,,,No


In [4]:
# Trim leading/trailing whitespace from every column name.
df.columns = df.columns.map(str.strip)

In [None]:
# Inspect the raw street-address column before cleaning it.
df.loc[:, "Property_Street_Address"]

In [8]:
# Function to convert numbers to ordinal words (1-100)
def number_to_words(n):
    """Return the title-cased English ordinal word(s) for ``n``.

    Examples: 1 -> "First", 12 -> "Twelfth", 20 -> "Twentieth",
    21 -> "Twenty First", 100 -> "One Hundredth".

    Args:
        n: Integer to spell out.

    Returns:
        The ordinal as a string, or None when ``n`` is outside 1-100.
    """
    ones = [
        "", "First", "Second", "Third", "Fourth", "Fifth",
        "Sixth", "Seventh", "Eighth", "Ninth"
    ]

    teens = [
        "Tenth", "Eleventh", "Twelfth", "Thirteenth", "Fourteenth",
        "Fifteenth", "Sixteenth", "Seventeenth", "Eighteenth", "Nineteenth"
    ]

    # Cardinal tens are only used as the first word of a compound ordinal
    # ("Twenty First"); an exact multiple of ten needs its own ordinal form.
    tens = [
        "", "", "Twenty", "Thirty", "Forty", "Fifty",
        "Sixty", "Seventy", "Eighty", "Ninety"
    ]

    tens_ordinal = [
        "", "", "Twentieth", "Thirtieth", "Fortieth", "Fiftieth",
        "Sixtieth", "Seventieth", "Eightieth", "Ninetieth"
    ]

    # Reject out-of-range values up front so a negative number cannot index
    # the lists from the end and return an unrelated word.
    if n < 1 or n > 100:
        return None

    if n < 10:
        return ones[n]

    if n < 20:
        return teens[n - 10]

    if n < 100:
        ten, one = divmod(n, 10)
        if one == 0:
            return tens_ordinal[ten]
        return f"{tens[ten]} {ones[one]}"

    return "One Hundredth"


In [9]:
def number_to_ordinal(n):
    """Return ``n`` followed by its English ordinal suffix, e.g. 1 -> "1st".

    Numbers whose last two digits are 11, 12 or 13 always take "th";
    otherwise the last digit picks "st", "nd", "rd" or "th".
    """
    special_suffixes = {1: "st", 2: "nd", 3: "rd"}
    last_two = n % 100
    is_teen = 11 <= last_two and last_two <= 13
    suffix = "th" if is_teen else special_suffixes.get(n % 10, "th")
    return "{}{}".format(n, suffix)


In [10]:
# Lookup table: ordinal word(s) from number_to_words -> numeric ordinal from
# number_to_ordinal, for every integer from 1 to 100.
ordinal_map = dict(
    (number_to_words(value), number_to_ordinal(value))
    for value in range(1, 101)
)


In [11]:
# Case-insensitive, whole-word matcher for every key in ordinal_map.
# Regex alternation takes the first branch that matches, so the keys are
# ordered longest-first: otherwise a short key that is also the first word of
# a longer key would match first and split the longer phrase into two hits.
pattern = re.compile(
    r"\b("
    + "|".join(
        re.escape(k) for k in sorted(ordinal_map, key=len, reverse=True)
    )
    + r")\b",
    re.IGNORECASE
)

In [12]:
def replace_ordinals(address):
    """Return ``address`` with each match of ``pattern`` swapped for its numeric form.

    The matched text is title-cased before the ``ordinal_map`` lookup so that
    matches in any letter case resolve to the same key.
    """
    def _to_numeric(match):
        matched_words = match.group(0)
        return ordinal_map[matched_words.title()]

    return pattern.sub(_to_numeric, address)


In [13]:
# Replace spelled-out ordinals in the street address with their numeric form.
# astype(str) turns a missing address into the literal text "nan", so the
# original missing-value mask is re-applied afterwards to keep those rows
# missing instead of sending "nan" on to the address search.
df["Clean_Address"] = (
    df["Property_Street_Address"]
    .astype(str)
    .str.replace(
        pattern,
        lambda m: ordinal_map[m.group(0).title()],
        regex=True
    )
    .where(df["Property_Street_Address"].notna())
)


In [36]:
# Build the search text "<street>, <city> <zip>".
# If pandas parsed the zip column as float, astype(str) renders values like
# "33435.0" and a blank zip as "nan"; strip the trailing ".0" and keep blank
# zips missing so the whole address stays missing for that row.
zip_code = (
    df["Property_Zip_Code"]
    .astype(str)
    .str.strip()
    .str.replace(r"\.0$", "", regex=True)
    .where(df["Property_Zip_Code"].notna())
)
df["full_address"] = (
    df["Clean_Address"].str.strip()
    + ", "
    + df["Property_City"].str.strip()
    + " "
    + zip_code
)
df["full_address"].head()


0     412 Sw 7th Ct, Boynton Beach 33435
1     805 Sw 3rd St, Boynton Beach 33435
2     801 Sw 2nd St, Boynton Beach 33435
3     720 Sw 2nd St, Boynton Beach 33435
4    223 Sw 7th Ave, Boynton Beach 33435
Name: full_address, dtype: object

In [43]:
# helper function to check for empty addresses
def is_empty_address(address):
    """Return True when ``address`` holds no usable address text.

    Only strings can be searched, so None, float NaN, pd.NA and any other
    non-string value count as empty, as do empty or whitespace-only strings.
    """
    # Checking the type first avoids calling .strip() on a value that does
    # not have it (for example pd.NA or an integer).
    if not isinstance(address, str):
        return True
    return address.strip() == ""


In [44]:
# Request body for the address search; the address to look up goes in "searchText"
def build_payload(search_address):
    """Return the JSON-serialisable request body for one address search.

    Every field except "searchText" is a fixed value; ``search_address`` is
    stored under "searchText" as given.
    """
    payload = dict(
        inputName="addresssearch",
        searchLimit="20",
        uID="89540f28-8b9a-4aed-b609-72529f86a3ca",
        version=2,
        removeZip=True,
        papaVersion=True,
        removeChar="_",
        removeSpace=True,
        papaVariance=False,
    )
    payload["searchText"] = search_address
    return payload
    

In [45]:
# calling api and returning json response
def api_call(address, timeout=30):
    """POST an address search to the maps.pbc.gov ``anysearch`` endpoint.

    Args:
        address: Text sent as the payload's "searchText".
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        The decoded JSON body on HTTP 200. None (after printing the reason)
        on any other status, on a network failure or timeout, or when the
        body is not valid JSON.
    """
    url = "https://maps.pbc.gov/giswebapi/anysearch"
    try:
        response = requests.post(url, json=build_payload(address), timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable request should not abort a whole batch of lookups.
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 response can still carry a body that is not JSON.
        print(f"Error: invalid JSON in response ({exc})")
        return None

In [46]:
# helper function to normalize text: uppercase it and collapse every run of whitespace to a single space
def normalize(text):
    """Return ``text`` uppercased, trimmed, with words separated by one space."""
    words = text.upper().split()
    return " ".join(words)

In [47]:
# finds the pcn number from the api response data for the given address
def get_pcn_number(address, data):
    """Return the "PCN" of the first entry whose "searchTerm" equals ``address``.

    Both sides are compared after ``normalize`` (case and spacing are ignored).

    Args:
        address: Address text that was searched for.
        data: Iterable of result entries from the search response.

    Returns:
        The matching entry's "PCN" value, or None when no entry matches.
    """
    address_norm = normalize(address)
    for item in data:
        # Entries that are not mappings have no "searchTerm" to compare.
        if not isinstance(item, dict):
            continue
        # The key may be absent or explicitly null; treat both as empty text.
        search_term = item.get("searchTerm") or ""
        if normalize(search_term) == address_norm:
            pcn = item.get("PCN")
            print("get_pcn_number :" + str(pcn))
            return pcn

    return None

In [48]:
# Look up one address through the API and return its PCN
def get_pcn(address):
    """Return the PCN for ``address``, or None when it cannot be resolved.

    None is returned when the address is empty, when the API call yields no
    data, or when no returned entry matches the address.
    """
    if not is_empty_address(address):
        results = api_call(address)
        if results:
            return get_pcn_number(address, results)
    return None

In [49]:
# Look up the PCN for every address, keeping the results in row order
pcn_results = []

# Only the full_address column is needed, so iterate that column directly
# instead of building a whole row object per iteration with iterrows().
for idx, address in df["full_address"].items():
    print(f"Processing row {idx}: {address}")
    pcn_results.append(get_pcn(address))
    

Processing row 0: 412 Sw 7th Ct, Boynton Beach 33435
get_pcn_number :08434528190000211
Processing row 1: 805 Sw 3rd St, Boynton Beach 33435
Processing row 2: 801 Sw 2nd St, Boynton Beach 33435
get_pcn_number :08434528220090010
Processing row 3: 720 Sw 2nd St, Boynton Beach 33435
get_pcn_number :08434528220010250
Processing row 4: 223 Sw 7th Ave, Boynton Beach 33435
get_pcn_number :08434528220010361
Processing row 5: 219 Sw 7th Ave, Boynton Beach 33435
get_pcn_number :08434528220010381
Processing row 6: 904 S Seacrest Blvd, Boynton Beach 33435
get_pcn_number :08434528120200110
Processing row 7: 118 Sw 8th Ave, Boynton Beach 33435
get_pcn_number :08434528120210041
Processing row 8: 127 Sw 8th Ave, Boynton Beach 33435
get_pcn_number :08434528120160170
Processing row 9: 145 Sw 8th Ave, Boynton Beach 33435
get_pcn_number :08434528120160130
Processing row 10: 139 Sw 8th Ave, Boynton Beach 33435
get_pcn_number :08434528120160150
Processing row 11: 423 Sw 9th Ave, Boynton Beach 33435
get_pcn_n

In [51]:
# Attach the collected PCN results to the dataframe as the "PCN" column
df = df.assign(PCN=pcn_results)
df.loc[:, "PCN"].head()

0    08434528190000211
1                 None
2    08434528220090010
3    08434528220010250
4    08434528220010361
Name: PCN, dtype: object

In [52]:
# function to get property details using pcn number
def get_property_details(pcn, timeout=30):
    """Fetch the pbcpao.gov ``MapDetails`` page for one parcel.

    Args:
        pcn: Parcel number sent as the ``parcelId`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        The response body as text on HTTP 200. None when ``pcn`` is None or
        blank, on any other status, or on a network failure or timeout (the
        last two also print the reason).
    """
    if pcn is None or str(pcn).strip() == "":
        return None

    url = "https://pbcpao.gov/Property/MapDetails"
    params = {"parcelId": pcn}

    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status code.
        print(f"Error fetching details for PCN {pcn}: {exc}")
        return None

    if response.status_code == 200:
        return response.text

    print(f"Error fetching details for PCN {pcn}: {response.status_code}")
    return None

In [53]:
# Extract the owner names from the parsed property details page
def get_owners(soup):
    """Return the owner names joined with "; ", or None when none are found.

    Names are the non-empty text of each ``<td>`` inside the first
    ``<div class="map-owners">``.
    """
    section = soup.find("div", class_="map-owners")
    if not section:
        return None

    names = []
    for cell in section.find_all("td"):
        # Skip cells that contain only whitespace.
        if cell.get_text(strip=True):
            names.append(cell.get_text(" ", strip=True))

    if not names:
        return None
    return "; ".join(names)


In [57]:
# Extract the mailing address from the parsed property details page
def get_mailing_address(soup):
    """Return the mailing address lines joined with ", ", or None.

    Scans table rows for the first one whose ``<td class="label">`` text is
    exactly "Mailing Address", then joins the non-empty ``<label>`` texts
    inside that row's ``<td class="value">``. None is returned when no such
    row exists, the value cell is missing, or it has no non-empty labels.
    """
    for tr in soup.find_all("tr"):
        heading = tr.find("td", class_="label")
        if not heading or heading.get_text(strip=True) != "Mailing Address":
            continue

        content = tr.find("td", class_="value")
        if not content:
            return None

        parts = []
        for node in content.find_all("label"):
            text = node.get_text(strip=True)
            if text:
                parts.append(text)

        if not parts:
            return None
        return ", ".join(parts)

    return None


In [58]:
# Parse a property details page into a dictionary of the fields we keep
def parse_property_html(html):
    """Parse ``html`` with ``html.parser`` and return the extracted fields.

    Returns a dict with "Owner_Name" (from ``get_owners``) and
    "Mailing_Address" (from ``get_mailing_address``).
    """
    soup = BeautifulSoup(html, "html.parser")
    owner_name = get_owners(soup)
    mailing_address = get_mailing_address(soup)
    return {"Owner_Name": owner_name, "Mailing_Address": mailing_address}


In [None]:
# creating new columns for Owner_Name and Mailing_Address
df["Owner_Name"] = None
df["Mailing_Address"] = None

# Fetch and parse the details page for every row that has a PCN.
for idx, pcn in df["PCN"].items():
    if pd.isna(pcn) or not str(pcn).strip():
        print(f"Skipping row {idx} due to missing PCN")
        continue

    try:
        html = get_property_details(pcn)
        if html is None:
            # No page came back; handing None to the HTML parser would only
            # surface as an unrelated parser error in the message below.
            print(f"Failed PCN {pcn}: no property details returned")
            continue

        parsed = parse_property_html(html)
        df.at[idx, "Owner_Name"] = parsed.get("Owner_Name")
        df.at[idx, "Mailing_Address"] = parsed.get("Mailing_Address")
        print(f"Processed PCN {pcn}")

    except Exception as e:
        # Best effort: one bad page should not stop the remaining rows.
        print(f"Failed PCN {pcn}: {e}")

In [60]:
# Write the enriched dataframe to disk without the index column.
df.to_csv(
    path_or_buf="data/property_address_with_owner_and_mailing.csv",
    index=False,
)