In [140]:
import math
import re

import geopandas as gpd
import numpy as np
import pandas as pd
import rtree
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components
from tqdm import tqdm


In [141]:
# Load the parcel data table (a single CSV export is read in this notebook)
file1_path = "data/Parcel_Data_2021_Table_-2691831558175163259.csv"

# Parse the whole file into one DataFrame; later cells refer to it as `df1`.
# NOTE(review): the warning echoed under this cell looks like pandas' mixed-type
# DtypeWarning -- consider passing an explicit `dtype=` mapping; confirm which columns.
df1 = pd.read_csv(file1_path)

# Report (rows, columns) as a quick sanity check on the load
print(f"Dataset 1 shape: {df1.shape}")


  df1 = pd.read_csv(file1_path)


Dataset 1 shape: (2424770, 51)


In [142]:
# Spot-check: show the raw rows whose address starts with one known street address
# (rows with a missing address are treated as non-matching via na=False)
df1[df1["Property Location"].str.startswith("1017 HYPERION AVE", na=False)]


Unnamed: 0,Zip Code,City Tax Rate Area,AIN,Roll Year,Tax Rate Area Code,Assessor ID,Property Location,Property Use Type,Property Use Code,Use Code 1st Digit,Use Code 2nd Digit,Use Code 3rd Digit,Use Code 4th Digit,Number of Buildings,Year Built,Effective Year,Square Footage,Number of Bedrooms,Number of Bathrooms,Number of Units,Recording Date,Land Value,Land Base Year,Improvement Value,Improvement Base Year,"Total Value, Land + Improvement",Home Owners Exemption,Real Estate Exemption,Fixture Value,Fixture Exemption,Personal Property Value,Personal Property Exemption,Property taxable?,Total Value,Total Exemption,Taxable Value,Classification,Region Number,Cluster Code,Parcel Legal Description,Address House Number,Address House Number Fraction,Direction,Street,Unit Number,City,Zip Code.1,Row ID,Location Latitude,Location Longitude,OBJECTID
1252963,90029-3109,LOS ANGELES,5427007013,2024,13,5427-007-013,1017 HYPERION AVE LOS ANGELES CA 90029,R-I,200,Residential,"Double, Duplex, or Two Units",4 Stories or Less,,2,1946,1952,2184,6,2,2,6/16/2023 7:00:00 AM,1096500,2023,418200,2023,1514700,0,0,0,0,0,0,Y,1514700,0,1514700,,4,4422,CHILDS HEIGHTS LOT 13 BLK 10,1017,,,HYPERION AVE,,LOS ANGELES CA,90029.0,20245427007013,34.090412,-118.280831,56340440


In [143]:
# Count rows per "City Tax Rate Area" value to see which jurisdictions the table covers
df1["City Tax Rate Area"].value_counts()

City Tax Rate Area
LOS ANGELES       805738
unincorporated    326824
LONG BEACH        106381
SANTA CLARITA      71910
LANCASTER          50077
                   ...  
ROSEMEAD             269
MAYWOOD              213
PARAMOUNT             94
INDUSTRY              36
EL SEGUNDO             4
Name: count, Length: 123, dtype: int64

In [144]:
# The merge against a second dataset (`df2`, AIN == PARCEL_NUMBER) is currently
# disabled, so `merged_df` is just another name for `df1` (no copy is made).
merged_df = df1
# Disabled merge, kept for reference (`df2` is not loaded anywhere in this notebook):
# pd.merge(
#     left=df1, right=df2, left_on="AIN", right_on="PARCEL_NUMBER", how="left"
# )

# # Display info about the merged dataframe
# print(f"Merged dataset shape: {merged_df.shape}")

# # Preview the merged dataframe
# print(merged_df.head())

In [145]:
# Keep only the rows whose "City Tax Rate Area" is exactly LOS ANGELES
merged_df = merged_df[merged_df["City Tax Rate Area"] == "LOS ANGELES"]
# Display the filtered frame
merged_df

Unnamed: 0,Zip Code,City Tax Rate Area,AIN,Roll Year,Tax Rate Area Code,Assessor ID,Property Location,Property Use Type,Property Use Code,Use Code 1st Digit,Use Code 2nd Digit,Use Code 3rd Digit,Use Code 4th Digit,Number of Buildings,Year Built,Effective Year,Square Footage,Number of Bedrooms,Number of Bathrooms,Number of Units,Recording Date,Land Value,Land Base Year,Improvement Value,Improvement Base Year,"Total Value, Land + Improvement",Home Owners Exemption,Real Estate Exemption,Fixture Value,Fixture Exemption,Personal Property Value,Personal Property Exemption,Property taxable?,Total Value,Total Exemption,Taxable Value,Classification,Region Number,Cluster Code,Parcel Legal Description,Address House Number,Address House Number Fraction,Direction,Street,Unit Number,City,Zip Code.1,Row ID,Location Latitude,Location Longitude,OBJECTID
0,91304-3327,LOS ANGELES,2004001003,2024,16,2004-001-003,8321 FAUST AVE LOS ANGELES CA 91304,SFR,0101,Residential,Single Family Residence,,Pool,1,1973,1973,2090,4,3,1,5/29/2009 7:00:00 AM,711689,2006,301176,2006,1012865,0,0,0,0,0,0,Y,1012865,0,1012865,,2,2121,TRACT NO 25040 LOT 99,8321,,,FAUST AVE,,LOS ANGELES CA,91304.0,20242004001003,34.220225,-118.620681,55087477
1,91304-3327,LOS ANGELES,2004001004,2024,16,2004-001-004,8313 FAUST AVE LOS ANGELES CA 91304,SFR,0101,Residential,Single Family Residence,,Pool,1,1973,1973,2479,5,3,1,12/1/2021 8:00:00 AM,370538,2010,255879,2010,626417,0,0,0,0,0,0,Y,626417,0,626417,,2,2121,TRACT NO 25040 LOT 100,8313,,,FAUST AVE,,LOS ANGELES CA,91304.0,20242004001004,34.220044,-118.620681,55087478
2,91304-3327,LOS ANGELES,2004001005,2024,16,2004-001-005,8309 FAUST AVE LOS ANGELES CA 91304,SFR,0100,Residential,Single Family Residence,,,1,1973,1973,2057,4,2,1,8/18/2017 7:00:00 AM,526360,2018,198577,2018,724937,0,0,0,0,0,0,Y,724937,0,724937,,2,2121,TRACT NO 25040 LOT 101,8309,,,FAUST AVE,,LOS ANGELES CA,91304.0,20242004001005,34.219862,-118.620688,55087479
3,91304-3332,LOS ANGELES,2004001008,2024,16,2004-001-008,8325 MAYNARD AVE LOS ANGELES CA 91304,SFR,0101,Residential,Single Family Residence,,Pool,1,1978,1978,2423,4,3,1,7/2/1979 7:00:00 AM,128421,1980,221965,1980,350386,7000,0,0,0,0,0,Y,350386,7000,343386,,2,2121,*TR=30333 LOT 1,8325,,,MAYNARD AVE,,LOS ANGELES CA,91304.0,20242004001008,34.220339,-118.622718,55087480
4,91304-3332,LOS ANGELES,2004001009,2024,16,2004-001-009,8311 MAYNARD AVE LOS ANGELES CA 91304,SFR,0101,Residential,Single Family Residence,,Pool,1,1978,1978,2226,4,3,1,8/29/2023 7:00:00 AM,139933,1984,210012,1984,349945,7000,0,0,0,0,0,Y,349945,7000,342945,,2,2121,*TR=30333 LOT 2,8311,,,MAYNARD AVE,,LOS ANGELES CA,91304.0,20242004001009,34.220327,-118.623062,55087481
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016313,90732-4530,LOS ANGELES,7563037052,2024,20,7563-037-052,2193 WARMOUTH ST LOS ANGELES CA 90732,SFR,0100,Residential,Single Family Residence,,,1,1959,1959,1933,3,3,1,1/13/2014 8:00:00 AM,485785,1995,263473,1995,749258,7000,0,0,0,0,0,Y,749258,7000,742258,,14,14190,LOT 120 OF TRACT 22374 AND POR OF LOT IV L S 1...,2193,,,WARMOUTH ST,,LOS ANGELES CA,90732.0,20247563037052,33.719514,-118.325376,57103790
2016314,90732-4519,LOS ANGELES,7563037053,2024,20,7563-037-053,3810 S ANCHOVY AVE LOS ANGELES CA 90732,SFR,0100,Residential,Single Family Residence,,,1,1965,1965,2206,3,2,1,1/6/2020 8:00:00 AM,793428,2020,198356,2020,991784,0,0,0,0,0,0,Y,991784,0,991784,,14,14190,TR=22374 LOT 125,3810,,S,ANCHOVY AVE,,LOS ANGELES CA,90732.0,20247563037053,33.719952,-118.325128,57103791
2016315,90732-4519,LOS ANGELES,7563037054,2024,20,7563-037-054,3806 S ANCHOVY AVE LOS ANGELES CA 90732,SFR,0100,Residential,Single Family Residence,,,1,1959,1961,1862,3,2,1,5/2/2017 7:00:00 AM,703626,2008,175899,2008,879525,7000,0,0,0,0,0,Y,879525,7000,872525,,14,14190,TR=22374 LOT 126,3806,,S,ANCHOVY AVE,,LOS ANGELES CA,90732.0,20247563037054,33.720119,-118.325081,57103792
2016316,90732-4556,LOS ANGELES,7563037055,2024,20,7563-037-055,2197 W PASEO DEL MAR LOS ANGELES CA 90732,SFR,0100,Residential,Single Family Residence,,,1,1962,1962,2153,4,2,1,4/11/2012 7:00:00 AM,47779,1975,92991,1975,140770,7000,0,0,0,0,0,Y,140770,7000,133770,,14,14190,TR=22374 LOT 127,2197,,W,PASEO DEL MAR,,LOS ANGELES CA,90732.0,20247563037055,33.720279,-118.325034,57103793


In [146]:
# Lift pandas' column-display limit so wide frames are rendered with every column
pd.set_option("display.max_columns", None)

# Print every available column name (used to pick the subset in the next cell)
print(list(merged_df.columns))

['Zip Code', 'City Tax Rate Area', 'AIN', 'Roll Year', 'Tax Rate Area Code', 'Assessor ID', 'Property Location', 'Property Use Type', 'Property Use Code', 'Use Code 1st Digit', 'Use Code 2nd Digit', 'Use Code 3rd Digit', 'Use Code 4th Digit', 'Number of Buildings', 'Year Built', 'Effective Year', 'Square Footage', 'Number of Bedrooms', 'Number of Bathrooms', 'Number of Units', 'Recording Date', 'Land Value', 'Land Base Year', 'Improvement Value', 'Improvement Base Year', 'Total Value, Land + Improvement', 'Home Owners Exemption', 'Real Estate Exemption', 'Fixture Value', 'Fixture Exemption', 'Personal Property Value', 'Personal Property Exemption', 'Property taxable?', 'Total Value', 'Total Exemption', 'Taxable Value', 'Classification', 'Region Number', 'Cluster Code', 'Parcel Legal Description', 'Address House Number', 'Address House Number Fraction', 'Direction', 'Street', 'Unit Number', 'City', 'Zip Code.1', 'Row ID', 'Location Latitude', 'Location Longitude', 'OBJECTID']


In [147]:
# Columns carried forward into the zoning / acreage analysis; identifier-like
# duplicates and the parsed address components are left behind.
selected_columns = [
    "Zip Code",
    "City Tax Rate Area",
    "AIN",
    "Roll Year",
    "Tax Rate Area Code",
    "Property Location",
    "Property Use Type",
    "Property Use Code",
    "Number of Buildings",
    "Year Built",
    "Effective Year",
    "Square Footage",
    "Number of Bedrooms",
    "Number of Bathrooms",
    "Number of Units",
    "Land Value",
    "Land Base Year",
    "Improvement Value",
    "Improvement Base Year",
    "Total Value, Land + Improvement",
    "Home Owners Exemption",
    "Real Estate Exemption",
    "Fixture Value",
    "Fixture Exemption",
    "Personal Property Value",
    "Personal Property Exemption",
    "Property taxable?",
    "Total Value",
    "Total Exemption",
    "Taxable Value",
    "Classification",
    "Region Number",
    "Location Latitude",
    "Location Longitude",
]
selected = merged_df[selected_columns]

In [148]:
# selected["Total_Taxes_Paid_Calc"] = (
#     selected["F1ST_INSTALLMENT_TAX"] + selected["F2ND_INSTALLMENT_TAX"]
# )
# selected

In [151]:
# Load the zoning polygons from GeoJSON
zoning = gpd.read_file("data/ZONING_PLY_20250403.geojson")
# Only the complete zone string and the polygon geometry are needed downstream
zoning = zoning[["zone_cmplt", "geometry"]]
# Display the trimmed zoning layer
zoning

Unnamed: 0,zone_cmplt,geometry
0,(F)CM-1-CUGU,"MULTIPOLYGON (((-118.38793 34.22862, -118.3883..."
1,(F)CM-1-CUGU,"MULTIPOLYGON (((-118.38793 34.22959, -118.3883..."
2,(F)CM-1-CUGU,"MULTIPOLYGON (((-118.38793 34.23014, -118.3883..."
3,(F)CM-1-CUGU,"MULTIPOLYGON (((-118.38794 34.23041, -118.3883..."
4,(F)CM-1-CUGU,"MULTIPOLYGON (((-118.38794 34.2314, -118.38836..."
...,...,...
58658,C4-2D-SN,"MULTIPOLYGON (((-118.32464 34.10324, -118.3246..."
58659,C4-2D,"MULTIPOLYGON (((-118.33089 34.10051, -118.3302..."
58660,(T)(Q)C4-2D-SN,"MULTIPOLYGON (((-118.3306 34.1011, -118.33077 ..."
58661,[Q]C2-2L-CDO-CUGU,"MULTIPOLYGON (((-118.36707 34.2176, -118.36728..."


In [152]:
# Step 1: turn the tabular rows into a point GeoDataFrame built from each row's
# "Location Longitude" / "Location Latitude".
# `gpd.points_from_xy` is vectorised and removes the dependency on shapely's
# `Point`, which this notebook never imports (the previous row-wise `apply`
# raised NameError on a fresh kernel). `selected.copy()` keeps `selected` intact.
# The coordinates are treated as WGS84 lon/lat (EPSG:4326), as before.
selected_gdf = gpd.GeoDataFrame(
    selected.copy(),
    geometry=gpd.points_from_xy(
        selected["Location Longitude"], selected["Location Latitude"]
    ),
    crs="EPSG:4326",
)

# Both layers must share a CRS before joining; reproject zoning if it differs
if zoning.crs != selected_gdf.crs:
    zoning = zoning.to_crs(selected_gdf.crs)

# Step 2: left spatial join -- each point receives the attributes of the zoning
# polygon it lies within; points matching no polygon keep NaN zoning values.
# NOTE(review): a point lying within more than one zoning polygon yields one
# output row per match -- confirm whether the zoning layer can overlap.
joined_data = gpd.sjoin(selected_gdf, zoning, how="left", predicate="within")

# Step 3: back to a plain DataFrame, dropping the point geometry and the
# join's bookkeeping column so only the original columns plus zoning remain
result = pd.DataFrame(joined_data.drop(columns=["geometry", "index_right"]))

In [153]:
# ZONE_PREFIX = the part of the complete zone string before the first hyphen
# (e.g. "RE9-1" -> "RE9"); bracketed qualifiers such as "[Q]" stay attached here.
result["ZONE_PREFIX"] = result["zone_cmplt"].str.split("-").str[0]
# Display the frame with the new column
result

Unnamed: 0,Zip Code,City Tax Rate Area,AIN,Roll Year,Tax Rate Area Code,Property Location,Property Use Type,Property Use Code,Number of Buildings,Year Built,Effective Year,Square Footage,Number of Bedrooms,Number of Bathrooms,Number of Units,Land Value,Land Base Year,Improvement Value,Improvement Base Year,"Total Value, Land + Improvement",Home Owners Exemption,Real Estate Exemption,Fixture Value,Fixture Exemption,Personal Property Value,Personal Property Exemption,Property taxable?,Total Value,Total Exemption,Taxable Value,Classification,Region Number,Location Latitude,Location Longitude,zone_cmplt,ZONE_PREFIX
0,91304-3327,LOS ANGELES,2004001003,2024,16,8321 FAUST AVE LOS ANGELES CA 91304,SFR,0101,1,1973,1973,2090,4,3,1,711689,2006,301176,2006,1012865,0,0,0,0,0,0,Y,1012865,0,1012865,,2,34.220225,-118.620681,RE9-1,RE9
1,91304-3327,LOS ANGELES,2004001004,2024,16,8313 FAUST AVE LOS ANGELES CA 91304,SFR,0101,1,1973,1973,2479,5,3,1,370538,2010,255879,2010,626417,0,0,0,0,0,0,Y,626417,0,626417,,2,34.220044,-118.620681,RE9-1,RE9
2,91304-3327,LOS ANGELES,2004001005,2024,16,8309 FAUST AVE LOS ANGELES CA 91304,SFR,0100,1,1973,1973,2057,4,2,1,526360,2018,198577,2018,724937,0,0,0,0,0,0,Y,724937,0,724937,,2,34.219862,-118.620688,RE9-1,RE9
3,91304-3332,LOS ANGELES,2004001008,2024,16,8325 MAYNARD AVE LOS ANGELES CA 91304,SFR,0101,1,1978,1978,2423,4,3,1,128421,1980,221965,1980,350386,7000,0,0,0,0,0,Y,350386,7000,343386,,2,34.220339,-118.622718,RE11-1,RE11
4,91304-3332,LOS ANGELES,2004001009,2024,16,8311 MAYNARD AVE LOS ANGELES CA 91304,SFR,0101,1,1978,1978,2226,4,3,1,139933,1984,210012,1984,349945,7000,0,0,0,0,0,Y,349945,7000,342945,,2,34.220327,-118.623062,RE11-1,RE11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016313,90732-4530,LOS ANGELES,7563037052,2024,20,2193 WARMOUTH ST LOS ANGELES CA 90732,SFR,0100,1,1959,1959,1933,3,3,1,485785,1995,263473,1995,749258,7000,0,0,0,0,0,Y,749258,7000,742258,,14,33.719514,-118.325376,R1-1XL,R1
2016314,90732-4519,LOS ANGELES,7563037053,2024,20,3810 S ANCHOVY AVE LOS ANGELES CA 90732,SFR,0100,1,1965,1965,2206,3,2,1,793428,2020,198356,2020,991784,0,0,0,0,0,0,Y,991784,0,991784,,14,33.719952,-118.325128,R1-1XL,R1
2016315,90732-4519,LOS ANGELES,7563037054,2024,20,3806 S ANCHOVY AVE LOS ANGELES CA 90732,SFR,0100,1,1959,1961,1862,3,2,1,703626,2008,175899,2008,879525,7000,0,0,0,0,0,Y,879525,7000,872525,,14,33.720119,-118.325081,R1-1XL,R1
2016316,90732-4556,LOS ANGELES,7563037055,2024,20,2197 W PASEO DEL MAR LOS ANGELES CA 90732,SFR,0100,1,1962,1962,2153,4,2,1,47779,1975,92991,1975,140770,7000,0,0,0,0,0,Y,140770,7000,133770,,14,33.720279,-118.325034,R1-1XL,R1


In [154]:
# Frequency of each zone prefix, to see how many distinct prefixes need grouping
result["ZONE_PREFIX"].value_counts()

ZONE_PREFIX
R1          251717
R3           61757
RS           55071
R2           51911
RD1.5        36587
             ...  
[Q]R1P           1
(T)[Q]R3         1
(T)(Q)M3         1
[T]RE9           1
SL               1
Name: count, Length: 303, dtype: int64

In [155]:
# Load the parcel polygons, requesting only the AIN and geometry columns
parcels = gpd.read_file("data/Parcels.gdb", columns=["AIN", "geometry"])

In [156]:
# Coerce the parcel AIN to a nullable integer so it can be matched against the
# AIN column of `result`; values that fail to parse become missing instead of raising.
parcels["AIN"] = pd.to_numeric(parcels["AIN"], errors="coerce").astype(
    "Int64"
)  # nullable integer dtype, so missing values are preserved


In [157]:
# Attach the parcel polygon to every zoned row by AIN. A left join keeps all
# rows of `result`, leaving the geometry missing where no parcel matches.
merged_results = result.merge(parcels, how="left", on="AIN")

# Wrap the merged table as a GeoDataFrame again so spatial operations are
# available; the active geometry is the polygon column that came from `parcels`.
merged_gdf = gpd.GeoDataFrame(merged_results, geometry="geometry")

In [158]:
# Reproject to EPSG:2229 (State Plane, US survey feet) so planar areas are in square feet
projected_gdf = merged_gdf.to_crs("EPSG:2229")

# Square feet -> acres (1 acre = 43,560 sq ft), rounded to three decimals.
# The intermediate square-foot area is not stored as a column.
projected_gdf["acreage"] = (projected_gdf.geometry.area / 43560).round(3)

# Return to the CRS the merged frame started in for any further spatial work
final_gdf = projected_gdf.to_crs(merged_gdf.crs)

In [159]:
# Spot-check the same known address after the zoning join and acreage computation
final_gdf[final_gdf["Property Location"].str.startswith("1017 HYPERION AVE", na=False)]


Unnamed: 0,Zip Code,City Tax Rate Area,AIN,Roll Year,Tax Rate Area Code,Property Location,Property Use Type,Property Use Code,Number of Buildings,Year Built,Effective Year,Square Footage,Number of Bedrooms,Number of Bathrooms,Number of Units,Land Value,Land Base Year,Improvement Value,Improvement Base Year,"Total Value, Land + Improvement",Home Owners Exemption,Real Estate Exemption,Fixture Value,Fixture Exemption,Personal Property Value,Personal Property Exemption,Property taxable?,Total Value,Total Exemption,Taxable Value,Classification,Region Number,Location Latitude,Location Longitude,zone_cmplt,ZONE_PREFIX,geometry,acreage
618932,90029-3109,LOS ANGELES,5427007013,2024,13,1017 HYPERION AVE LOS ANGELES CA 90029,R-I,200,2,1946,1952,2184,6,2,2,1096500,2023,418200,2023,1514700,0,0,0,0,0,0,Y,1514700,0,1514700,,4,34.090412,-118.280831,R3-1VL,R3,"MULTIPOLYGON (((6476718.72 1855390.06, 6476695...",0.168


In [160]:
# Estimate each parcel's tax as 1% of its taxable value.
# NOTE(review): 0.01 is presumably the base ad valorem rate -- confirm and cite the source.
final_gdf["Property_Tax_Value"] = final_gdf["Taxable Value"] * 0.01

In [161]:
# Drop rows with no acreage (rebinds `final_gdf`, so the unfiltered frame is no longer reachable by this name)
final_gdf = final_gdf.dropna(subset=["acreage"])

In [162]:
# Sanity check: this should display an empty frame now that rows without acreage are gone
final_gdf[final_gdf["acreage"].isna()]


Unnamed: 0,Zip Code,City Tax Rate Area,AIN,Roll Year,Tax Rate Area Code,Property Location,Property Use Type,Property Use Code,Number of Buildings,Year Built,Effective Year,Square Footage,Number of Bedrooms,Number of Bathrooms,Number of Units,Land Value,Land Base Year,Improvement Value,Improvement Base Year,"Total Value, Land + Improvement",Home Owners Exemption,Real Estate Exemption,Fixture Value,Fixture Exemption,Personal Property Value,Personal Property Exemption,Property taxable?,Total Value,Total Exemption,Taxable Value,Classification,Region Number,Location Latitude,Location Longitude,zone_cmplt,ZONE_PREFIX,geometry,acreage,Property_Tax_Value


In [164]:
# Total estimated tax scaled by 0.2478.
# NOTE(review): 0.2478 is an undocumented factor (presumably a share of the levy) -- TODO document its source.
final_gdf["Property_Tax_Value"].sum() * 0.2478


np.float64(2001904733.5903978)

In [165]:
# Reduce a zone prefix to a single leading character used as a coarse zone type
def extract_first_letter(zone_code):
    """Return the first alphanumeric character of a zone code, ignoring qualifiers.

    Bracketed or parenthesised groups such as "[Q]" or "(T)" are removed first,
    then every remaining character that is not an ASCII letter or digit.

    Parameters:
    zone_code: Zone string, or a missing value (None / NaN).

    Returns:
    The first remaining character (it can be a digit if the cleaned code starts
    with one), or None when the input is missing or nothing is left after cleaning.
    """
    if pd.isna(zone_code):
        return None

    # Strip "[...]" / "(...)" qualifier groups (non-greedy, so each group is removed separately)
    without_qualifiers = re.sub(r"[\[\(].*?[\]\)]", "", zone_code)

    # Drop whatever punctuation or whitespace is left
    alnum_only = re.sub(r"[^a-zA-Z0-9]", "", without_qualifiers)

    return alnum_only[0] if alnum_only else None


# Derive a one-character zone type from each ZONE_PREFIX (missing prefixes give None)
final_gdf["zone_type"] = final_gdf["ZONE_PREFIX"].apply(extract_first_letter)


In [166]:
# Distribution of the derived one-character zone types
final_gdf["zone_type"].value_counts()

zone_type
R    700393
C     60750
M     20204
P      7753
Q      3875
O      3411
A      3263
L       970
U       725
D       382
N       353
H        56
T        45
G        30
S         1
Name: count, dtype: int64

In [29]:
# final_gdf.to_csv("LA_City_Property_Parcel_Acreage_Zoning_Tax_Data.csv")

In [168]:
# Slim geometry-bearing frame: only the columns needed for the export and
# the overlap-merging step.
final_geom_columns = [
    "geometry",
    "AIN",
    "Property Location",
    "zone_type",
    "acreage",
    "Property_Tax_Value",
    "zone_cmplt",
    "Location Latitude",
    "Location Longitude",
    "Taxable Value",
    "Property taxable?",
]
final_geom = final_gdf[final_geom_columns]

# Column dtypes and non-null counts for the slimmed frame
final_geom.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 804530 entries, 0 to 805743
Data columns (total 11 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   geometry            804530 non-null  geometry
 1   AIN                 804530 non-null  int64   
 2   Property Location   766062 non-null  object  
 3   zone_type           802211 non-null  object  
 4   acreage             804530 non-null  float64 
 5   Property_Tax_Value  804530 non-null  float64 
 6   zone_cmplt          802211 non-null  object  
 7   Location Latitude   803235 non-null  float64 
 8   Location Longitude  803235 non-null  float64 
 9   Taxable Value       804530 non-null  int64   
 10  Property taxable?   804530 non-null  object  
dtypes: float64(4), geometry(1), int64(2), object(4)
memory usage: 73.7+ MB


In [169]:
# Summary statistics of the estimated tax column
print("Tax value summary statistics:")
print(final_gdf["Property_Tax_Value"].describe())

# The largest values are the first candidates for outlier review
print("\nTop 10 largest tax values:")
print(
    final_gdf.nlargest(10, "Property_Tax_Value")[
        ["AIN", "ZONE_PREFIX", "Property_Tax_Value", "acreage"]
    ]
)

# How many parcels carry a zero or negative estimated tax?
non_positive_count = int((final_gdf["Property_Tax_Value"] <= 0).sum())
print(f"\nCount of zero/negative tax values: {non_positive_count}")

# Spatial sanity check: is the covered extent plausible for LA City?
if "geometry" in final_gdf.columns:
    # `total_bounds` is expressed in the frame's own CRS units, which are not
    # guaranteed to be metres (the geometry printed earlier is in projected
    # feet). Reproject the two bounding-box corners to UTM zone 11N (metres)
    # before converting, so the figure really is in km².
    minx, miny, maxx, maxy = final_gdf.total_bounds
    corners_m = gpd.GeoSeries(
        gpd.points_from_xy([minx, maxx], [miny, maxy]), crs=final_gdf.crs
    ).to_crs("EPSG:32611")
    width_m = corners_m.x.iloc[1] - corners_m.x.iloc[0]
    height_m = corners_m.y.iloc[1] - corners_m.y.iloc[0]
    area_km2 = width_m * height_m / 1000000  # bounding box, so a rough upper bound
    print(f"\nApproximate geographic area covered: {area_km2:.2f} km²")

    # LA City is roughly 1,300 km²; the bounding box will be larger than that,
    # but a vastly larger value suggests parcels outside the city slipped in.

# Mean estimated tax per parcel and the number of parcels it is based on
mean_tax = final_gdf["Property_Tax_Value"].mean()
count = len(final_gdf)
print(f"\nMean tax per property: ${mean_tax:.2f}")
print(f"Number of properties: {count}")

Tax value summary statistics:
count    8.045300e+05
mean     1.004153e+04
std      5.519038e+04
min      0.000000e+00
25%      2.491463e+03
50%      4.984520e+03
75%      8.920680e+03
max      9.863504e+06
Name: Property_Tax_Value, dtype: float64

Top 10 largest tax values:
               AIN ZONE_PREFIX  Property_Tax_Value  acreage
439206  4334007008          C2          9863503.84    8.478
570057  5151023400          C2          8915393.84    4.195
287869  2673021041          M2          8389656.85   70.930
569856  5151014031          C4          7150390.55    4.216
533646  5089008031       (Q)C4          6868097.83    8.421
569859  5151015012          C2          6687086.19    2.708
364813  4211010118       M(PV)          5718954.91   13.754
558436  5138007089       LASED          5456448.68    9.166
423884  4319001015          C2          5292837.93    2.150
563638  5144023227          C2          5130600.00    0.942

Count of zero/negative tax values: 21191

Approximate geographic

In [170]:
# Total acreage across all remaining parcels
final_gdf["acreage"].sum()

np.float64(582436.355)

In [109]:
# Persist the slimmed geometry frame as a GeoPackage in the working directory
final_geom.to_file("final_geom.gpkg", driver="GPKG")

In [172]:
# Chunk worker, kept at module level so that it can be pickled
def _process_chunk(chunk_data):
    """
    Find pairs of substantially overlapping polygons inside one chunk.

    The spatial index is built only from the polygons of this chunk, so a pair
    is reported only when both polygons belong to the same chunk.

    Parameters:
    chunk_data: Tuple containing (chunk_idx, gdf, areas, chunk_size, n, overlap_threshold, debug)

    Returns:
    Tuple of (rows, cols): parallel lists of positional indices into gdf, one
    entry per overlapping pair, always with rows[k] < cols[k]
    """
    chunk_idx, gdf, areas, chunk_size, n, overlap_threshold, debug = chunk_data

    # Positional range [first, last) of the polygons belonging to this chunk
    first = chunk_idx * chunk_size
    last = min(first + chunk_size, n)
    geometries = gdf.geometry

    # R-tree over the chunk, keyed by chunk-local offset; polygons with
    # non-positive area are left out of the index
    spatial_index = rtree.index.Index()
    for position in range(first, last):
        if areas[position] <= 0:
            continue
        spatial_index.insert(position - first, geometries.iloc[position].bounds)

    rows, cols = [], []

    for idx1 in range(first, last):
        area1 = areas[idx1]
        if area1 <= 0:
            continue

        geom1 = geometries.iloc[idx1]

        # Candidates are polygons whose bounding box intersects geom1's
        for local_offset in spatial_index.intersection(geom1.bounds):
            idx2 = first + local_offset

            # Look forward only, so every pair is examined once
            if idx2 <= idx1:
                continue
            if areas[idx2] <= 0:
                continue

            geom2 = geometries.iloc[idx2]

            # Cheap predicate before computing the actual intersection
            if not geom1.intersects(geom2):
                continue

            try:
                shared_area = geom1.intersection(geom2).area

                # Share of each polygon that is covered by the intersection
                share_of_first = shared_area / area1
                share_of_second = shared_area / areas[idx2]

                # A pair qualifies when either polygon is covered enough
                if (
                    share_of_first >= overlap_threshold
                    or share_of_second >= overlap_threshold
                ):
                    rows.append(idx1)
                    cols.append(idx2)
            except Exception as e:
                # Best effort: a failing pair is skipped, and reported only in debug mode
                if debug:
                    print(f"Error checking overlap between {idx1} and {idx2}: {str(e)}")

    return rows, cols


# Define component processing function at module level
def _process_component(component_data):
    """
    Process a component (group of overlapping polygons).
    Instead of merging geometries, use geometry with highest acreage.

    Parameters:
    component_data: Tuple containing (component_id, gdf, labels)

    Returns:
    Tuple of (merged_data_dict, merge_count)
    """
    component_id, gdf, labels = component_data

    # Get indices for this component
    component_indices = np.where(labels == component_id)[0]

    if len(component_indices) == 1:
        # Single property, no merging needed
        return gdf.iloc[component_indices[0]].to_dict(), 0
    else:
        # Get the properties to merge
        group_data = gdf.iloc[component_indices]

        # Find index of property with highest acreage
        if "acreage" in group_data.columns:
            max_acreage_idx = group_data["acreage"].idxmax()
            # Use geometry of property with highest acreage
            main_geom = group_data.loc[max_acreage_idx, "geometry"]
        else:
            # If no acreage column, use the largest geometry by area
            max_area_idx = group_data["_area"].idxmax()
            main_geom = group_data.loc[max_area_idx, "geometry"]

        # Prepare aggregated data
        agg_data = {}

        # For all numeric columns, sum them
        for col in group_data.select_dtypes(include=np.number).columns:
            if col not in ["Location Latitude", "Location Longitude", "_area"]:
                agg_data[col] = group_data[col].sum()

        # Keep the max acreage value
        if "acreage" in group_data.columns:
            agg_data["acreage"] = group_data["acreage"].max()

        # For lat/long, take median values
        if "Location Latitude" in group_data.columns:
            agg_data["Location Latitude"] = group_data["Location Latitude"].median()
        if "Location Longitude" in group_data.columns:
            agg_data["Location Longitude"] = group_data["Location Longitude"].median()

        # For string columns, join unique values
        for col in group_data.select_dtypes(include=["object"]).columns:
            unique_values = group_data[col].dropna().astype(str).unique()
            agg_data[col] = ", ".join(unique_values) if len(unique_values) > 0 else None

        # For boolean columns, use logical OR
        for col in group_data.select_dtypes(include=["bool"]).columns:
            agg_data[col] = any(group_data[col])

        # Set the geometry to the one with highest acreage
        agg_data["geometry"] = main_geom

        return agg_data, len(component_indices)


def merge_overlapping_properties_serial(
    gdf, overlap_threshold=0.8, chunk_size=10000, debug=True
):
    """
    Merge parcels whose polygons overlap, processing the data chunk by chunk.

    The input is copied, reprojected to UTM Zone 11N (EPSG:32611) and invalid
    geometries are repaired with ``buffer(0)``. ``_process_chunk`` is then
    called once per chunk and returns the row/column indices of parcel pairs
    to link. Those pairs form a graph whose connected components are the merge
    groups, and ``_process_component`` turns each component into one output
    row (using the geometry with the highest acreage for merged groups).

    Parameters:
    gdf (GeoDataFrame): Input GeoDataFrame with property polygons; must have a CRS
    overlap_threshold (float): Minimum overlap ratio required for merging (0.0 to 1.0)
    chunk_size (int): Number of polygons to process in each chunk
    debug (bool): Whether to print debug messages, progress bars and tracebacks

    Returns:
    GeoDataFrame: Processed GeoDataFrame with merged properties, in the CRS of
    the input. If nothing overlaps, the (geometry-repaired) parcels are
    returned unmerged. If any step fails, a RuntimeWarning is emitted and an
    untouched copy of the input is returned.

    Raises:
    ValueError: If ``gdf`` has no CRS, because it cannot be reprojected.
    """
    # Imported locally so this cell does not depend on the notebook's import cell
    import gc
    import traceback
    import warnings

    if debug:
        print("Starting memory-efficient property polygon merging...")

    # Captured before the try block so the error path never touches unbound
    # names or a half-processed frame
    source_gdf = gdf
    original_crs = gdf.crs
    if original_crs is None:
        raise ValueError(
            "Input GeoDataFrame has no CRS; set one before merging so it can be "
            "reprojected for area calculations."
        )

    try:
        # Make a copy to avoid modifying the original
        gdf = gdf.copy()

        # Convert to UTM Zone 11N for accurate spatial calculations
        projected_crs = "EPSG:32611"  # UTM Zone 11N (meters)
        if debug:
            print(f"Converting from {original_crs} to {projected_crs} for processing")

        # Convert to projected CRS
        gdf = gdf.to_crs(projected_crs)

        # Fix any invalid geometries
        if debug:
            print("Fixing invalid geometries...")
        gdf["geometry"] = gdf.geometry.apply(
            lambda geom: geom.buffer(0) if not geom.is_valid else geom
        )

        # Pre-compute areas for all polygons
        if debug:
            print("Pre-computing areas...")
        gdf["_area"] = gdf.geometry.area
        areas = gdf["_area"].values

        n = len(gdf)

        # Vectorised min/max; skipped for an empty frame where they are undefined
        if debug and n:
            print(
                f"Area statistics: Min={np.min(areas):.2f}, Max={np.max(areas):.2f}, Mean={np.mean(areas):.2f}"
            )

        # Calculate number of chunks
        n_chunks = math.ceil(n / chunk_size)
        if debug:
            print(f"Processing {n} properties in {n_chunks} chunks of {chunk_size}...")

        # Process all chunks serially to save memory
        all_rows = []
        all_cols = []

        for chunk_idx in tqdm(range(n_chunks), disable=not debug):
            rows, cols = _process_chunk(
                (chunk_idx, gdf, areas, chunk_size, n, overlap_threshold, debug)
            )
            all_rows.extend(rows)
            all_cols.extend(cols)

            # Free memory periodically (independent of the debug flag)
            if chunk_idx % 10 == 0:
                gc.collect()

        # Check if any overlaps were found
        if not all_rows:
            if debug:
                print("No overlapping properties detected with the current threshold.")
                print(
                    f"Try a lower overlap_threshold value (current: {overlap_threshold})"
                )
            return gdf.drop(columns=["_area"]).to_crs(original_crs)

        # Create sparse matrix for the entire dataset
        if debug:
            print("Creating adjacency matrix...")

        # Store every pair in both directions so the matrix is symmetric
        pair_rows = np.asarray(all_rows)
        pair_cols = np.asarray(all_cols)
        sym_rows = np.concatenate([pair_rows, pair_cols])
        sym_cols = np.concatenate([pair_cols, pair_rows])
        data = np.ones(len(sym_rows), dtype=np.int8)

        adjacency_matrix = csr_matrix((data, (sym_rows, sym_cols)), shape=(n, n))

        # Find connected components (groups of overlapping properties)
        if debug:
            print("Finding connected components...")
        n_components, labels = connected_components(adjacency_matrix, directed=False)

        if debug:
            print(f"Found {n_components} distinct property groups")

        # Check if any merging actually happened
        if n_components == n:
            if debug:
                print("No properties were merged.")
            return gdf.drop(columns=["_area"]).to_crs(original_crs)

        # Process components sequentially to save memory
        merged_count = 0

        if debug:
            print("Processing and merging components...")

        # One output row (dict) per component
        processed_components = []

        for component_id in tqdm(range(n_components), disable=not debug):
            row_dict, count = _process_component((component_id, gdf, labels))
            processed_components.append(row_dict)
            merged_count += count

            # Free memory periodically (independent of the debug flag)
            if component_id % 1000 == 0:
                gc.collect()

        # Create GeoDataFrame in one operation from processed components
        if debug:
            print("Creating final GeoDataFrame...")
        merged_gdf = gpd.GeoDataFrame(processed_components, crs=projected_crs)

        if debug:
            print(
                f"Original properties: {len(gdf)}, Merged properties: {len(merged_gdf)}"
            )
            print(f"Properties merged into groups: {merged_count}")

            # Sanity check: summing numeric columns should conserve the total tax
            if "Property_Tax_Value" in merged_gdf.columns:
                print(
                    f"Total tax before merging: ${gdf['Property_Tax_Value'].sum():,.2f}"
                )
                print(
                    f"Total tax after merging: ${merged_gdf['Property_Tax_Value'].sum():,.2f}"
                )

        # Convert back to original CRS
        merged_gdf = merged_gdf.to_crs(original_crs)

        # Drop internal calculation columns
        if "_area" in merged_gdf.columns:
            merged_gdf = merged_gdf.drop(columns=["_area"])

        return merged_gdf

    except Exception as e:
        if debug:
            print(f"Error during merging: {str(e)}")
            traceback.print_exc()
        # Best-effort fallback, but never silent: callers running with
        # debug=False still learn that the result is unmerged
        warnings.warn(
            f"Property merging failed ({e!r}); returning the input unmerged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return source_gdf.copy()

In [173]:
# Merge parcels that overlap by at least 80%, scanning the frame in chunks of 10,000.
# This is the slow step of the notebook: the saved progress bar below shows roughly
# 30 minutes for the chunk pass over 804,530 parcels.
result = merge_overlapping_properties_serial(
    final_geom, overlap_threshold=0.8, chunk_size=10000
)

Starting memory-efficient property polygon merging...
Converting from EPSG:2229 to EPSG:32611 for processing
Fixing invalid geometries...
Pre-computing areas...
Area statistics: Min=0.00, Max=7132366.79, Mean=2928.51
Processing 804530 properties in 81 chunks of 10000...


100%|██████████| 81/81 [30:27<00:00, 22.56s/it]  


Creating adjacency matrix...
Finding connected components...
Found 671580 distinct property groups
Processing and merging components...


  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

Creating final GeoDataFrame...
Original properties: 804530, Merged properties: 671580
Properties merged into groups: 138973
Total tax before merging: $8,078,711,596.41
Total tax after merging: $8,078,711,596.41


In [176]:
# Zone prefix = the part of zone_cmplt before the first "-" (e.g. "[Q]C2-1VL" -> "[Q]C2").
# Qualifier markers such as (Q), [Q], (T) stay attached to the prefix, so "(Q)R1" and
# "R1" are treated as different prefixes downstream.
result["ZONE_PREFIX"] = result["zone_cmplt"].str.split("-").str[0]

In [None]:
# Inspect merged rows whose zone_type was joined from both "R" and "C" members
# (string columns are joined with ", " when parcels are merged).
result[result["zone_type"].isin(["R, C", "C, R"])]

In [None]:
# Relabel mixed "R"/"C" merged rows as "C".
# NOTE(review): this mutates `result` in place and the cell has no execution count in
# the saved notebook — confirm it runs before the zone_type summary on a fresh kernel.
result.loc[result["zone_type"].isin(["R, C", "C, R"]), "zone_type"] = "C"

In [219]:
# Inspect merged rows whose zone_type is "R, O".
# NOTE(review): the saved (empty) output comes from execution In [219], i.e. after the
# relabelling cell below (In [218]) had already run, so it no longer shows the
# rows that were relabelled.
result[result["zone_type"].isin(["R, O"])]

Unnamed: 0,geometry,AIN,Property Location,zone_type,acreage,Property_Tax_Value,zone_cmplt,Location Latitude,Location Longitude,Taxable Value,Property taxable?,ZONE_PREFIX


In [218]:
# Relabel mixed "R, O" merged rows as "R" (in-place update of `result`).
result.loc[result["zone_type"].isin(["R, O"]), "zone_type"] = "R"

In [224]:
# Build an AIN -> Effective Year lookup from final_gdf (for a repeated AIN the last
# row wins) and attach the year to every row of result via its AIN.
# NOTE(review): rows produced by merging several parcels presumably no longer carry a
# single source AIN, so they would come back as NaN here — confirm against the
# missing-value count in the next cell.
ain_to_effective_year = dict(zip(final_gdf["AIN"], final_gdf["Effective Year"]))

result["Effective Year"] = result["AIN"].map(ain_to_effective_year)


In [225]:
# Count rows whose AIN found no match in the lookup (Effective Year is NaN).
# These rows are silently excluded from the "recent" metrics computed later,
# because NaN > 2015 evaluates to False.
invalid_count = result["Effective Year"].isna().sum()
print(f"Number of rows without a valid Effective Year: {invalid_count}")


Number of rows without a valid Effective Year: 6022


In [228]:
# Aggregate acreage and property tax per ZONE_PREFIX, both overall and for parcels
# whose Effective Year is after `recent_year_cutoff`.
recent_year_cutoff = 2015
summary_columns = ["ZONE_PREFIX", "acreage", "Property_Tax_Value", "Effective Year"]

zone_summary = {}

# Show every prefix present (None/NaN are listed here but excluded from the summary)
zone_prefixes = result["ZONE_PREFIX"].unique()
print(zone_prefixes)

missing_columns = [col for col in summary_columns if col not in result.columns]
if missing_columns:
    # Report the problem once instead of once per zone
    print(f"Missing columns {missing_columns} in DataFrame. Skipping zone summary.")
else:
    # Single grouping pass instead of re-filtering the whole frame for every zone.
    # groupby drops None/NaN keys by default; sort=False keeps first-appearance
    # order, matching the order of unique() above.
    for zone, zone_data in result[summary_columns].groupby("ZONE_PREFIX", sort=False):
        total_acreage = zone_data["acreage"].sum()
        total_property_tax = zone_data["Property_Tax_Value"].sum()

        # Rows with a missing Effective Year never satisfy the comparison, so they
        # are left out of the "recent" subset
        recent_data = zone_data[zone_data["Effective Year"] > recent_year_cutoff]
        total_acreage_recent = recent_data["acreage"].sum()
        total_property_tax_recent = recent_data["Property_Tax_Value"].sum()

        # Avoid division by zero
        tax_per_acre = total_property_tax / total_acreage if total_acreage > 0 else 0
        tax_per_acre_recent = (
            total_property_tax_recent / total_acreage_recent
            if total_acreage_recent > 0
            else 0
        )

        # Store in dictionary
        zone_summary[zone] = {
            "total_acreage": round(total_acreage, 2),
            "total_property_tax": round(total_property_tax, 2),
            "tax_per_acre": round(tax_per_acre, 2),
            "tax_per_acre_recent": round(tax_per_acre_recent, 2),
        }

# Now zone_summary is a dictionary with all the metrics by zone prefix
print(zone_summary)


['RE9' 'RE11' 'OS' '[Q]OS' '(Q)R1' '(Q)RE11' '(Q)RD3' '[T][Q]M1' 'A1' 'RS'
 'C2' '[Q]M1' 'R3' 'C4' 'P' '[Q]PF' '(T)(Q)C1.5' 'CR' None '(T)(Q)R4'
 '(Q)R3' nan '(T)RS' 'R1' 'RA' '[Q]C1' '[Q]C1.5' '(Q)RD5' 'RE20' 'RE40'
 'RE15' '(T)RE11' 'A2' 'QCR' '(T)(Q)CR' 'QC1' '(Q)RD2' 'QRD3' '[Q]CR'
 '[Q]C2' 'C1' '(T)(Q)C2' 'R1P' 'C1.5' 'R2' '(Q)C1.5' '(WC)TOPANGA'
 '(T)(Q)RD2' 'RD2' 'RD1.5' '(Q)RD1.5' '(Q)RD6' '(T)(Q)R1' '(Q)CR'
 '(T)(Q)RD3' '(T)(Q)RD5' '(Q)C1' '[Q]C4' 'QC2' '(Q)P' '(T)(Q)C1' 'PF'
 '(T)R1' '[Q]R3' '(T)R3' 'QRD5' '(Q)C2' 'R4' '(T)RE9' '(T)(Q)RD4' '(Q)RE9'
 '(T)(Q)R3' '(Q)C4' 'M2' 'MR1' '(Q)MR1' '[Q]CM' '[T][Q]C2' '(T)(Q)RZ3'
 '[Q]RD2' '[Q]P' '(Q)R3P' '(T)(Q)RD1.5' '(Q)M1' 'RD5' 'M1' 'RD3'
 '(T)(Q)M1' 'QR3' '(Q)RS' '[T]R3' 'QRD1.5' 'R3P' '(Q)R4' '(Q)RZ2.5'
 '(Q)R2' '[Q]R2' '[Q]R1' '(Q)RD4' '(T)[Q]C2' '[T]R1' '(T)[Q]R4'
 '(T)(Q)RAS4' '[Q]RD5' '(Q)RAS3' '(T)(Q)RAS3' '(T)[Q]RAS4' 'RMP' '[T]RD2'
 '[T][Q]RD1.5' 'QRD2' '(T)(Q)C4' '[Q]RD3' '(WC)RIVER' '(Q)RAS4'
 '[T][Q]MR1' '[Q]MR1' '(WC)UP

In [226]:
# Aggregate acreage and property tax per zone_type (the single-letter zone class),
# both overall and for parcels whose Effective Year is after `recent_year_cutoff`.
recent_year_cutoff = 2015
summary_columns = ["zone_type", "acreage", "Property_Tax_Value", "Effective Year"]

zone_summary_types = {}

# Show every zone type present (None/NaN are listed here but excluded from the
# summary). The variable keeps its historical name although it holds zone types.
zone_prefixes = result["zone_type"].unique()
print(zone_prefixes)

missing_columns = [col for col in summary_columns if col not in result.columns]
if missing_columns:
    # Report the problem once instead of once per zone type
    print(f"Missing columns {missing_columns} in DataFrame. Skipping zone summary.")
else:
    # Single grouping pass instead of re-filtering the whole frame for every type.
    # groupby drops None/NaN keys by default; sort=False keeps first-appearance
    # order, matching the order of unique() above.
    for zone, zone_data in result[summary_columns].groupby("zone_type", sort=False):
        total_acreage = zone_data["acreage"].sum()
        total_property_tax = zone_data["Property_Tax_Value"].sum()

        # Rows with a missing Effective Year never satisfy the comparison, so they
        # are left out of the "recent" subset
        recent_data = zone_data[zone_data["Effective Year"] > recent_year_cutoff]
        total_acreage_recent = recent_data["acreage"].sum()
        total_property_tax_recent = recent_data["Property_Tax_Value"].sum()

        # Avoid division by zero
        tax_per_acre = total_property_tax / total_acreage if total_acreage > 0 else 0
        tax_per_acre_recent = (
            total_property_tax_recent / total_acreage_recent
            if total_acreage_recent > 0
            else 0
        )

        # Store in dictionary
        zone_summary_types[zone] = {
            "total_acreage": round(total_acreage, 2),
            "total_property_tax": round(total_property_tax, 2),
            "tax_per_acre": round(tax_per_acre, 2),
            "tax_per_acre_recent": round(tax_per_acre_recent, 2),
        }

# Now zone_summary_types is a dictionary with all the metrics by zone type
print(zone_summary_types)

['R' 'O' 'M' 'A' 'C' 'P' None 'Q' 'T' 'U' 'D' 'N' 'L' 'H' 'G' 'S']
{'R': {'total_acreage': np.float64(135605.39), 'total_property_tax': np.float64(5831477944.01), 'tax_per_acre': np.float64(43003.29), 'tax_per_acre_recent': np.float64(106068.47)}, 'O': {'total_acreage': np.float64(38782.41), 'total_property_tax': np.float64(1073025.93), 'tax_per_acre': np.float64(27.67), 'tax_per_acre_recent': np.float64(523.93)}, 'M': {'total_acreage': np.float64(19436.42), 'total_property_tax': np.float64(489443069.81), 'tax_per_acre': np.float64(25181.75), 'tax_per_acre_recent': np.float64(77075.02)}, 'A': {'total_acreage': np.float64(17286.44), 'total_property_tax': np.float64(33713641.22), 'tax_per_acre': np.float64(1950.29), 'tax_per_acre_recent': np.float64(27968.32)}, 'C': {'total_acreage': np.float64(14240.45), 'total_property_tax': np.float64(1496838554.25), 'tax_per_acre': np.float64(105111.75), 'tax_per_acre_recent': np.float64(280337.45)}, 'P': {'total_acreage': np.float64(12062.3), 'total

In [231]:
# Pull tax_per_acre out of each zone-type summary, skipping entries without it
tax_per_acre_values = {
    zone_key: metrics["tax_per_acre"]
    for zone_key, metrics in zone_summary_types.items()
    if "tax_per_acre" in metrics
}

# Rank zone types from highest to lowest tax per acre (stable for ties)
sorted_data = list(tax_per_acre_values.items())
sorted_data.sort(key=lambda pair: pair[1], reverse=True)

# One "<zone type>: <value>" line per entry
for letter, tax in sorted_data:
    print(f"{letter}: {tax}")

N: 134479.58
H: 129979.44
D: 107941.41
C: 105111.75
T: 83663.0
Q: 45620.14
R: 43003.29
U: 30056.37
M: 25181.75
L: 7494.72
P: 4061.67
A: 1950.29
G: 799.93
O: 27.67
S: 0.0


In [232]:
# Pull tax_per_acre_recent out of each zone-type summary, skipping entries without it.
# Note: this reuses (and overwrites) the tax_per_acre_values name from the previous cell.
tax_per_acre_values = {
    zone_key: metrics["tax_per_acre_recent"]
    for zone_key, metrics in zone_summary_types.items()
    if "tax_per_acre_recent" in metrics
}

# Rank zone types from highest to lowest recent tax per acre (stable for ties)
sorted_data = list(tax_per_acre_values.items())
sorted_data.sort(key=lambda pair: pair[1], reverse=True)

# One "<zone type>: <value>" line per entry
for letter, tax in sorted_data:
    print(f"{letter}: {tax}")

N: 477377.02
C: 280337.45
T: 191304.97
R: 106068.47
M: 77075.02
P: 48318.26
A: 27968.32
U: 24583.59
O: 523.93
Q: 0
D: 0
L: 0.0
H: 0
G: 0
S: 0


In [233]:
# Attach the zone-type level metrics to every parcel row; zone types without a
# summary entry (including missing values) get None.
for metric_name in ("tax_per_acre", "tax_per_acre_recent"):
    result[metric_name] = result["zone_type"].map(
        lambda zone, metric=metric_name: zone_summary_types.get(zone, {}).get(
            metric, None
        )
    )


In [234]:
# Spot-check the summary entry for zone type "R".
zone_summary_types["R"]

{'total_acreage': np.float64(135605.39),
 'total_property_tax': np.float64(5831477944.01),
 'tax_per_acre': np.float64(43003.29),
 'tax_per_acre_recent': np.float64(106068.47)}

In [188]:
# Spot-check the summary entry for zone type "C".
# NOTE(review): the saved output is stale — it comes from execution In [188], lacks
# "tax_per_acre_recent" and its totals differ from the later summary run (In [226]).
# Re-run this cell after the summary cell.
zone_summary_types["C"]

{'total_acreage': np.float64(14238.62),
 'total_property_tax': np.float64(1494437946.19),
 'tax_per_acre': np.float64(104956.63)}

In [189]:
# Spot-check the summary entry for zone type "P".
# NOTE(review): saved output is from an earlier execution (In [189]) and lacks
# "tax_per_acre_recent"; re-run after the summary cell.
zone_summary_types["P"]

{'total_acreage': np.float64(12062.3),
 'total_property_tax': np.float64(48993060.62),
 'tax_per_acre': np.float64(4061.67)}

In [235]:
# Spot-check the summary entry for zone prefix "R1".
zone_summary["R1"]

{'total_acreage': np.float64(40420.92),
 'total_property_tax': np.float64(1607094989.96),
 'tax_per_acre': np.float64(39758.99),
 'tax_per_acre_recent': np.float64(80930.09)}

In [181]:
# Spot-check the summary entry for zone prefix "R2".
# NOTE(review): saved output is from an earlier execution (In [181]) and lacks
# "tax_per_acre_recent"; re-run after the summary cell.
zone_summary["R2"]

{'total_acreage': np.float64(6902.92),
 'total_property_tax': np.float64(267469883.37),
 'tax_per_acre': np.float64(38747.34)}

In [182]:
# Spot-check the summary entry for zone prefix "R5".
# NOTE(review): saved output is from an earlier execution (In [182]) and lacks
# "tax_per_acre_recent"; re-run after the summary cell. Raises KeyError if no
# parcel has this prefix.
zone_summary["R5"]

{'total_acreage': np.float64(73.31),
 'total_property_tax': np.float64(19588509.95),
 'tax_per_acre': np.float64(267208.35)}

In [194]:
# Spot-check a merged row: its Property Location holds the ", "-joined addresses of
# every parcel in the group (see saved output).
result[result["Property Location"].str.startswith("7366 WOODVALE", na=False)][
    "Property Location"
]


9701    7366 WOODVALE CT  LOS ANGELES CA  91307, 7360 ...
Name: Property Location, dtype: object

In [193]:
# Pre-merge check: zone_type distribution of the individual units at this address.
final_geom[final_geom["Property Location"].str.startswith("901 S FLOWER ST", na=False)][
    "zone_type"
].value_counts()


zone_type
R    78
Name: count, dtype: int64

In [200]:
# Show complete cell contents; merged rows carry very long joined address strings
pd.set_option("display.max_colwidth", None)

# Print the (merged) Property Location of each address under inspection
for address_prefix in ("901 S FLOWER ST", "2508 N HYPERION", "615 S WILTON"):
    matches_address = result["Property Location"].str.startswith(
        address_prefix, na=False
    )
    print(result[matches_address]["Property Location"])

457158    901 S FLOWER ST, NO   301  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   300  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   302  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   303  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   304  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   305  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   306  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   307  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   308  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   400  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   402  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   404  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   406  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   408  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   410  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   412  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   413  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   414  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   415  LOS ANGELES CA  90015, 901 S FLOWER ST, NO   416  LOS ANGELES C

In [205]:
# Pre-merge check: zone_type distribution of the individual units at this address.
final_geom[final_geom["Property Location"].str.startswith("900 S FIGUEROA", na=False)][
    "zone_type"
].value_counts()

zone_type
C    273
Name: count, dtype: int64

In [203]:
# Pre-merge rows for this address. The saved output shows units that share an
# identical polygon but carry different zone_type / zone_cmplt values, which is how
# mixed labels such as "C, R" arise after merging.
final_geom[final_geom["Property Location"].str.startswith("2508 N HYPERION", na=False)]


Unnamed: 0,geometry,AIN,Property Location,zone_type,acreage,Property_Tax_Value,zone_cmplt,Location Latitude,Location Longitude,Taxable Value,Property taxable?
622131,"MULTIPOLYGON (((6479103.52 1860552.18, 6479130.91 1860551.59, 6479111.64 1860492.51, 6479001.709 1860495.142, 6479020.819 1860553.986, 6479103.52 1860552.18)))",5432002028,"2508 N HYPERION AVE, 101 LOS ANGELES CA 90027",C,0.15,11067.66,[Q]C2-1VL,34.104503,-118.27287,1106766,Y
622132,"MULTIPOLYGON (((6479103.52 1860552.18, 6479130.91 1860551.59, 6479111.64 1860492.51, 6479001.709 1860495.142, 6479020.819 1860553.986, 6479103.52 1860552.18)))",5432002029,"2508 N HYPERION AVE, 102 LOS ANGELES CA 90027",C,0.15,10824.32,[Q]C2-1VL,34.104503,-118.27287,1082432,Y
622133,"MULTIPOLYGON (((6479103.52 1860552.18, 6479130.91 1860551.59, 6479111.64 1860492.51, 6479001.709 1860495.142, 6479020.819 1860553.986, 6479103.52 1860552.18)))",5432002030,"2508 N HYPERION AVE, 103 LOS ANGELES CA 90027",C,0.15,9955.05,[Q]C2-1VL,34.104503,-118.27287,995505,Y
622134,"MULTIPOLYGON (((6479103.52 1860552.18, 6479130.91 1860551.59, 6479111.64 1860492.51, 6479001.709 1860495.142, 6479020.819 1860553.986, 6479103.52 1860552.18)))",5432002031,"2508 N HYPERION AVE, 104 LOS ANGELES CA 90027",C,0.15,9091.2,[Q]C2-1VL,34.104503,-118.27287,909120,Y
622135,"MULTIPOLYGON (((6479103.52 1860552.18, 6479130.91 1860551.59, 6479111.64 1860492.51, 6479001.709 1860495.142, 6479020.819 1860553.986, 6479103.52 1860552.18)))",5432002032,"2508 N HYPERION AVE, 105 LOS ANGELES CA 90027",R,0.15,9273.26,RD2-1VL,34.104328,-118.272436,927326,Y


In [206]:
# Pre-merge rows for this address; as above, the saved output shows units on one
# shared polygon with differing zone_type values.
final_geom[final_geom["Property Location"].str.startswith("615 S WILTON", na=False)]


Unnamed: 0,geometry,AIN,Property Location,zone_type,acreage,Property_Tax_Value,zone_cmplt,Location Latitude,Location Longitude,Taxable Value,Property taxable?
657930,"MULTIPOLYGON (((6466512.82 1845334.48, 6466363.03 1845334.28, 6466365.73 1845384.29, 6466512.76 1845384.48, 6466512.82 1845334.48)))",5504018019,"615 S WILTON PL, A LOS ANGELES CA 90005",C,0.17,10680.0,CR(PKM)-1,34.062734,-118.314427,1068000,Y
657931,"MULTIPOLYGON (((6466512.82 1845334.48, 6466363.03 1845334.28, 6466365.73 1845384.29, 6466512.76 1845384.48, 6466512.82 1845334.48)))",5504018020,"615 S WILTON PL, B LOS ANGELES CA 90005",C,0.17,4700.11,CR(PKM)-1,34.062734,-118.314427,470011,Y
657932,"MULTIPOLYGON (((6466512.82 1845334.48, 6466363.03 1845334.28, 6466365.73 1845384.29, 6466512.76 1845384.48, 6466512.82 1845334.48)))",5504018021,"615 S WILTON PL, C LOS ANGELES CA 90005",C,0.17,5010.11,CR(PKM)-1,34.062734,-118.314427,501011,Y
657933,"MULTIPOLYGON (((6466512.82 1845334.48, 6466363.03 1845334.28, 6466365.73 1845384.29, 6466512.76 1845384.48, 6466512.82 1845334.48)))",5504018022,"615 S WILTON PL, D LOS ANGELES CA 90005",R,0.17,8649.22,RD3-1,34.062788,-118.314987,864922,Y


In [186]:
# Total property tax scaled by 0.2478.
# NOTE(review): 0.2478 is an unexplained constant — presumably a share of the tax;
# document its source or promote it to a named constant.
result["Property_Tax_Value"].sum() * 0.2478

np.float64(2001904733.5903983)

In [187]:
# Total acreage across all merged parcels (merged groups keep the max acreage of
# their members rather than the sum).
result["acreage"].sum()

np.float64(243725.83699999994)

In [236]:
# Export the merged parcels as GeoJSON.
# NOTE(review): `result` is in the input CRS (EPSG:2229 per the merge log), while
# GeoJSON consumers generally expect WGS 84 — consider result.to_crs(4326) first.
result.to_file("final_parcels_merged.geojson", driver="GeoJSON")

In [237]:
# Export the merged parcels as an ESRI Shapefile.
# NOTE(review): the saved run emitted writer warnings (see output). Shapefile field
# names are limited to 10 characters, so columns such as "Property_Tax_Value" and
# "tax_per_acre_recent" are presumably truncated, and the very long joined
# text fields may also be cut — confirm.
result.to_file("final_parcels_merged.shp", driver="ESRI Shapefile")


  result.to_file("final_parcels_merged.shp", driver="ESRI Shapefile")
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
