In [2]:
import pandas as pd

In [4]:
# Load the merged property/store table and list its columns.
# NOTE(review): the recorded column list includes an X column whose name carries
# stray leading characters (looks like a mis-decoded byte-order mark) — TODO confirm
# and clean this up where the CSV is produced.
merged_csv_path = '../local-data/property_size_merged.csv'
df = pd.read_csv(merged_csv_path)
df.columns

Index(['ZIP_CODE', 'COUNTY', 'Y', 'STORE_TYPE', 'CITY', 'Ï»¿X', 'LATITUDE',
       'ADDITONAL_ADDRESS', 'INCENTIVE_PROGRAM', 'GRANTEE_NAME', 'STORE_NAME',
       'STATE', 'OBJECTID', 'STORE_STREET_ADDRESS', 'RECORD_ID', 'ZIP4',
       'LONGITUDE', 'FOOD_COMPOSITE_KEY', 'TAX_OBJ_ID', 'ACCOUNT_NUM',
       'APPRAISAL_YR_x', 'BLDG_CLASS_DESC', 'YEAR_BUILT', 'REMODEL_YR',
       'GROSS_BLDG_AREA', 'FOUNDATION_TYP_DESC', 'FOUNDATION_AREA',
       'BASEMENT_DESC', 'BASEMENT_AREA', 'NUM_STORIES', 'CONSTR_TYP_DESC',
       'HEATING_TYP_DESC', 'AC_TYP_DESC', 'NUM_UNITS', 'NET_LEASE_AREA',
       'PROPERTY_NAME', 'PROPERTY_QUAL_DESC', 'PROPERTY_COND_DESC',
       'PHYS_DEPR_PCT', 'FUNCT_DEPR_PCT', 'EXTRNL_DEPR_PCT', 'TOT_DEPR_PCT',
       'IMP_VAL', 'LAND_VAL', 'MKT_VAL', 'APPR_METHOD_DESC',
       'COMPARABILITY_CD', 'PCT_COMPLETE', 'APPRAISAL_YR_y', 'DIVISION_CD',
       'BIZ_NAME', 'OWNER_NAME1', 'OWNER_NAME2', 'EXCLUDE_OWNER',
       'OWNER_ADDRESS_LINE1', 'OWNER_ADDRESS_LINE2', 'OWNER_ADDRE

In [8]:
# Peek at the building-area column, then at how often each distinct value occurs.
gross_area = df['GROSS_BLDG_AREA']
display(gross_area)
display(gross_area.value_counts())

0          NaN
1       4800.0
2       1465.0
3          NaN
4       1296.0
         ...  
3652       NaN
3653       NaN
3654       NaN
3655       NaN
3656       NaN
Name: GROSS_BLDG_AREA, Length: 3657, dtype: float64

GROSS_BLDG_AREA
0.0        90
2400.0     15
3080.0      9
2000.0      9
2585.0      9
           ..
9762.0      1
3906.0      1
626.0       1
14991.0     1
23711.0     1
Name: count, Length: 1245, dtype: int64

# Map: building size groups

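The next cell builds a Folium map of the store locations, coloring each point by the quartile of its `GROSS_BLDG_AREA` (Small, Medium, Large, Very large). The map is saved to `../graphs/location_size_map.html` (relative to this notebook) and displayed inline.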

In [10]:
import html

import pandas as pd
import folium
from branca.colormap import StepColormap

# Build a Folium map of the locations in property_size_merged.csv, colouring each
# point by the quartile of its GROSS_BLDG_AREA, then save it and show it inline.

# Read data
df = pd.read_csv('../local-data/property_size_merged.csv')

# Try to detect latitude/longitude columns (common names)
lat_candidates = ['LATITUDE','Latitude','latitude','LAT','Y']
lon_candidates = ['LONGITUDE','Longitude','longitude','LON','X']
lat_col = next((c for c in lat_candidates if c in df.columns), None)
lon_col = next((c for c in lon_candidates if c in df.columns), None)

# Case-insensitive retry, applied only to the coordinate that is still missing so
# that a column already found above is never replaced by a looser match.
if lat_col is None:
    lat_col = next((c for c in df.columns if c.lower() in ('latitude', 'lat', 'y')), None)
if lon_col is None:
    lon_col = next((c for c in df.columns if c.lower() in ('longitude', 'lon', 'x')), None)

# Ensure GROSS_BLDG_AREA numeric
if 'GROSS_BLDG_AREA' not in df.columns:
    raise ValueError('GROSS_BLDG_AREA column not found in dataframe')

df['GROSS_BLDG_AREA'] = pd.to_numeric(df['GROSS_BLDG_AREA'], errors='coerce')

# Filter to rows with valid coords and area
if lat_col is not None and lon_col is not None:
    df_map = df.dropna(subset=[lat_col, lon_col, 'GROSS_BLDG_AREA']).copy()
    df_map[lat_col] = pd.to_numeric(df_map[lat_col], errors='coerce')
    df_map[lon_col] = pd.to_numeric(df_map[lon_col], errors='coerce')
    df_map = df_map.dropna(subset=[lat_col, lon_col])
else:
    # fallback: no coords present — create empty df_map and center map on Dallas
    df_map = pd.DataFrame(columns=df.columns)

# Bin building area into four labelled size groups.
size_labels = ['Small','Medium','Large','Very large']
if df_map.empty:
    # Neither qcut nor cut can bin an empty column (cut raises "Cannot cut empty
    # array"), so give the empty frame an empty categorical and carry on to the
    # fallback map below.
    df_map['area_bin'] = pd.Categorical([], categories=size_labels, ordered=True)
else:
    try:
        # Quartiles. If duplicate quantile edges get dropped, the four labels no
        # longer match the number of bins and qcut raises ValueError.
        df_map['area_bin'] = pd.qcut(df_map['GROSS_BLDG_AREA'], q=4, labels=size_labels, duplicates='drop')
    except ValueError:
        # Fall back to four equal-width bins (note: these are no longer quartiles).
        df_map['area_bin'] = pd.cut(df_map['GROSS_BLDG_AREA'], bins=4, labels=size_labels)

# Color map for categories
color_map = {
    'Small': 'green',
    'Medium': 'blue',
    'Large': 'orange',
    'Very large': 'red'
}

# Choose map center
if not df_map.empty:
    center = [df_map[lat_col].mean(), df_map[lon_col].mean()]
else:
    center = [32.7767, -96.7970]  # Dallas fallback

m = folium.Map(location=center, zoom_start=10, tiles='CartoDB.Positron')

# Add a legend as a StepColormap proxy. Four colours need five boundaries, and
# vmin/vmax must span them; with the default 0-1 range the bar would be drawn
# almost entirely in the first colour.
colormap = StepColormap(
    colors=[color_map[label] for label in size_labels],
    index=[0, 1, 2, 3, 4],
    vmin=0,
    vmax=4,
    caption='GROSS_BLDG_AREA quartile (Small -> Very large)',
)
colormap.add_to(m)

# Identifying fields shown in each popup (only those present in the data).
popup_fields = [fld for fld in ['STORE_NAME','RECORD_ID','STORE_TYPE','GROSS_BLDG_AREA'] if fld in df_map.columns]

# Add points
for _, row in df_map.iterrows():
    # Coordinates are already numeric and non-null after the filtering above.
    lat = float(row[lat_col])
    lon = float(row[lon_col])
    color = color_map.get(str(row.get('area_bin', 'Unknown')), 'gray')
    # Escape values: the popup string is rendered as HTML, and names may contain & or <.
    popup_items = [f"{fld}: {html.escape(str(row.get(fld)))}" for fld in popup_fields]
    popup = folium.Popup('<br>'.join(popup_items), max_width=300)
    # Scale radius modestly for visibility: sqrt(area) / 5, clamped to [3, 15].
    try:
        radius = max(3, min(15, (float(row['GROSS_BLDG_AREA']) ** 0.5) / 5))
    except (TypeError, ValueError):
        # e.g. a negative area yields a complex root that cannot be compared
        radius = 6
    folium.CircleMarker(location=[lat, lon], radius=radius, color=color, fill=True, fill_opacity=0.7, popup=popup).add_to(m)

# Save and display
out_path = '../graphs/location_size_map.html'
try:
    m.save(out_path)
    print(f"Map saved to {out_path}")
except Exception as e:
    # Best effort: a failed save should not prevent the inline display below.
    print('Could not save map:', e)

# Display inline (works in Jupyter)
m


Map saved to ../graphs/location_size_map.html


In [11]:
import html

import pandas as pd
import folium
from branca.element import Template, MacroElement

# Build a Folium map of the locations in property_size_merged.csv, colouring each
# point by the quartile of its GROSS_BLDG_AREA, with a custom HTML legend; then
# save it and show it inline.

# Read data
df = pd.read_csv('../local-data/property_size_merged.csv')

# Try to detect latitude/longitude columns (common names)
lat_candidates = ['LATITUDE','Latitude','latitude','LAT','Y']
lon_candidates = ['LONGITUDE','Longitude','longitude','LON','X']
lat_col = next((c for c in lat_candidates if c in df.columns), None)
lon_col = next((c for c in lon_candidates if c in df.columns), None)

# Case-insensitive retry, applied only to the coordinate that is still missing so
# that a column already found above is never replaced by a looser match.
if lat_col is None:
    lat_col = next((c for c in df.columns if c.lower() in ('latitude', 'lat', 'y')), None)
if lon_col is None:
    lon_col = next((c for c in df.columns if c.lower() in ('longitude', 'lon', 'x')), None)

# Ensure GROSS_BLDG_AREA numeric
if 'GROSS_BLDG_AREA' not in df.columns:
    raise ValueError('GROSS_BLDG_AREA column not found in dataframe')

df['GROSS_BLDG_AREA'] = pd.to_numeric(df['GROSS_BLDG_AREA'], errors='coerce')

# Filter to rows with valid coords and area
if lat_col is not None and lon_col is not None:
    df_map = df.dropna(subset=[lat_col, lon_col, 'GROSS_BLDG_AREA']).copy()
    df_map[lat_col] = pd.to_numeric(df_map[lat_col], errors='coerce')
    df_map[lon_col] = pd.to_numeric(df_map[lon_col], errors='coerce')
    df_map = df_map.dropna(subset=[lat_col, lon_col])
else:
    # fallback: no coords present — create empty df_map and center map on Dallas
    df_map = pd.DataFrame(columns=df.columns)

# Bin building area into four labelled size groups.
size_labels = ['Small','Medium','Large','Very large']
if df_map.empty:
    # Neither qcut nor cut can bin an empty column (cut raises "Cannot cut empty
    # array"), so give the empty frame an empty categorical and carry on to the
    # fallback map below.
    df_map['area_bin'] = pd.Categorical([], categories=size_labels, ordered=True)
else:
    try:
        # Quartiles. If duplicate quantile edges get dropped, the four labels no
        # longer match the number of bins and qcut raises ValueError.
        df_map['area_bin'] = pd.qcut(df_map['GROSS_BLDG_AREA'], q=4, labels=size_labels, duplicates='drop')
    except ValueError:
        # Fall back to four equal-width bins (note: these are no longer quartiles).
        df_map['area_bin'] = pd.cut(df_map['GROSS_BLDG_AREA'], bins=4, labels=size_labels)

# Color map for categories
color_map = {
    'Small': 'green',
    'Medium': 'blue',
    'Large': 'orange',
    'Very large': 'red'
}

# Choose map center
if not df_map.empty:
    center = [df_map[lat_col].mean(), df_map[lon_col].mean()]
else:
    center = [32.7767, -96.7970]  # Dallas fallback

m = folium.Map(location=center, zoom_start=10, tiles='CartoDB.Positron')

# Identifying fields shown in each popup (only those present in the data).
popup_fields = [fld for fld in ['STORE_NAME','RECORD_ID','STORE_TYPE','GROSS_BLDG_AREA'] if fld in df_map.columns]

# Add points
for _, row in df_map.iterrows():
    # Coordinates are already numeric and non-null after the filtering above.
    lat = float(row[lat_col])
    lon = float(row[lon_col])
    color = color_map.get(str(row.get('area_bin', 'Unknown')), 'gray')
    # Escape values: the popup string is rendered as HTML, and names may contain & or <.
    popup_items = [f"{fld}: {html.escape(str(row.get(fld)))}" for fld in popup_fields]
    popup = folium.Popup('<br>'.join(popup_items), max_width=300)
    # Scale radius modestly for visibility: sqrt(area) / 5, clamped to [3, 15].
    try:
        radius = max(3, min(15, (float(row['GROSS_BLDG_AREA']) ** 0.5) / 5))
    except (TypeError, ValueError):
        # e.g. a negative area yields a complex root that cannot be compared
        radius = 6
    folium.CircleMarker(location=[lat, lon], radius=radius, color=color, fill=True, fill_opacity=0.7, popup=popup).add_to(m)

# Create custom HTML legend (fixed box in the bottom-right corner; colours mirror color_map)
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; right: 50px; width: 180px; height: 140px; 
            background-color: white; z-index:9999; font-size:14px;
            border:2px solid grey; border-radius: 5px; padding: 10px">
<p style="margin: 0 0 5px 0; font-weight: bold;">Building Size (Quartiles)</p>
<p style="margin: 3px 0;"><i class="fa fa-circle" style="color:green"></i> Small</p>
<p style="margin: 3px 0;"><i class="fa fa-circle" style="color:blue"></i> Medium</p>
<p style="margin: 3px 0;"><i class="fa fa-circle" style="color:orange"></i> Large</p>
<p style="margin: 3px 0;"><i class="fa fa-circle" style="color:red"></i> Very Large</p>
</div>
'''

# Attach the legend markup to the page that hosts the map.
m.get_root().html.add_child(folium.Element(legend_html))

# Save and display
out_path = '../graphs/location_size_map.html'
try:
    m.save(out_path)
    print(f"Map saved to {out_path}")
except Exception as e:
    # Best effort: a failed save should not prevent the inline display below.
    print('Could not save map:', e)

# Display inline (works in Jupyter)
m

Map saved to ../graphs/location_size_map.html
