In [None]:

import geopandas as gpd
import pandas as pd
from shapely.geometry import *
from pyproj import CRS
# Show every column when a frame is displayed
pd.options.display.max_columns = None
# Do not cap the displayed width of a cell, so long values are never truncated
pd.options.display.max_colwidth = None
pd.options.display.max_rows = None
pd.options.display.precision = 3  # number of decimal places shown
pd.set_option('display.float_format', "{:.3f}".format)


## Extract old houses in FHSZ (Fire Hazard Severity Zone) areas

In [22]:
## WUI houses, restricted to the nine study counties (COUNTYFP codes)
house = gpd.read_parquet('../Data/Processed/buildings/buildingswui.parquet')
house = house[house['COUNTYFP'].isin(['111','037','059','073','079','083','065','071','029'])]
## FHSZ tags
# NOTE: despite its name, this list holds the FHSZ classes that are KEPT below
# (the filter selects rows whose class is in the list).
excluded_classes = [
    "Recommended LRA Very High",
    "SRA Very High",
    "SRA High",
    "Reclassified LRA Very High",
    "SRA Moderate",
]
housefhsz = house[house['FHSZ_7Clas'].isin(excluded_classes)]

In [25]:
# Load the new-house lookup table from CSV and list its columns.
newhousefhsz=pd.read_csv('../Data/Processed/buildings/newhouse.csv')
print(newhousefhsz.columns)


Index(['Unnamed: 0', 'FID_Califo', 'geometry', 'FHSZ_7Clas', 'COUNTYFP',
       'newhouse'],
      dtype='object')


In [None]:
# Add a 'newhouse' column that marks each building as old or new.
# Any 'newhouse' column left over from a previous run of this cell is dropped
# first; otherwise the merge would emit 'newhouse_x'/'newhouse_y' and the
# fillna below would fail with a KeyError when the cell is re-executed.
housefhsz=(
    housefhsz
    .drop(columns='newhouse',errors='ignore')
    .merge(newhousefhsz[['FID_Califo','newhouse']],on='FID_Califo',how='left')
)
# Buildings absent from the new-house table get newhouse = 0.
housefhsz['newhouse']=housefhsz['newhouse'].fillna(0)
# Old houses are those whose id does not appear in the new-house table.
oldhousefhsz= housefhsz[~housefhsz['FID_Califo'].isin(newhousefhsz['FID_Califo'])]
print(len(housefhsz),len(oldhousefhsz),len(newhousefhsz))

744381 705506 38875


# Create 10m defensible areas for years 2010 and 2022

In [None]:
def clusterandbufferhousefhsz(meter1,fp,year):
    """Return the land within ``meter1`` of houses, with the footprints removed.

    Houses are buffered by ``meter1`` (units of EPSG:5070), overlapping buffers
    are dissolved, and the union of the house footprints is subtracted.  Each
    remaining polygon becomes one row.

    Parameters
    ----------
    meter1 : float
        Buffer distance passed to ``GeoSeries.buffer``.
    fp : str
        County code.  For ``year == 2022`` it selects rows of the module-level
        ``house`` layer; it is always written to the output ``COUNTYFP`` column.
    year : int
        ``2022`` uses ``house`` filtered by ``fp``; ``2010`` uses the
        module-level ``oldhousefhsz`` layer.
        NOTE(review): the 2010 layer is NOT filtered by ``fp`` although the
        result is still stamped with ``fp`` -- confirm this is intended.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``cluster`` (running polygon index), ``geometry`` (EPSG:4326)
        and ``COUNTYFP``.

    Raises
    ------
    ValueError
        If ``year`` is neither 2010 nor 2022 (previously this surfaced as an
        UnboundLocalError), or if the input layer has no CRS to reproject from.
    """
    # Pick the building layer for the requested snapshot year.
    if year==2022:
        housef=house[house['COUNTYFP'] == fp]
    elif year==2010:
        housef=oldhousefhsz
    else:
        raise ValueError(f"unsupported year {year!r}; expected 2010 or 2022")

    def _dissolve(geoms):
        # GeoPandas >= 1.0 deprecates ``unary_union`` in favour of
        # ``union_all()``; fall back for older releases.
        return geoms.union_all() if hasattr(geoms, "union_all") else geoms.unary_union

    # Buffering needs the projected CRS.  Reproject instead of silently
    # returning None when the layer arrives in another CRS.
    expected_crs = CRS("EPSG:5070")
    if housef.crs!=expected_crs:
        housef=housef.to_crs(expected_crs)

    footprints = housef.geometry
    # Buffer every house and dissolve overlapping buffers into single geometries.
    dissolved_buffer = _dissolve(footprints.buffer(meter1))
    # Remove the house footprints themselves from the dissolved buffer.
    ring = dissolved_buffer.difference(_dissolve(footprints))

    # Normalise Polygon / MultiPolygon / anything else into a list of polygons.
    if isinstance(ring, Polygon):
        ring_polygons = [ring]
    elif isinstance(ring, MultiPolygon):
        ring_polygons = list(ring.geoms)
    else:
        ring_polygons = []

    # One row per remaining polygon; the running index becomes the cluster id.
    ring_gdf = (
        gpd.GeoDataFrame(geometry=ring_polygons, crs="EPSG:5070")
        .to_crs("EPSG:4326")
        .reset_index()
        .rename(columns={'index': 'cluster'})
    )
    ring_gdf['COUNTYFP']=fp
    return ring_gdf

In [None]:
# Build the 10 m buffer ring for county '079' from the 2022 layer.
# The calls for the other counties are left commented out below.
clusterandbufferhousefhsz(10,'079',2022)
# clusterandbufferhousefhsz(10,'083',2022)
# clusterandbufferhousefhsz(10,'111',2022)
# clusterandbufferhousefhsz(10,'037',2022)
# clusterandbufferhousefhsz(10,'059',2022)
# clusterandbufferhousefhsz(10,'073',2022)
# clusterandbufferhousefhsz(10,'065',2022)
# clusterandbufferhousefhsz(10,'071',2022)
# clusterandbufferhousefhsz(10,'029',2022)




# Classifying housing types

In [None]:
def clusterhouse(meter1,fp,year):
    """Group the FHSZ houses of one county into clusters of overlapping buffers.

    Every house of county ``fp`` in the module-level ``housefhsz`` layer is
    buffered by ``meter1`` (units of EPSG:5070) and the overlapping buffers are
    dissolved.  Each resulting polygon is one cluster.

    Parameters
    ----------
    meter1 : float
        Buffer distance; also used in the name of the id column.
    fp : str
        County code used to filter ``housefhsz`` and to prefix the cluster id.
    year : int
        Only ``2022`` is supported.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``geometry`` (EPSG:4326), ``COUNTYFP`` and ``cluster{meter1}``,
        where the id is ``fp`` + polygon index + ``'000'``.

    Raises
    ------
    ValueError
        If ``year`` is not 2022 (previously this surfaced as an
        UnboundLocalError), or if the input layer has no CRS to reproject from.
    """
    if year!=2022:
        raise ValueError(f"unsupported year {year!r}; expected 2022")
    housef=housefhsz[housefhsz['COUNTYFP'] == fp]
    print(len(housef))

    # Buffering needs the projected CRS.  Reproject instead of silently
    # returning None when the layer arrives in another CRS.
    expected_crs = CRS("EPSG:5070")
    if housef.crs!=expected_crs:
        housef=housef.to_crs(expected_crs)

    # Buffer every house and dissolve overlapping buffers into single geometries.
    buffered = housef.geometry.buffer(meter1)
    # GeoPandas >= 1.0 deprecates ``unary_union`` in favour of ``union_all()``.
    dissolved = buffered.union_all() if hasattr(buffered, "union_all") else buffered.unary_union

    # Normalise Polygon / MultiPolygon / anything else into a list of polygons.
    if isinstance(dissolved, Polygon):
        cluster_polygons = [dissolved]
    elif isinstance(dissolved, MultiPolygon):
        cluster_polygons = list(dissolved.geoms)
    else:
        cluster_polygons = []

    # One row per dissolved polygon; the running index seeds the cluster id.
    cluster_gdf = (
        gpd.GeoDataFrame(geometry=cluster_polygons, crs="EPSG:5070")
        .to_crs("EPSG:4326")
        .reset_index()
        .rename(columns={'index': 'cluster'})
    )
    cluster_gdf['COUNTYFP']=fp
    # County code + polygon index + '000' forms the cluster id string.
    cluster_gdf[f'cluster{meter1}']= cluster_gdf['COUNTYFP']+  cluster_gdf['cluster'].astype(str)+'000'
    return cluster_gdf.drop(columns='cluster')

In [28]:
def cluster15and50(housefhsz,meter1,meter2):
    """Assign every house to its ``meter1`` and ``meter2`` buffer cluster.

    ``clusterhouse`` is run for each of the nine study counties at both
    distances, the per-county cluster polygons are stacked, and ``housefhsz``
    is spatially left-joined to each stack.

    Note: ``clusterhouse`` reads the module-level ``housefhsz`` layer, not the
    ``housefhsz`` argument; the argument is only the left side of the joins.

    Parameters
    ----------
    housefhsz : geopandas.GeoDataFrame
        House layer to join; reprojected to the buffers' CRS when it differs.
    meter1, meter2 : float
        The two buffer distances.

    Returns
    -------
    tuple
        ``(join_meter1, join_meter2)``: the houses left-joined to each cluster
        table.  If the spatial join raises, the error is printed and the two
        unjoined cluster tables are returned instead, so callers should check
        the columns of the result.
    """
    # Order in which the counties are processed (drives the progress printout).
    run_order = ('111','083','037','059','073','071','065','079','029')
    # Row order of the stacked cluster tables.
    stack_order = ('079','029','083','111','037','059','073','065','071')

    def _stack_county_clusters(meter):
        # Cluster each county once, then stack the results in a fixed order.
        per_county = {fp: clusterhouse(meter, fp, 2022) for fp in run_order}
        return pd.concat([per_county[fp] for fp in stack_order], axis=0)

    sbuffermeter1 = _stack_county_clusters(meter1)
    # Report the distance actually used rather than a hard-coded "15m".
    print(f'{meter1}m buffer cluster finished!')
    sbuffermeter2 = _stack_county_clusters(meter2)

    if housefhsz.crs!=sbuffermeter1.crs:
        housefhsz=housefhsz.to_crs(sbuffermeter1.crs)
    # The joins yield COUNTYFP_left (house) and COUNTYFP_right (cluster).
    try:
        sbuffer15joinhouse=gpd.sjoin(housefhsz,sbuffermeter1,how='left')
        sbuffer50joinhouse=gpd.sjoin(housefhsz,sbuffermeter2,how='left')
        return sbuffer15joinhouse,sbuffer50joinhouse
    except Exception as e:
        # Best-effort fallback kept from the original: hand back the cluster tables.
        print(f"An error occurred: {e}")
        return sbuffermeter1,sbuffermeter2
 
# Reproject to EPSG:5070, the CRS clusterhouse works in, then assign every
# house its 15 m and 50 m cluster.
housefhsz=housefhsz.to_crs('epsg:5070')
sbuffer15joinhouse,sbuffer50joinhouse=cluster15and50(housefhsz,15,50)

56602


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


22360


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


188363


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


55123


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


173550


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


97708


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


92871


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


28876


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


28928


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


15m buffer cluster finished!
56602


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


22360


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


188363


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


55123


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


173550


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


97708


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


92871


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


28876


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


28928


  buffer_minus_houses = housef.geometry.buffer(meter1).unary_union


In [29]:
### A house on the border of county A may, during clustering, fall inside county A's
### buffer or inside the buffer of houses in a neighbouring county B. The original note
### states that the house's centroid lies in county A, so it should belong to county A's
### buffer: keep only the matches where the house county equals the buffer county.
sbuffer15joinhouse1 = sbuffer15joinhouse.loc[lambda d: d['COUNTYFP_left'] == d['COUNTYFP_right']]
sbuffer50joinhouse1 = sbuffer50joinhouse.loc[lambda d: d['COUNTYFP_left'] == d['COUNTYFP_right']]
# Number of 15 m matches whose buffer county differs from the house county.
print(len(sbuffer15joinhouse.loc[lambda d: d['COUNTYFP_left'] != d['COUNTYFP_right']]))


29


In [30]:
def addhousecount(df1,df2):
    """Attach the 50 m cluster id and per-cluster house counts to ``df1``.

    Parameters
    ----------
    df1 : DataFrame
        Must contain ``FID_Califo`` and ``cluster15``.
    df2 : DataFrame
        Must contain ``FID_Califo`` and ``cluster50``; only these two columns
        are merged in (left join on ``FID_Califo``).

    Returns
    -------
    DataFrame
        ``df1`` plus ``cluster50``, ``allhousecountbybuffer50`` and
        ``allhousecountbybuffer15``: the number of distinct ``FID_Califo``
        values sharing the row's 50 m / 15 m cluster.
    """
    df=df1.merge(df2[['FID_Califo','cluster50']],on='FID_Califo',how='left')
    # The built-in 'nunique' aggregation gives the same result as a per-group
    # Python lambda without calling Python once per cluster.
    return df.assign(
        allhousecountbybuffer50=df.groupby('cluster50')['FID_Califo'].transform('nunique'),
        allhousecountbybuffer15=df.groupby('cluster15')['FID_Califo'].transform('nunique'),
    )
# Combine the same-county 15 m and 50 m matches and add per-cluster house counts.
# Note: housefhsz is rebound here to the joined table.
housefhsz=addhousecount(sbuffer15joinhouse1,sbuffer50joinhouse1)

In [31]:
def addhousetypesbybuffer1550(df, clustered_above=50, isolated_upto=3, dense_upto=10):
    """Label each house with a housing class from its cluster house counts.

    Rules, applied in this order:

    * ``allhousecountbybuffer50 > clustered_above``  -> clustered, else scattered
    * ``allhousecountbybuffer50 <= isolated_upto``   -> isolated
    * clustered and ``allhousecountbybuffer15 <= dense_upto`` -> dense
    * clustered and ``allhousecountbybuffer15 >  dense_upto`` -> very dense

    Rows with a missing 50 m count fail every comparison and stay 'scattered'.

    Parameters
    ----------
    df : DataFrame
        Must contain ``allhousecountbybuffer50`` and ``allhousecountbybuffer15``.
        It is modified in place (a ``housingclass`` column is added).
    clustered_above, isolated_upto, dense_upto : int, optional
        Thresholds for the rules above; the defaults reproduce 50 / 3 / 10.

    Returns
    -------
    DataFrame
        The same ``df`` object.  The class counts are also printed.
    """
    count50 = df['allhousecountbybuffer50']
    count15 = df['allhousecountbybuffer15']

    # Vectorised form of the former row-wise apply.
    df['housingclass'] = 'scattered'
    df.loc[count50 > clustered_above, 'housingclass'] = 'clustered'
    df.loc[count50 <= isolated_upto, 'housingclass'] = 'isolated'

    # Split the clustered houses by how crowded their 15 m cluster is.
    is_clustered = df['housingclass'] == 'clustered'
    df.loc[is_clustered & (count15 <= dense_upto), 'housingclass'] = 'dense'
    df.loc[is_clustered & (count15 > dense_upto), 'housingclass'] = 'very dense'

    print(df['housingclass'].value_counts())
    return df
# Label every house with its housing class (the call also prints the class counts).
housefhsz=addhousetypesbybuffer1550(housefhsz)

housingclass
very dense    481619
dense         156448
scattered      78854
isolated       27460
Name: count, dtype: int64


In [35]:
# Rename the house-side county column back to COUNTYFP and drop the
# right-hand join columns that are no longer needed.
housefhsz=housefhsz.rename(columns={'COUNTYFP_left':'COUNTYFP'}).drop(columns=['index_right','COUNTYFP_right'])

In [36]:
# Inspect the columns of the final table.
housefhsz.columns

Index(['FID_Califo', 'WUICLASS_2', 'WUIFLAG202', 'GEOID', 'FHSZ_7Clas',
       'COUNTYFP', 'geometry', 'newhouse', 'cluster', 'cluster15', 'cluster50',
       'allhousecountbybuffer50', 'allhousecountbybuffer15', 'housingclass'],
      dtype='object')

In [37]:
# Save the classified house table as Parquet.
housefhsz.to_parquet('../Data/Processed/buildings/housefhsz_addhousetypes.parquet')