In [None]:
import os
import shapely
import numpy as np
import sys 
import pandas as pd 
import gc
from shapely import wkt
from matplotlib import pyplot as plt
from shapely.geometry import Point,mapping,Polygon,box,MultiPoint
import geopandas as gpd 
import seaborn as sns 
 
# for parallelization
sys.path.append('/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/src/apt_realignment')
from process_geometries import ProcessGeometricData  as ProcessAPT_data
from utils.haversine_distance import get_distance
from utils.geometric_utils import geojson2shpfile


def save_geometry(dataframe,geometry,out_path,fname='dataframe.geojson'):
    df_to_save = gpd.GeoDataFrame(geometry=dataframe[geometry],crs="EPSG:4326")
    df_to_save = df_to_save.to_crs("epsg:4326")
    df_to_save.to_file(os.path.join(out_path,fname),driver="GeoJSON")

## Download Parcel Data 

In [8]:
import pandas as pd 
from tqdm import tqdm 
import urllib

FIPS_CSV_PATH = "/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/us-state-ansi-fips.csv"
ALL_FIPS = "/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/All_counties_code.csv"

data_path = "mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/"

fips_df = pd.read_csv(FIPS_CSV_PATH)
fips_df.head()

Unnamed: 0,stname,st,stusps
0,Alabama,1,AL
1,Alaska,2,AK
2,Arizona,4,AZ
3,Arkansas,5,AR
4,California,6,CA


In [9]:
FIPSCode_df = pd.read_csv(ALL_FIPS)
FIPSCode_df.head()

Unnamed: 0,fips,name,state
0,0,UNITED STATES,
1,1000,ALABAMA,
2,1001,Autauga County,AL
3,1003,Baldwin County,AL
4,1005,Barbour County,AL


In [10]:
County = 'Washington'

county_info = fips_df.loc[fips_df['stname']==County]

data = county_info.values.tolist()[0]

print("County FIPS Code : ",data[1])
print("County Name Code : ",data[2].strip())

out_path = os.path.join(data_path,County)

if not os.path.isdir(out_path):
    os.mkdir(out_path)

print(out_path)

County FIPS Code :  53
County Name Code :  WA
mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/Washington


## Get All Counties of Texas 

In [11]:
fipss = list(map(lambda x: str(x),list(FIPSCode_df['fips'].loc[FIPSCode_df['state']==data[2].strip()])))
fipss = list(map(lambda x: "0{}".format(x) if len(x)==4 else x ,fipss))

print('FIPS found:',len(fipss))
fipss

FIPS found: 39


['53001',
 '53003',
 '53005',
 '53007',
 '53009',
 '53011',
 '53013',
 '53015',
 '53017',
 '53019',
 '53021',
 '53023',
 '53025',
 '53027',
 '53029',
 '53031',
 '53033',
 '53035',
 '53037',
 '53039',
 '53041',
 '53043',
 '53045',
 '53047',
 '53049',
 '53051',
 '53053',
 '53055',
 '53057',
 '53059',
 '53061',
 '53063',
 '53065',
 '53067',
 '53069',
 '53071',
 '53073',
 '53075',
 '53077']

In [12]:
errors = set()
for fips in tqdm(fipss,total=len(fipss)):    
    if not os.path.isfile(os.path.join(out_path,f"Parcels_{fips}.zip")):
        try:
            urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/July%202018/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
        except:
            try:
                urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/April%202018/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
            except:
                try:

                    urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Jan%202018/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
                except:
                    try:
                        urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Oct%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
                    except:
                        try:
                            urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/July%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
                        except:
                            try:
                                urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/April%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
                            except:
                                try:
                                    urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Jan%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/parcels/Parcels_{fips}.zip")
                                except:
                                    print("No information for :",fips)
                                    errors.add(fips)
                                    continue

  0%|          | 0/39 [00:00<?, ?it/s]

In [None]:

errors = set()

for fips in tqdm(fipss):
            try:
                urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/July%202018/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
            except KeyboardInterrupt as e:
                raise e 
            except:
                try:
                    urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/April%202018/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                except KeyboardInterrupt as e:
                    raise e 
                except:
                    try:
                        urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Jan%202018/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                    except KeyboardInterrupt as e:
                        raise e 
                    except:
                        try:
                            urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Oct%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                        except KeyboardInterrupt as e:
                            raise e 
                        except:
                            try: 
                                urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/July%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                            except KeyboardInterrupt as e:
                                raise e 
                            except:
                                try:
                                    urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/April%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                                except KeyboardInterrupt as e:
                                    raise e 
                                except:
                                    try:
                                        urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Jan%202017/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                                    except KeyboardInterrupt as e:
                                        raise e 
                                    except:
                                        try:
                                            urllib.request.urlretrieve(f"https://sts-rescat-prod.s3-eu-west-1.amazonaws.com/Rescat/Global/Active/USA_Nation/USA000003213/AUTHORIZED%20USERS%20ONLY/Oct%202016/Parcels_{fips}.zip", f"/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/data/{County}/Parcels_{fips}.zip")
                                        except KeyboardInterrupt as e:
                                            raise e 
                                        except:
                                            print("No data for : ",fips)
                                            errors.add(fips)
                                            continue
            try:
                print("downloded : ",fips)
                # gdf = gpd.read_file(f'parcels_for_postgress/Parcels_{fips}.shp').to_crs(3857)
                # gdf['state'] = state
                # gdf.to_crs(3857, inplace=True)
                # gdf.to_postgis(name="land_parcels", schema='dev_ppa', con=con, index=False, if_exists='append')
            except KeyboardInterrupt as e:
                raise e 
            except:
                errors.add(fips)
                continue

### Merge data

In [None]:
from glob import glob 
pkl_path = osp.join(data_path,state,city,'Apt_realignment')
pkl_files = glob(pkl_path+'/*/*.pkl')
print(pkl_files)

In [None]:

## Show single bfp within Parcel data
df_one_bfp = pd.read_pickle(pkl_files[0])
df_one_bfp = df_one_bfp.reset_index(drop=True)
print("simple scenario sahpe",df_one_bfp.shape[0])

df_two_bfp = pd.read_pickle(pkl_files[1])
df_two_bfp = df_two_bfp.reset_index(drop=True)
print(" complx scenario shape:",df_two_bfp.shape[0])

merge_df= pd.concat([df_one_bfp,df_two_bfp])
print("Merged Shape",merge_df.shape)
merge_df.to_pickle(os.path.join(pkl_path,'FinalUpdated_APT_{}_{}.pkl'.format(state,city)))

## IIlunois Cook

In [None]:
import os
import shapely
import numpy as np
import sys 
import pycoredb 
import pandas as pd 
import gc
from shapely import wkt
from matplotlib import pyplot as plt
from shapely.geometry import Point,mapping,Polygon,box,MultiPoint
import geopandas as gpd 
import seaborn as sns 
 
# for parallelization
sys.path.append('/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/src/apt_realignment')
from process_geometries import ProcessGeometricData  as ProcessAPT_data
from utils.haversine_distance import get_distance
from utils.geometric_utils import geojson2shpfile

data_path  = "/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/BFP_Analysis_USA/data/data"
state = "Illinois"
city = "cook"
apt_data_path = os.path.join(data_path,state,"APT__2022_09_001_nam_usa_uil.shp" )
parcel_path = os.path.join(data_path,state,city,"Parcels_17031/Parcels_17031.shp")
building_geojson = os.path.join(data_path,state,'Illinois.geojson')

In [None]:
# geojson2shpfile(building_geojson,verbose=0)

### Single BFP within a Parcel 

In [None]:
output_path = os.path.join(data_path,state,city,'Apt_realignment') 
apt_preprocess = ProcessAPT_data(parcel_shapefile=parcel_path,
                building_shapefile=building_geojson,
                apt_shape_file=apt_data_path , 
                output_path=output_path)

processed_df_cook = apt_preprocess.process_dataframe(bfp_count_per_parcel=1,filename='APT_realigned_usa_il_cook')


In [None]:
print(processed_df_cook.shape[0])
processed_df_cook.head(4)

In [None]:
distance_greater_than_50m = processed_df_cook.loc[processed_df_cook['APT_to_Centroid_distance']>50].shape[0]
print("Data points greater than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_cook.shape[0])))

distance_greater_than_50m = processed_df_cook.loc[processed_df_cook['APT_to_Centroid_distance']<50].shape[0]
print("Data points less than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_cook.shape[0])))


In [None]:
fig, ax = plt.subplots(figsize=(17,9))

ax.set_title("Histogram of APT to centroid distance on/not on BFP")
ax.set_xlabel("APT point to centroid distance(meters)")
ax.set_ylabel("counts")

frqTrue, edgesTrue = np.histogram(processed_df_cook['APT_to_Centroid_distance'].loc[processed_df_cook['APT_to_Centroid_distance']<150].values, bins = np.arange(0,150,10))
p1 = ax.bar(edgesTrue[:-1], frqTrue, width=np.diff(edgesTrue), edgecolor="black", align="edge",alpha=0.4,label='Address Points on Rooftop',color='orange')

plt.legend()
plt.show()
gc.collect()

#### Two BFP's within a Parcel 

In [None]:
## Process complexity 2-- 2 bfp 

output_path = os.path.join(data_path,state,city,'Apt_realignment') 
apt_preprocess = ProcessAPT_data(parcel_shapefile=parcel_path,
                building_shapefile=building_geojson,
                apt_shape_file=apt_data_path , 
                output_path=output_path)

processed_df_cook = apt_preprocess.process_dataframe(bfp_count_per_parcel=2,filename='APT_realigned_usa_uil_cook')

In [None]:
print(processed_df_cook.shape[0])
processed_df_cook.head(4)

In [None]:
distance_greater_than_50m = processed_df_cook.loc[processed_df_cook['APT_to_Centroid_distance']>50].shape[0]
print("Data points greater than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_cook.shape[0])))

distance_greater_than_50m = processed_df_cook.loc[processed_df_cook['APT_to_Centroid_distance']<50].shape[0]
print("Data points less than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_cook.shape[0])))


In [None]:
fig, ax = plt.subplots(figsize=(17,9))

ax.set_title("Histogram of APT to centroid distance on/not on BFP")
ax.set_xlabel("APT point to centroid distance(meters)")
ax.set_ylabel("counts")

frqTrue, edgesTrue = np.histogram(processed_df_cook['APT_to_Centroid_distance'].loc[processed_df_cook['APT_to_Centroid_distance']<150].values, bins = np.arange(0,150,10))
p1 = ax.bar(edgesTrue[:-1], frqTrue, width=np.diff(edgesTrue), edgecolor="black", align="edge",alpha=0.4,label='Address Points on Rooftop',color='orange')

plt.legend()
plt.show()
gc.collect()

### Merge Data

In [None]:
from glob import glob 
pkl_path = osp.join(data_path,state,city,'Apt_realignment')
pkl_files = glob(pkl_path+'/*/*.pkl')
print(pkl_files)


In [None]:
## Show single bfp within Parcel data
df_one_bfp = pd.read_pickle(pkl_files[0])
df_one_bfp = df_one_bfp.reset_index(drop=True)
print("simple scenario sahpe",df_one_bfp.shape[0])

df_two_bfp = pd.read_pickle(pkl_files[1])
df_two_bfp = df_two_bfp.reset_index(drop=True)
print(" complx scenario shape:",df_two_bfp.shape[0])

merge_df= pd.concat([df_one_bfp,df_two_bfp])
print("Merged Shape",merge_df.shape)
merge_df.to_pickle(os.path.join(pkl_path,'FinalUpdated_APT_{}_{}.pkl'.format(state,city)))


## Michigan Wayne 

In [None]:
import os
import shapely
import numpy as np
import sys 
import pycoredb 
import pandas as pd 
import gc
from shapely import wkt
from matplotlib import pyplot as plt
from shapely.geometry import Point,mapping,Polygon,box,MultiPoint
import geopandas as gpd 
import seaborn as sns 
 
# for parallelization
sys.path.append('/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/src/apt_realignment')
from process_geometries import ProcessGeometricData  as ProcessAPT_data
from utils.haversine_distance import get_distance

data_path  = "/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/BFP_Analysis_USA/data/data"
state = "MI"
city = "wayne"
apt_data_path = os.path.join(data_path,state,"APT__2022_09_001_nam_usa_umx.shp" )
parcel_path = os.path.join(data_path,state,city,"Parcels_26163/Parcels_26163.shp")
building_geojson = os.path.join(data_path,state,'Michigan.geojson')

In [None]:
# geojson2shpfile(building_geojson,verbose=0)

### Single BFP within a Parcel

In [None]:
output_path = os.path.join(data_path,state,city,'Apt_realignment') 
apt_preprocess = ProcessAPT_data(parcel_shapefile=parcel_path,
                building_shapefile=building_geojson,
                apt_shape_file=apt_data_path , 
                output_path=output_path)

processed_df_wayne = apt_preprocess.process_dataframe(bfp_count_per_parcel=1,filename='APT_realigned_usa_umx_wayne')

In [None]:
print("Data Shape:",processed_df_wayne.shape)
processed_df_wayne.head()

In [None]:
distance_greater_than_50m = processed_df_wayne.loc[processed_df_wayne['APT_to_Centroid_distance']>50].shape[0]
print("Data points greater than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_wayne.shape[0])))

distance_greater_than_50m = processed_df_wayne.loc[processed_df_wayne['APT_to_Centroid_distance']<50].shape[0]
print("Data points less than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_wayne.shape[0])))

In [None]:
fig, ax = plt.subplots(figsize=(17,9))

ax.set_title("Histogram of APT to centroid distance on/not on BFP")
ax.set_xlabel("APT point to centroid distance(meters)")
ax.set_ylabel("counts")

frqTrue, edgesTrue = np.histogram(processed_df_wayne['APT_to_Centroid_distance'].loc[processed_df_wayne['APT_to_Centroid_distance']<150].values, bins = np.arange(0,150,10))
p1 = ax.bar(edgesTrue[:-1], frqTrue, width=np.diff(edgesTrue), edgecolor="black", align="edge",alpha=0.4,label='Address Points on Rooftop',color='orange')

plt.legend()
plt.show()
gc.collect()

#### Two BFP's within a Parcel 

In [None]:
output_path = os.path.join(data_path,state,city,'Apt_realignment') 
apt_preprocess = ProcessAPT_data(parcel_shapefile=parcel_path,
                building_shapefile=building_geojson,
                apt_shape_file=apt_data_path , 
                output_path=output_path)

processed_df_wayne = apt_preprocess.process_dataframe(bfp_count_per_parcel=2,filename='APT_realigned_usa_umx_wayne')


In [None]:
print(processed_df_wayne.shape[0])
processed_df_wayne.head(10)

In [None]:
distance_greater_than_50m = processed_df_wayne.loc[processed_df_wayne['APT_to_Centroid_distance']>50].shape[0]
print("Data points greater than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_wayne.shape[0])))

distance_greater_than_50m = processed_df_wayne.loc[processed_df_wayne['APT_to_Centroid_distance']<50].shape[0]
print("Data points less than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_wayne.shape[0])))


In [None]:
fig, ax = plt.subplots(figsize=(17,9))

ax.set_title("Histogram of APT to centroid distance on/not on BFP")
ax.set_xlabel("APT point to centroid distance(meters)")
ax.set_ylabel("counts")

frqTrue, edgesTrue = np.histogram(processed_df_wayne['APT_to_Centroid_distance'].loc[processed_df_wayne['APT_to_Centroid_distance']<150].values, bins = np.arange(0,150,10))
p1 = ax.bar(edgesTrue[:-1], frqTrue, width=np.diff(edgesTrue), edgecolor="black", align="edge",alpha=0.4,label='Address Points on Rooftop',color='orange')

plt.legend()
plt.show()
gc.collect()

### Merge both data 

In [None]:
from glob import glob 
import os
import os.path as osp 
pkl_path = osp.join(data_path,state,city,'Apt_realignment')
pkl_files = glob(pkl_path+'/*/*.pkl')
print(pkl_files)

In [None]:

## Show single bfp within Parcel data
df_one_bfp = pd.read_pickle(pkl_files[0])
df_one_bfp = df_one_bfp.reset_index(drop=True)
print("simple scenario sahpe",df_one_bfp.shape[0])

df_two_bfp = pd.read_pickle(pkl_files[1])
df_two_bfp = df_two_bfp.reset_index(drop=True)
print(" complx scenario shape:",df_two_bfp.shape[0])

merge_df= pd.concat([df_one_bfp,df_two_bfp])
print("Merged Shape",merge_df.shape)
merge_df.to_pickle(os.path.join(pkl_path,'FinalUpdated_APT_{}_{}.pkl'.format(state,city)))

## Dallas Texas 

In [None]:
import os
import shapely
import numpy as np
import sys 
import pycoredb 
import pandas as pd 
import gc
from shapely import wkt
from matplotlib import pyplot as plt
from shapely.geometry import Point,mapping,Polygon,box,MultiPoint
import geopandas as gpd 
import seaborn as sns 
 
# for parallelization
sys.path.append('/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/src/apt_realignment')
from process_geometries import ProcessGeometricData  as ProcessAPT_data
from utils.haversine_distance import get_distance

data_path  = "/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/BFP_Analysis_USA/data/data"
state = "Texas"
city = "Dallas"
apt_data_path = os.path.join(data_path,state,"APT_2022_09_000_nam_usa_utx.shp" )
parcel_path = os.path.join(data_path,state,city,"Parcels_48113/Parcels_48113.shp")
building_geojson = os.path.join(data_path,state,'Texas.geojson')

### Single BFP within Parcel 

In [None]:
output_path = os.path.join(data_path,state,city,'Apt_realignment') 
apt_preprocess = ProcessAPT_data(parcel_shapefile=parcel_path,
                building_shapefile=building_geojson,
                apt_shape_file=apt_data_path , 
                output_path=output_path)

processed_df_dallas = apt_preprocess.process_dataframe(bfp_count_per_parcel=1,filename='APT_realigned_usa_utx_dallas')
processed_df_dallas.head(10)

In [None]:
distance_greater_than_50m = processed_df_dallas.loc[processed_df_dallas['APT_to_Centroid_distance']>50].shape[0]
print("Data points greater than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_dallas.shape[0])))

distance_greater_than_50m = processed_df_dallas.loc[processed_df_dallas['APT_to_Centroid_distance']<50].shape[0]
print("Data points less than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_dallas.shape[0])))


In [None]:
fig, ax = plt.subplots(figsize=(17,9))

ax.set_title("Histogram of APT to centroid distance on/not on BFP")
ax.set_xlabel("APT point to centroid distance(meters)")
ax.set_ylabel("counts")

frqTrue, edgesTrue = np.histogram(processed_df_dallas['APT_to_Centroid_distance'].loc[processed_df_dallas['APT_to_Centroid_distance']<50].values, bins = np.arange(2,50,5))
p1 = ax.bar(edgesTrue[:-1], frqTrue, width=np.diff(edgesTrue), edgecolor="black", align="edge",alpha=0.4,label='Address Points on Rooftop',color='orange')

plt.legend()
plt.show()
gc.collect()

### Two BFP within Parcel 

In [None]:
output_path = os.path.join(data_path,state,city,'Apt_realignment') 
apt_preprocess = ProcessAPT_data(parcel_shapefile=parcel_path,building_shapefile=building_geojson,
                                    apt_shape_file=apt_data_path , output_path=output_path)

processed_df_dallas = apt_preprocess.process_dataframe(bfp_count_per_parcel=2,filename='APT_realigned_usa_utx_dallas')
processed_df_dallas.head(10)

In [None]:
distance_greater_than_50m = processed_df_dallas.loc[processed_df_dallas['APT_to_Centroid_distance']>50].shape[0]
print("Data points greater than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_dallas.shape[0])))

distance_greater_than_50m = processed_df_dallas.loc[processed_df_dallas['APT_to_Centroid_distance']<50].shape[0]
print("Data points less than 50m: {:.2f}%".format(100*(distance_greater_than_50m/processed_df_dallas.shape[0])))


In [None]:
fig, ax = plt.subplots(figsize=(17,9))

ax.set_title("Histogram of APT to centroid distance on/not on BFP")
ax.set_xlabel("APT point to centroid distance(meters)")
ax.set_ylabel("counts")

frqTrue, edgesTrue = np.histogram(processed_df_dallas['APT_to_Centroid_distance'].loc[processed_df_dallas['APT_to_Centroid_distance']<50].values, bins = np.arange(2,50,5))
p1 = ax.bar(edgesTrue[:-1], frqTrue, width=np.diff(edgesTrue), edgecolor="black", align="edge",alpha=0.4,label='Address Points on Rooftop',color='orange')

plt.legend()
plt.show()
gc.collect()

### Merge Data

In [None]:
from glob import glob 
pkl_path = osp.join(data_path,state,city,'Apt_realignment')
pkl_files = glob(pkl_path+'/*/*.pkl')
print(pkl_files)

In [None]:

## Show single bfp within Parcel data
df_one_bfp = pd.read_pickle(pkl_files[0])
df_one_bfp = df_one_bfp.reset_index(drop=True)
print("simple scenario sahpe",df_one_bfp.shape[0])

df_two_bfp = pd.read_pickle(pkl_files[1])
df_two_bfp = df_two_bfp.reset_index(drop=True)
print(" complx scenario shape:",df_two_bfp.shape[0])

merge_df= pd.concat([df_one_bfp,df_two_bfp])
print("Merged Shape",merge_df.shape)
merge_df.to_pickle(os.path.join(pkl_path,'FinalUpdated_APT_{}_{}.pkl'.format(state,city)))

### Compare MNR data with MSFT dBFP ata

In [None]:
import os
import shapely
import numpy as np
import sys 

import pycoredb 
import pandas as pd 
import gc

from shapely import wkt
from matplotlib import pyplot as plt
from shapely.geometry import Point,mapping,Polygon,box,MultiPoint
import geopandas as gpd 
import seaborn as sns 

# for parallelization
sys.path.append('/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/APT-Realignment/src/apt_realignment')
from process_geometries import ProcessGeometricData  as ProcessAPT_data
from utils.haversine_distance import get_distance

In [None]:
data_path  = "/mnt/c/Users/tandon/OneDrive - TomTom/Desktop/tomtom/Workspace/01_Rooftop_accuracy/BFP_Analysis_USA/data/data"
state = "Texas"
building_geojson = os.path.join(data_path,state,'Texas.geojson')
mnr_building_geojson = os.path.join(data_path,state,'BFP__2022_09_012_nam_usa_utx.shp')

In [None]:
bfp_df = gpd.read_file(building_geojson)
bfp_df = gpd.GeoDataFrame(bfp_df,crs="EPSG:4326",geometry='geometry')
bfp_df = bfp_df.to_crs("epsg:4326")
print("Total number of data points collected from MNR database ",bfp_df.shape[0])
gc.collect()
bfp_df.head()

In [None]:
mnr_bfp_df = gpd.read_file(mnr_building_geojson)
mnr_bfp_df = gpd.GeoDataFrame(mnr_bfp_df,crs="EPSG:4326",geometry='geometry')
mnr_bfp_df = mnr_bfp_df.to_crs("epsg:4326")
print("Total number of data points collected from MNR database ",mnr_bfp_df.shape[0])
gc.collect()
mnr_bfp_df.head()

In [None]:
def get_buildingfootprint(val):
    return bfp_df['geometry'].loc[val]

join_df = gpd.sjoin(mnr_bfp_df, bfp_df, op='intersects', how='left')
join_df.dropna(inplace=True)
join_df['MSFT_building_geom'] = join_df['index_right'].apply(lambda x: get_buildingfootprint(x))


In [None]:
def get_buildingfootprint(val):
    return bfp_df['geometry'].loc[val]

join_df.dropna(inplace=True)
join_df['MSFT_building_geom'] = join_df['index_right'].apply(lambda x: get_buildingfootprint(x))


In [None]:
join_df

In [None]:
def get_poly_iou(data: gpd.GeoSeries):
    try:
        intersect = data['geometry'].intersection(data['MSFT_building_geom']).area
        union = data['geometry'].union(data['MSFT_building_geom']).area
        iou = intersect / union
    except:
        iou = None
    return iou
join_df['IOU'] = join_df.apply(lambda x: get_poly_iou(x),axis=1)

In [None]:
join_df.dropna(inplace=True)
print(join_df['IOU'].mean())


In [None]:
join_df