### <center>Additional preprocessing for NOAA SLR data</center>

The National Oceanic and Atmospheric Administration (NOAA) has identified coastal plains within the US that would be impacted at various levels of sea level rise. Processing these files takes several hours for each scenario (1ft, 3ft, 6ft, 7ft and 10ft) given the complexity of the underlying geospatial data (identification and extraction of the geospatial layer for each region, spatial joins on multipolygon shapes). Therefore, the relevant files are pre-processed here and written to disk locally. They can then be read in directly during the analysis stage (Analysis_and_Visualization.ipynb).

In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import geopandas as gpd
import fiona

import os
import zipfile

import matplotlib.pyplot as plt

In [17]:
def data_files():
    """Load the two pickled support tables used when preprocessing the SLR data.

    I/P: None (both pickles are read from fixed paths under Data/Final_Data/)
    O/P: tuple of (cleaned coastal counties table, cleaned coastal geospatial table), in that order"""

    pickle_paths = (
        "Data/Final_Data/cleaned_coastal_counties.pkl",
        "Data/Final_Data/cleaned_coastal_geodf.pkl",
    )
    # the generator is consumed left to right, so the counties pickle is read before the geo pickle
    coastal_counties, coastal_geodf = (pd.read_pickle(path) for path in pickle_paths)
    return coastal_counties, coastal_geodf

# Load the supporting tables once for the notebook session; gs_df_cc is the table handed to the
# (commented-out) SLR_df calls further down.
ccounties, gs_df_cc = data_files()
# Notebook display: preview three randomly chosen rows of the geospatial table.
gs_df_cc.sample(3)

Unnamed: 0,state_code,county_name,state_name,geometry,county_code,census_tract_code,ccounty_flag,region
21118,NY,Suffolk,New York,"POLYGON ((-72.86509 40.83438, -72.84120 40.829...",36103,36103159411,1,NorthEast
29088,CA,Santa Clara,California,"POLYGON ((-121.93184 37.31695, -121.91500 37.3...",6085,6085502103,1,SouthWest
2757,NY,New York,New York,"POLYGON ((-73.99506 40.72881, -73.99146 40.731...",36061,36061005700,1,NorthEast


In [4]:
##the relevant files have already been unzipped so we should not need to run this
def SLR_unzip(folder_path = 'Data/Interim_Data/SLR/raw/'):
    
    """ This function unzips the geospatial files that have been bulk downloaded from the NOAA website. These files 
        contain the SLR shapefiles for the coastal states/counties in scope for this project.

        From every zip archive found in `folder_path`, only members ending in `.gdbtable` or `.gpkg` are extracted,
        plus a member named exactly "TX_Central_slr_final_dist.gdb". Entries of `folder_path` that are not zip 
        archives are skipped.
    
        I/P: Optional parent folder path holding the downloaded zip archives
        O/P: None (Unzipped files are saved in an `interim` folder that sits next to `folder_path`)"""

    # Derive the sibling `interim` folder from the path itself. Slicing a fixed number of characters off the
    # string only worked when the folder was literally called `raw/`.
    interim_dir = os.path.join(os.path.dirname(os.path.normpath(folder_path)), 'interim')

    for item in os.listdir(folder_path):
        zip_path = os.path.join(folder_path, item)

        # stray files or sub-folders in the download folder should not abort the whole extraction
        if not zipfile.is_zipfile(zip_path):
            continue

        # the context manager releases the archive's file handle once its members are extracted
        with zipfile.ZipFile(zip_path) as archive:
            for file in archive.namelist():
                # NOTE(review): directory members in a zip usually carry a trailing "/", so this exact-name
                # match for the TX Central geodatabase may never trigger -- confirm against the real archive.
                if file == "TX_Central_slr_final_dist.gdb" or file.endswith(('.gdbtable', '.gpkg')):
                    archive.extract(file, interim_dir)
    return

# SLR_unzip()  # uncomment only if the raw NOAA archives need to be extracted again

In [5]:
##helper used when building the SLR dataframe
def slr_flagging(gs_df_cc, temp_df, st_cd):
    
    """Find the census tracts of one state that intersect a single SLR layer.

    Because the slr shapes are complex (multipolygon), joining the whole slr gdB against the whole main gdB runs 
    very slowly, so each slr file is joined on its own against only the rows of the main gdB for its state. 
    The join is an inner sjoin with the slr layer on the left.

    NOTE(review): the gs_df_cc sample displayed earlier in this notebook lists a `census_tract_code` column rather
    than `ud_census_tract_code` -- confirm the column name against the pickle that is actually loaded.
    
    I/P: main geospatial database, slr geospatial database, state code to be used
    O/P: list of the unique `ud_census_tract_code` values found intersecting with the slr dB"""
    
    # restrict the main table to the requested state before the expensive spatial join
    same_state = gs_df_cc.state_code == st_cd
    state_tracts = gs_df_cc[same_state]

    joined = temp_df.sjoin(state_tracts, how ='inner')
    tract_codes = joined.ud_census_tract_code.unique()

    return list(tract_codes)

In [6]:
##function needed to create the SLR dataframe
def SLR_df(gs_df_cc, slr_ft, folder_path = 'Data/Interim_Data/SLR/interim/'):
    
    """For each SLR shapefile, this function extracts the layer relevant to the sea level rise being considered and 
    creates the corresponding gdB. This gdB is passed to a sister function slr_flagging where a sjoin is performed with
    the main gdB and the corresponding intersecting census tracts are returned. Finally, all such intersecting census 
    tracts are consolidated (slr_df). The slr_df is used to set SLR flags in the main gdB in the analysis 
    stage (Analysis_and_Visualization.ipynb)

    Both kinds of item in the folder are processed: `.gpkg` files (layer name contains `slr_<ft>_0ft`) and every 
    other item, treated as a geodatabase folder (layer name contains `slr_<ft>ft`). The first two characters of 
    the item name are used as the state code for the join. Items without a matching layer, or whose join fails, 
    are reported with a printed message and skipped.
    
    I/P: main geospatial dB, sea level rise scenario to consider (e.g. "3"), optional folder path
    O/P: dataframe of consolidated intersecting census tracts; the same dataframe is also pickled to 
         Data/Final_Data/slr_<ft>_censustracts.pkl"""
    
    slr_ct = set()  #master set of intersecting census tracts
    for item in os.listdir(folder_path):
        item_path = os.path.join(folder_path, item)

        #metadata here: https://www.fisheries.noaa.gov/inport/item/48106
        if item.endswith(".gpkg"):
            layer_tag = f'slr_{slr_ft}_0ft'
        else: #for items that are folders
            layer_tag = f'slr_{slr_ft}ft'

        # `except Exception` (not a bare except) so that Ctrl-C can still stop this multi-hour loop
        try:
            layers = fiona.listlayers(item_path)
        except Exception:
            print(f"this item failed to create temp_df: {item}")  #testing (#no layers in the TX_Central_slr_final_dist.gdb)
            continue

        relevant_layer = next((layer for layer in layers if layer_tag in layer), None)
        if relevant_layer is None:
            print(f"this item failed to create temp_df: {item}")  #testing
            continue

        # Read and join for BOTH item kinds. Previously this part was nested under the folder branch, so
        # .gpkg items had their layer looked up but were never read or joined.
        temp_df = gpd.read_file(item_path, layer = relevant_layer)

        try:
            slr_ct.update(slr_flagging(gs_df_cc, temp_df[["geometry"]], item[:2]))
            print(f"completed for {item} and {relevant_layer}") #testing
        except Exception:
            print(f"FAILED for {item} and {relevant_layer}") #testing; no data for provided slr level

    #consolidate dataframe of intersecting census tracts
    slr_df = pd.DataFrame({"slr_census_tracts": list(slr_ct), f"slr_{slr_ft}_ft" : [1] * len(slr_ct)})
    slr_df.to_pickle(f'Data/Final_Data/slr_{slr_ft}_censustracts.pkl')
    
    return slr_df

In [7]:
## slr_1ft_df = SLR_df(gs_df_cc, "1") #do not re-run; takes forever. Read in the file instead
# slr_1ft_df.head(3)

In [8]:
# slr_2ft_df = SLR_df(gs_df_cc, "2") #do not re-run; takes forever. Read in the file instead
# slr_2ft_df.head(3)

In [9]:
# slr_3ft_df = SLR_df(gs_df_cc, "3") #do not re-run; takes forever. Read in the file instead
# slr_3ft_df.head(3)

In [10]:
# slr_4ft_df = SLR_df(gs_df_cc, "4") #do not re-run; takes forever. Read in the file instead
# slr_4ft_df.head(3)

In [15]:
# slr_5ft_df = SLR_df(gs_df_cc, "5") #do not re-run; takes forever. Read in the file instead
# slr_5ft_df.head(3)

In [12]:
# slr_6ft_df = SLR_df(gs_df_cc, "6")  #do not re-run; takes forever. Read in the file instead
# slr_6ft_df.head(3)

In [13]:
# slr_7ft_df = SLR_df(gs_df_cc, "7") #do not re-run; takes forever. Read in the file instead
# slr_7ft_df.head(3)

In [14]:
## slr_10ft_df = SLR_df(gs_df_cc, "10")  #do not re-run; takes forever. Read in the file instead
# slr_10ft_df.head(3)