In [1]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import os
import netCDF4
from netCDF4 import Dataset

In [9]:
def ero_categories(directory):
    
    # set file for counties and remove non contiguous states
    counties=gpd.read_file(r'/home/meirahwilliamson/blue/netcdf_ero/county_data/USA_Counties/USA_Counties.shp')
    counties=counties[~(counties['STATE_NAME']=='Puerto Rico') & ~(counties['STATE_NAME']=='Alaska')
        & ~(counties['STATE_NAME']=='Hawaii')]
    
    #create dataframe with only FIPS
    FIPS_df = pd.DataFrame(counties["FIPS"], columns = ['FIPS']) 
    
    #loop through all files
    for filename in directory:
    
        #open netcdf and make it a dataframe
        ERO_netcdf=xr.open_dataset(filename)
        ERO_df=ERO_netcdf.to_dataframe()

        #reset the index (bc it's currently the counties)
        ERO_df=ERO_df.reset_index()

        # use geopandas points_from_xy() to transform Longitude and Latitude into a list of shapely.Point objects and set it as a geometry while creating the GeoDataFrame
        ERO_gdf = gpd.GeoDataFrame(ERO_df, geometry=gpd.points_from_xy(ERO_df.lon, ERO_df.lat))

        # set counties coordinates to equal ERO coords
            # not degrees!! xlim, ylim in millions
                # counties = counties.to_crs(epsg=5070)
            # yes degrees!! xlim, ylim are lat, lon coords
                # counties = counties.to_crs(epsg=4326)
        ERO_gdf.crs = counties.crs
        
        print(ERO_gdf.crs, counties.crs)

        # spatial join ERO to counties
        counties_points_join=gpd.sjoin(counties,ERO_gdf)

        # count number of points for each risk level in each county
        group_points_county=counties_points_join.groupby(["FIPS","FCST_ERO_Surface_FULL"]).size()

        # make it a dataframe bc idxmax won't work on geodataframe, then fix the index
        group_points_county_df = pd.DataFrame(group_points_county)
        group_points_county_df=group_points_county_df.reset_index(level=['FIPS', 'FCST_ERO_Surface_FULL'])

        # find category that has the most points in each county, drop the counts, and FCST_ERO column to date
        points_county_max=group_points_county_df.loc[group_points_county_df.groupby('FIPS')['FCST_ERO_Surface_FULL'].idxmax()]
        points_county_max=points_county_max.drop(0,axis=1)
        points_county_max=points_county_max.rename(columns={"FCST_ERO_Surface_FULL": filename[19:27] })
        
        # merge columns onto previous columns (use merge bc not all the same length -- VA and RI)
        FIPS_df=FIPS_df.merge(points_county_max,on="FIPS",how="left")
        
        break;
                                
    return FIPS_df

In [10]:
# apparently you also have to set the directory first -\_o_/-
os.chdir(r'/home/meirahwilliamson/blue/netcdf_ero/djf')

# choose directory
directory=os.listdir(r'/home/meirahwilliamson/blue/netcdf_ero/djf')

# run function and set variable so you can make sure it looks okay and then send it to xlsx (or something)
ero_categories_df=ero_categories(directory)

ero_categories_df.to_csv(r'/home/meirahwilliamson/blue/test/test.csv')

epsg:4326 epsg:4326


Unnamed: 0,FIPS,20171202
0,06053,0.0
1,06087,0.0
2,06085,0.0
3,06069,0.0
4,06111,0.0
...,...,...
3103,23025,0.0
3104,23003,0.0
3105,23019,0.0
3106,23021,0.0
