#### This notebook computes the one-year average of each variable over CONUS, after regridding the CAMS data from its 0.75 x 0.75 grid to a finer grid

In [1]:
## conda environment Weather_Prediction

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geopandas as gpd
import xarray as xr
import regionmask

In [2]:
## open the NetCDF file as an xarray Dataset and display its summary
data = xr.open_dataset("data.nc")
data

In [3]:
## group the data into yearly bins along valid_time ('1YE' = year-end frequency)
## and average every variable within each bin

yearly_bins = data.resample(valid_time='1YE')
year_avg = yearly_bins.mean(dim='valid_time')
year_avg

In [4]:
## drop every data variable that contains at least one NaN

vars_with_nan = [
    name
    for name, values in year_avg.data_vars.items()
    if values.isnull().any()
]
year_avg = year_avg.drop_vars(vars_with_nan)
year_avg

In [5]:
## Note: although the time dimension shows 2010-12-31 (the year-end label
## produced by the '1YE' resampling), each value is actually the yearly average.

year_avg.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d2m,t2m,bcaod550,chnk,duaod550,lai_hv,lai_lv,msl,omaod550,pm2p5,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
valid_time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2010-12-31,49.5,-126.00,278.348907,281.556824,0.006512,0.014148,0.001253,3.752065,2.928701,101390.726562,0.055563,5.288607e-09,...,5.697568e-07,1.359443e-07,2.107467e-06,5.559153e-07,0.000009,6.538563e-07,0.000050,0.000015,0.000003,6.057056e-07
2010-12-31,49.5,-125.25,279.088837,282.437836,0.006916,0.013934,0.001246,4.228424,1.811178,101402.250000,0.060980,6.618710e-09,...,5.552510e-07,1.506114e-07,2.379295e-06,5.843853e-07,0.000010,6.358721e-07,0.000048,0.000014,0.000003,7.764698e-07
2010-12-31,49.5,-124.50,279.470001,282.923370,0.007213,0.013523,0.001238,2.194234,0.692861,101427.570312,0.063776,7.792362e-09,...,5.443322e-07,1.616488e-07,2.564180e-06,6.047914e-07,0.000011,6.118582e-07,0.000046,0.000013,0.000003,9.925919e-07
2010-12-31,49.5,-123.75,279.041290,282.562347,0.007478,0.013581,0.001204,0.905955,0.624898,101459.429688,0.067136,1.080445e-08,...,5.231616e-07,1.798799e-07,2.806439e-06,6.146706e-07,0.000011,5.744274e-07,0.000043,0.000011,0.000003,1.408917e-06
2010-12-31,49.5,-123.00,277.851837,281.525085,0.008017,0.014813,0.001140,3.628265,2.724324,101485.656250,0.076543,1.931933e-08,...,4.871432e-07,2.252995e-07,3.389544e-06,6.264335e-07,0.000012,5.279849e-07,0.000039,0.000010,0.000004,2.356623e-06
2010-12-31,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-12-31,24.0,-69.00,293.523407,297.542999,0.004872,0.012293,0.024328,0.000000,0.000000,101571.789062,0.046011,9.519893e-09,...,1.086019e-05,3.103107e-08,5.009322e-07,4.866120e-07,0.000009,7.770465e-07,0.000062,0.000016,0.000005,1.695241e-07
2010-12-31,24.0,-68.25,293.495728,297.488953,0.004856,0.012303,0.024477,0.000000,0.000000,101578.000000,0.045661,9.399162e-09,...,1.102058e-05,3.011268e-08,4.867323e-07,4.857583e-07,0.000009,7.737776e-07,0.000062,0.000015,0.000005,1.793884e-07
2010-12-31,24.0,-67.50,293.465454,297.455261,0.004833,0.012302,0.024507,0.000000,0.000000,101583.164062,0.045540,9.288230e-09,...,1.103588e-05,2.886006e-08,4.733052e-07,4.846137e-07,0.000009,7.727766e-07,0.000061,0.000015,0.000005,1.564795e-07
2010-12-31,24.0,-66.75,293.443970,297.427094,0.004824,0.012318,0.024585,0.000000,0.000000,101590.195312,0.045691,9.213097e-09,...,1.106749e-05,2.775320e-08,4.601205e-07,4.847639e-07,0.000009,7.745404e-07,0.000062,0.000015,0.000005,1.387977e-07


In [6]:
## Build a finer longitude axis: same end points as the original 0.75 x 0.75 grid,
## but with 10 times as many evenly spaced points.

new_lon = np.linspace(
    start=year_avg.longitude[0],
    stop=year_avg.longitude[-1],
    num=year_avg.sizes["longitude"] * 10,
)

In [7]:
## Build a finer latitude axis in the same way: same end points as the original
## grid, 10 times as many evenly spaced points.

new_lat = np.linspace(
    start=year_avg.latitude[0],
    stop=year_avg.latitude[-1],
    num=year_avg.sizes["latitude"] * 10,
)

In [8]:
## interpolate every variable onto the finer longitude/latitude axes

year_avg_finer = year_avg.interp({"longitude": new_lon, "latitude": new_lat})
year_avg_finer

In [9]:
## Read the county shapefile into a GeoDataFrame and preview the first rows

SHAPE_PATH = os.path.join(
    "County_shapefile",
    'gz_2010_us_050_00_500k.shp',
)
county_gdf = gpd.read_file(filename=SHAPE_PATH)

county_gdf.head(n=12)

Unnamed: 0,GEO_ID,STATE,COUNTY,NAME,LSAD,CENSUSAREA,geometry
0,0500000US01029,1,29,Cleburne,County,560.1,"POLYGON ((-85.38872 33.91304, -85.38088 33.873..."
1,0500000US01031,1,31,Coffee,County,678.972,"POLYGON ((-86.03044 31.61894, -86.00408 31.619..."
2,0500000US01037,1,37,Coosa,County,650.926,"POLYGON ((-86.00928 33.10164, -86.00917 33.090..."
3,0500000US01039,1,39,Covington,County,1030.456,"POLYGON ((-86.34851 30.99434, -86.35023 30.994..."
4,0500000US01041,1,41,Crenshaw,County,608.84,"POLYGON ((-86.14699 31.68045, -86.14711 31.663..."
5,0500000US01045,1,45,Dale,County,561.15,"POLYGON ((-85.79043 31.32027, -85.79033 31.323..."
6,0500000US01049,1,49,DeKalb,County,777.093,"POLYGON ((-85.57593 34.82373, -85.56142 34.750..."
7,0500000US01053,1,53,Escambia,County,945.08,"POLYGON ((-87.16308 30.99904, -87.16408 30.999..."
8,0500000US01057,1,57,Fayette,County,627.66,"POLYGON ((-87.63593 33.87874, -87.63604 33.872..."
9,0500000US01061,1,61,Geneva,County,574.408,"POLYGON ((-85.77267 30.99462, -85.77966 30.994..."


In [10]:
## Create the fips column by joining the STATE and COUNTY codes as text
## (string concatenation, NOT a numerical sum) and then storing it as an integer.
## This step is necessary because several counties can share an identical name.

fips_text = county_gdf['STATE'].str.cat(county_gdf['COUNTY'])
## convert the data type
county_gdf['fips'] = fips_text.astype(str).astype(int)
county_gdf.dtypes

GEO_ID          object
STATE           object
COUNTY          object
NAME            object
LSAD            object
CENSUSAREA     float64
geometry      geometry
fips             int32
dtype: object

In [11]:
## keep only the county name, the geometry and the fips code
unused_columns = ['GEO_ID', 'CENSUSAREA', 'STATE', 'COUNTY', 'LSAD']
county_gdf = county_gdf.drop(columns=unused_columns)
county_gdf

Unnamed: 0,NAME,geometry,fips
0,Cleburne,"POLYGON ((-85.38872 33.91304, -85.38088 33.873...",1029
1,Coffee,"POLYGON ((-86.03044 31.61894, -86.00408 31.619...",1031
2,Coosa,"POLYGON ((-86.00928 33.10164, -86.00917 33.090...",1037
3,Covington,"POLYGON ((-86.34851 30.99434, -86.35023 30.994...",1039
4,Crenshaw,"POLYGON ((-86.14699 31.68045, -86.14711 31.663...",1041
...,...,...,...
3216,San Sebastián,"POLYGON ((-66.90748 18.25314, -66.90739 18.253...",72131
3217,Santa Isabel,"POLYGON ((-66.37968 17.94398, -66.38029 17.943...",72133
3218,Toa Baja,"MULTIPOLYGON (((-66.13957 18.46232, -66.13956 ...",72137
3219,Trujillo Alto,"POLYGON ((-66.02917 18.37590, -66.02828 18.376...",72139


In [12]:
## check the column types of the trimmed county table
county_gdf.dtypes

NAME          object
geometry    geometry
fips           int32
dtype: object

In [13]:
## The life expectancy data (the CSV's 'Unnamed: 0' column is discarded)

le = pd.read_csv('LE_2010.csv').drop(columns=['Unnamed: 0'])
le

Unnamed: 0,location_name,fips,year,MeanLifeExpectency
0,Autauga County (Alabama),1001,2010,75.728489
1,Baldwin County (Alabama),1003,2010,77.826608
2,Barbour County (Alabama),1005,2010,75.841973
3,Bibb County (Alabama),1007,2010,73.705432
4,Blount County (Alabama),1009,2010,75.753407
...,...,...,...,...
3122,Sweetwater County (Wyoming),56037,2010,77.582422
3123,Teton County (Wyoming),56039,2010,83.771012
3124,Uinta County (Wyoming),56041,2010,77.836526
3125,Washakie County (Wyoming),56043,2010,78.674180


In [14]:
## locations that appear in the life expectancy data but whose fips code
## has no match in the shapefile

has_shape = le['fips'].isin(county_gdf['fips'])
county_noshape = le.loc[~has_shape].copy()
county_noshape

Unnamed: 0,location_name,fips,year,MeanLifeExpectency
80,Chugach Census Area (Alaska),2063,2010,77.733009
81,Copper River Census Area (Alaska),2066,2010,77.733009
83,Kusilvak Census Area (Alaska),2158,2010,69.723907
85,Kobuk Census Area (Alaska),2140,2010,73.339132
87,Aleutian Islands Census Area (Alaska),2010,2010,82.631153
96,Skagway-Yakutat-Angoon Census Area (Alaska),2231,2010,80.20283
97,Skagway-Hoonah-Angoon Census Area (Alaska),2232,2010,80.20283
102,Prince of Wales-Outer Ketchikan Census Area (A...,2201,2010,78.06861
104,Wrangell-Petersburg Census Area (Alaska),2280,2010,78.06861
338,Dade County (Florida),12025,2010,80.576828


### Note that the CAMS weather data covers only CONUS; Alaska and Hawaii are not included, i.e. 38 census areas of Alaska and 5 counties of Hawaii. There are 15 locations whose shapefile does not exist, as seen above, and 9 of them are in Alaska; therefore shapefiles exist for 3127-38-5-6=3078 of the locations in the life expectancy data.

In [15]:
## plain Python list holding the fips code of every county in the shapefile
county_list = county_gdf.loc[:, 'fips'].to_list()

In [16]:
## quick check: one sample fips code and the total number of counties
var = county_list[4]
print(var, len(county_list), sep="\n")

1041
3221


In [17]:
def single_county(var):
    
    ''' Average every regridded variable over the grid points that fall inside one county.

         Args:
         --------
             var (int): FIPS code of the county, i.e. one element of county_list.
                        It is matched against the integer ``fips`` column of county_gdf.

         Returns:
         --------
            final_df: GeoDataFrame holding the shapefile columns of the selected county
                      (NAME, geometry, fips) followed by one column per variable of
                      year_avg_finer (d2m, t2m, ...), each containing the county mean.
                      Rows that contain any NaN are dropped, so the result is empty when
                      no mean could be computed for the county.
    '''
    
    # Shapefile row(s) whose fips code equals the requested one.
    county = county_gdf[county_gdf.fips.isin([var])]

    # Boolean mask with a leading "region" dimension marking the grid points inside the county.
    county_mask = regionmask.mask_3D_geopandas(
        county, year_avg_finer.longitude, year_avg_finer.latitude
    )

    # Keep only the values inside the county, then average over space and time.
    df_masked = year_avg_finer.where(county_mask)
    summary = df_masked.groupby("region").mean(["latitude", "longitude", "valid_time"])
    county_means = summary.to_dataframe()

    ## It is VERY IMPORTANT that the left dataframe that is merged is a Geopandas Dataframe,
    ## not a pandas dataframe. A Geopandas dataframe can be plotted easily, whereas a pandas
    ## dataframe does not recognize the geometry column and will not be plotted as a map.
    ##
    ## Only the selected `county` row(s) are merged (not the whole county_gdf): the "region"
    ## index of county_means carries the index labels of `county` (regionmask numbers the
    ## regions by the GeoDataFrame index by default), so the index-on-index merge lines up
    ## and there is no need to join and then discard every other county.
    df = county.merge(county_means, left_index=True, right_index=True, how='left')

    final_df = df.dropna()
    return final_df

In [18]:
## try the function on the first county of the list
single_county(county_list[0])

Unnamed: 0,NAME,geometry,fips,d2m,t2m,bcaod550,chnk,duaod550,lai_hv,lai_lv,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
0,Cleburne,"POLYGON ((-85.38872 33.91304, -85.38088 33.873...",1029,282.534639,289.760917,0.006227,0.018,0.007887,3.538058,2.750113,...,2e-06,1.375496e-07,3e-06,5.240626e-07,1.3e-05,1.994048e-07,1.3e-05,7.304962e-07,9e-06,4e-06


In [19]:
%%time

## get the average for all of the counties

df_list=[]

for i in range(0,len(county_list)):
    try:
        df_list.append(single_county(county_list[i]))
    except:
        pass
df_list[:4] ## see first 4 df

  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D 

CPU times: total: 5min 37s
Wall time: 7min 38s


  mask_3D = _mask_3D(
  mask_3D = _mask_3D(
  mask_3D = _mask_3D(


[       NAME                                           geometry  fips  \
 0  Cleburne  POLYGON ((-85.38872 33.91304, -85.38088 33.873...  1029   
 
           d2m         t2m  bcaod550   chnk  duaod550    lai_hv    lai_lv  ...  \
 0  282.534639  289.760917  0.006227  0.018  0.007887  3.538058  2.750113  ...   
 
    aermssdul  aermssbchphil  aermssomhphil  aermssbchphob  aermssomhphob  \
 0   0.000002   1.375496e-07       0.000003   5.240626e-07       0.000013   
 
       aermsssss  aermssssm     aermssssl  aermsssu  aermssso2  
 0  1.994048e-07   0.000013  7.304962e-07  0.000009   0.000004  
 
 [1 rows x 49 columns],
      NAME                                           geometry  fips  \
 1  Coffee  POLYGON ((-86.03044 31.61894, -86.00408 31.619...  1031   
 
           d2m         t2m  bcaod550      chnk  duaod550    lai_hv    lai_lv  \
 1  284.470145  291.463619  0.006492  0.017627  0.011786  3.963167  2.382854   
 
    ...  aermssdul  aermssbchphil  aermssomhphil  aermssbchphob  aer

In [20]:
## stack the per-county frames row-wise into one table (original index labels are kept)
initial_df = pd.concat(df_list, axis=0, ignore_index=False)
initial_df

Unnamed: 0,NAME,geometry,fips,d2m,t2m,bcaod550,chnk,duaod550,lai_hv,lai_lv,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
0,Cleburne,"POLYGON ((-85.38872 33.91304, -85.38088 33.873...",1029,282.534639,289.760917,0.006227,0.018000,0.007887,3.538058,2.750113,...,0.000002,1.375496e-07,0.000003,5.240626e-07,0.000013,1.994048e-07,0.000013,7.304962e-07,0.000009,3.833579e-06
1,Coffee,"POLYGON ((-86.03044 31.61894, -86.00408 31.619...",1031,284.470145,291.463619,0.006492,0.017627,0.011786,3.963167,2.382854,...,0.000004,1.419754e-07,0.000002,5.477895e-07,0.000013,2.550690e-07,0.000018,1.327673e-06,0.000008,9.686894e-07
2,Coosa,"POLYGON ((-86.00928 33.10164, -86.00917 33.090...",1037,283.231267,290.567934,0.006220,0.018000,0.009614,3.838011,2.561341,...,0.000003,1.305443e-07,0.000003,5.303232e-07,0.000013,2.181405e-07,0.000015,8.944704e-07,0.000008,2.498515e-06
3,Covington,"POLYGON ((-86.34851 30.99434, -86.35023 30.994...",1039,285.035842,291.786795,0.006599,0.017083,0.012393,3.800936,2.682312,...,0.000004,1.454181e-07,0.000002,5.557222e-07,0.000013,2.641616e-07,0.000019,1.472378e-06,0.000008,9.377627e-07
4,Crenshaw,"POLYGON ((-86.14699 31.68045, -86.14711 31.663...",1041,283.953110,291.404574,0.006432,0.017995,0.011633,3.990976,2.514900,...,0.000004,1.367316e-07,0.000002,5.466086e-07,0.000013,2.487132e-07,0.000017,1.237158e-06,0.000008,1.040012e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,Niobrara,"POLYGON ((-104.05298 42.85955, -104.05286 42.7...",56027,271.615412,282.249120,0.005404,0.018000,0.007840,3.185860,0.679093,...,0.000008,1.083389e-07,0.000002,4.657902e-07,0.000010,2.130015e-07,0.000013,8.459998e-07,0.000004,9.995847e-07
3139,Platte,"POLYGON ((-104.77417 42.60996, -104.76422 42.6...",56031,270.554427,281.107911,0.005047,0.018000,0.006249,4.388569,0.800860,...,0.000006,1.017176e-07,0.000002,4.345306e-07,0.000009,2.000219e-07,0.000012,7.582755e-07,0.000004,1.087591e-06
3140,Sweetwater,"POLYGON ((-109.05008 41.00066, -109.17368 41.0...",56037,268.847690,278.335979,0.004754,0.018000,0.004109,2.347858,0.500051,...,0.000004,9.289265e-08,0.000002,4.122359e-07,0.000008,2.042348e-07,0.000013,9.199331e-07,0.000003,1.318196e-06
3141,Washakie,"POLYGON ((-107.12892 43.99455, -107.12797 43.9...",56043,270.008487,279.067422,0.005383,0.018000,0.003833,4.696960,0.551908,...,0.000003,9.979085e-08,0.000002,4.718273e-07,0.000009,2.166879e-07,0.000013,9.534947e-07,0.000003,6.046665e-07


#### There should be a total of 3078 counties, as explained in the note above Cell 15.

In [21]:
## attach the county averages to the life expectancy table on fips,
## then keep only the rows that are complete (no NaN in any column)
## NOTE(review): `le` (read with pd.read_csv) is the left frame here, so the result is
## presumably a plain pandas DataFrame rather than a GeoDataFrame -- confirm if it must be plotted.
final_df = le.merge(initial_df, how='left', on='fips').dropna()
final_df

Unnamed: 0,location_name,fips,year,MeanLifeExpectency,NAME,geometry,d2m,t2m,bcaod550,chnk,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
0,Autauga County (Alabama),1001,2010,75.728489,Autauga,"POLYGON ((-86.52469 32.70706, -86.52443 32.707...",283.453027,291.087236,0.006284,0.018000,...,3.296266e-06,1.285195e-07,0.000003,5.390769e-07,0.000013,2.304495e-07,0.000016,1.010670e-06,0.000008,1.625641e-06
1,Baldwin County (Alabama),1003,2010,77.826608,Baldwin,"POLYGON ((-87.41247 30.57386, -87.41271 30.573...",286.729490,292.234757,0.006414,0.015324,...,4.357242e-06,1.203718e-07,0.000002,5.610699e-07,0.000013,2.886364e-07,0.000021,2.072605e-06,0.000008,1.146249e-06
2,Barbour County (Alabama),1005,2010,75.841973,Barbour,"POLYGON ((-85.13285 31.80037, -85.13283 31.798...",284.125137,291.030279,0.006434,0.018000,...,3.328249e-06,1.460616e-07,0.000002,5.375131e-07,0.000013,2.382998e-07,0.000016,1.124454e-06,0.000008,1.211736e-06
3,Bibb County (Alabama),1007,2010,73.705432,Bibb,"POLYGON ((-87.11632 32.83560, -87.15529 32.835...",283.446516,290.754046,0.006236,0.018000,...,3.210069e-06,1.258110e-07,0.000003,5.367472e-07,0.000013,2.223786e-07,0.000015,9.170631e-07,0.000008,1.961398e-06
4,Blount County (Alabama),1009,2010,75.753407,Blount,"POLYGON ((-86.73121 34.01470, -86.72710 34.016...",282.866441,289.765376,0.006106,0.018000,...,2.564322e-06,1.291962e-07,0.000003,5.195478e-07,0.000013,1.987653e-07,0.000013,7.087353e-07,0.000008,2.442076e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3122,Sweetwater County (Wyoming),56037,2010,77.582422,Sweetwater,"POLYGON ((-109.05008 41.00066, -109.17368 41.0...",268.847690,278.335979,0.004754,0.018000,...,3.771786e-06,9.289265e-08,0.000002,4.122359e-07,0.000008,2.042348e-07,0.000013,9.199331e-07,0.000003,1.318196e-06
3123,Teton County (Wyoming),56039,2010,83.771012,Teton,"POLYGON ((-111.04668 43.80830, -111.04672 43.8...",267.882993,274.406839,0.004907,0.018000,...,9.098615e-07,9.830079e-08,0.000002,4.228952e-07,0.000008,2.082116e-07,0.000013,1.040999e-06,0.000003,2.642149e-07
3124,Uinta County (Wyoming),56041,2010,77.836526,Uinta,"POLYGON ((-110.04864 41.04008, -110.04848 40.9...",270.021646,276.911810,0.004992,0.018000,...,1.640751e-06,1.026755e-07,0.000002,4.277129e-07,0.000008,2.131036e-07,0.000013,1.039536e-06,0.000003,6.158064e-07
3125,Washakie County (Wyoming),56043,2010,78.674180,Washakie,"POLYGON ((-107.12892 43.99455, -107.12797 43.9...",270.008487,279.067422,0.005383,0.018000,...,3.163035e-06,9.979085e-08,0.000002,4.718273e-07,0.000009,2.166879e-07,0.000013,9.534947e-07,0.000003,6.046665e-07
