### This notebook finds the values of gridded variables within a single county. Ten longitudes and ten latitudes are sampled from the coordinates of the county's geometry in the shapefile, which together form 10 × 10 = 100 latitude/longitude pairs. The values of the variables from the NetCDF file are then interpolated from the grid onto this set of latitudes and longitudes.

In [1]:
## conda environment Weather_Prediction

import os
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr

In [2]:
## Open the NetCDF file; it holds one-month averages of the variables.
data = xr.open_dataset(filename_or_obj="data.nc")
data

In [3]:
## Average the data over one-year bins ('1YE') along valid_time.

year_avg = (
    data
    .resample(indexer={"valid_time": "1YE"})
    .mean(dim="valid_time")
)
year_avg

In [4]:
## Drop every data variable that contains at least one NaN.

vars_with_nan = [
    name
    for name, values in year_avg.data_vars.items()
    if values.isnull().any()
]
year_avg = year_avg.drop_vars(vars_with_nan)
year_avg

In [5]:
## Note: the time dimension shows 2010-12-31 on every row, but that label
## stands for the yearly average, not for a single day.
year_avg.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d2m,t2m,bcaod550,chnk,duaod550,lai_hv,lai_lv,msl,omaod550,pm2p5,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
valid_time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2010-12-31,49.5,-126.00,278.348907,281.556824,0.006512,0.014148,0.001253,3.752065,2.928701,101390.726562,0.055563,5.288607e-09,...,5.697568e-07,1.359443e-07,2.107467e-06,5.559153e-07,0.000009,6.538563e-07,0.000050,0.000015,0.000003,6.057056e-07
2010-12-31,49.5,-125.25,279.088837,282.437836,0.006916,0.013934,0.001246,4.228424,1.811178,101402.250000,0.060980,6.618710e-09,...,5.552510e-07,1.506114e-07,2.379295e-06,5.843853e-07,0.000010,6.358721e-07,0.000048,0.000014,0.000003,7.764698e-07
2010-12-31,49.5,-124.50,279.470001,282.923370,0.007213,0.013523,0.001238,2.194234,0.692861,101427.570312,0.063776,7.792362e-09,...,5.443322e-07,1.616488e-07,2.564180e-06,6.047914e-07,0.000011,6.118582e-07,0.000046,0.000013,0.000003,9.925919e-07
2010-12-31,49.5,-123.75,279.041290,282.562347,0.007478,0.013581,0.001204,0.905955,0.624898,101459.429688,0.067136,1.080445e-08,...,5.231616e-07,1.798799e-07,2.806439e-06,6.146706e-07,0.000011,5.744274e-07,0.000043,0.000011,0.000003,1.408917e-06
2010-12-31,49.5,-123.00,277.851837,281.525085,0.008017,0.014813,0.001140,3.628265,2.724324,101485.656250,0.076543,1.931933e-08,...,4.871432e-07,2.252995e-07,3.389544e-06,6.264335e-07,0.000012,5.279849e-07,0.000039,0.000010,0.000004,2.356623e-06
2010-12-31,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-12-31,24.0,-69.00,293.523407,297.542999,0.004872,0.012293,0.024328,0.000000,0.000000,101571.789062,0.046011,9.519893e-09,...,1.086019e-05,3.103107e-08,5.009322e-07,4.866120e-07,0.000009,7.770465e-07,0.000062,0.000016,0.000005,1.695241e-07
2010-12-31,24.0,-68.25,293.495728,297.488953,0.004856,0.012303,0.024477,0.000000,0.000000,101578.000000,0.045661,9.399162e-09,...,1.102058e-05,3.011268e-08,4.867323e-07,4.857583e-07,0.000009,7.737776e-07,0.000062,0.000015,0.000005,1.793884e-07
2010-12-31,24.0,-67.50,293.465454,297.455261,0.004833,0.012302,0.024507,0.000000,0.000000,101583.164062,0.045540,9.288230e-09,...,1.103588e-05,2.886006e-08,4.733052e-07,4.846137e-07,0.000009,7.727766e-07,0.000061,0.000015,0.000005,1.564795e-07
2010-12-31,24.0,-66.75,293.443970,297.427094,0.004824,0.012318,0.024585,0.000000,0.000000,101590.195312,0.045691,9.213097e-09,...,1.106749e-05,2.775320e-08,4.601205e-07,4.847639e-07,0.000009,7.745404e-07,0.000062,0.000015,0.000005,1.387977e-07


In [6]:
## Load the county shapefile (the path is relative to the working directory).

SHAPE_PATH = os.path.join("County_shapefile", "gz_2010_us_050_00_500k.shp")
county_gdf = gpd.read_file(filename=SHAPE_PATH)

county_gdf.head(n=5)

Unnamed: 0,GEO_ID,STATE,COUNTY,NAME,LSAD,CENSUSAREA,geometry
0,0500000US01029,1,29,Cleburne,County,560.1,"POLYGON ((-85.38872 33.91304, -85.38088 33.873..."
1,0500000US01031,1,31,Coffee,County,678.972,"POLYGON ((-86.03044 31.61894, -86.00408 31.619..."
2,0500000US01037,1,37,Coosa,County,650.926,"POLYGON ((-86.00928 33.10164, -86.00917 33.090..."
3,0500000US01039,1,39,Covington,County,1030.456,"POLYGON ((-86.34851 30.99434, -86.35023 30.994..."
4,0500000US01041,1,41,Crenshaw,County,608.84,"POLYGON ((-86.14699 31.68045, -86.14711 31.663..."


In [7]:
## Pull the geometry column out of the county GeoDataFrame.
geometry_column = county_gdf["geometry"]
geometry_column

0       POLYGON ((-85.38872 33.91304, -85.38088 33.873...
1       POLYGON ((-86.03044 31.61894, -86.00408 31.619...
2       POLYGON ((-86.00928 33.10164, -86.00917 33.090...
3       POLYGON ((-86.34851 30.99434, -86.35023 30.994...
4       POLYGON ((-86.14699 31.68045, -86.14711 31.663...
                              ...                        
3216    POLYGON ((-66.90748 18.25314, -66.90739 18.253...
3217    POLYGON ((-66.37968 17.94398, -66.38029 17.943...
3218    MULTIPOLYGON (((-66.13957 18.46232, -66.13956 ...
3219    POLYGON ((-66.02917 18.37590, -66.02828 18.376...
3220    POLYGON ((-66.85229 17.95500, -66.85280 17.955...
Name: geometry, Length: 3221, dtype: geometry

In [8]:
## check which type the extracted geometry column has
type(geometry_column)

geopandas.geoseries.GeoSeries

In [9]:
## Flatten every geometry into its coordinate pairs (x = longitude, y = latitude).
## With index_parts=True the result carries a two-level index, as the table below shows.
lat_lon = geometry_column.get_coordinates(
    index_parts=True,
)
lat_lon

Unnamed: 0,Unnamed: 1,x,y
0,0,-85.388717,33.913044
0,1,-85.380885,33.873508
0,2,-85.379455,33.866291
0,3,-85.377426,33.856047
0,4,-85.376403,33.850656
...,...,...,...
3220,202,-66.833718,17.989763
3220,203,-66.835282,17.988274
3220,204,-66.835429,17.986323
3220,205,-66.836682,17.965971


In [10]:
## check which type get_coordinates returned
type(lat_lon)

pandas.core.frame.DataFrame

## Find the values of the variables in the county with index 0 in the shapefile

In [11]:
## Longitudes (x) of the coordinates belonging to the county at index 0.
## Draw 10 of them at random. random_state pins the draw so that a
## Restart & Run All reproduces the same sample (the original draw was
## unseeded and changed on every run).
longitude = lat_lon.loc[0, 'x'].sample(n=10, random_state=0)
longitude

33    -85.352576
62    -85.851890
17    -85.313999
49    -85.680346
6     -85.361844
9     -85.355252
21    -85.314852
109   -85.495289
71    -85.724953
103   -85.601858
Name: x, dtype: float64

In [12]:
## Latitudes (y) of the coordinates belonging to the county at index 0.
## Draw 10 of them at random. random_state pins the draw so that a
## Restart & Run All reproduces the same sample (the original draw was
## unseeded and changed on every run).
latitude = lat_lon.loc[0, 'y'].sample(n=10, random_state=1)
latitude

63     33.499690
88     33.773339
115    33.927068
77     33.625105
92     33.787755
68     33.600002
75     33.612909
52     33.496862
32     33.498866
26     33.494700
Name: y, dtype: float64

In [13]:
## Plain Python list of the sampled latitudes.
lat_list = latitude.to_list()
lat_list

[33.49969,
 33.773339,
 33.927068064286296,
 33.625105,
 33.787755,
 33.600001999999996,
 33.612909,
 33.496862,
 33.498866,
 33.4947]

In [14]:
## Plain Python list of the sampled longitudes.
lon_list = longitude.to_list()
lon_list

[-85.352576,
 -85.85189,
 -85.313999,
 -85.680346,
 -85.36184399999999,
 -85.35525221574429,
 -85.314852,
 -85.495289,
 -85.724953,
 -85.601858]

In [15]:
## Interpolate every variable from the original grid onto the sampled coordinates.

year_avg_finer = year_avg.interp(
    coords={"longitude": lon_list, "latitude": lat_list}
)
year_avg_finer

### Note that xarray interpolates at every combination of the 10 latitudes and 10 longitudes, so the result holds 10 × 10 = 100 latitude/longitude pairs, that is, the variables are interpolated at 100 points.

In [16]:
## show the interpolated values as a table indexed by valid_time, latitude and longitude
year_avg_finer.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d2m,t2m,bcaod550,chnk,duaod550,lai_hv,lai_lv,msl,omaod550,pm2p5,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
valid_time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2010-12-31,33.49969,-85.352576,282.626894,289.846740,0.006262,0.018,0.007976,3.651370,2.729497,101706.803001,0.077320,1.693269e-08,...,0.000002,1.400559e-07,0.000003,5.252117e-07,0.000013,2.015570e-07,0.000013,7.535849e-07,0.000009,0.000004
2010-12-31,33.49969,-85.851890,282.803184,289.971640,0.006194,0.018,0.008337,3.528290,2.721260,101707.754081,0.076725,1.627143e-08,...,0.000003,1.345085e-07,0.000003,5.235354e-07,0.000013,2.037862e-07,0.000014,7.666836e-07,0.000009,0.000003
2010-12-31,33.49969,-85.313999,282.627318,289.836568,0.006274,0.018,0.007955,3.672237,2.720927,101706.411332,0.077453,1.697360e-08,...,0.000002,1.409249e-07,0.000003,5.256258e-07,0.000013,2.014389e-07,0.000013,7.530411e-07,0.000009,0.000004
2010-12-31,33.49969,-85.680346,282.716456,289.929703,0.006204,0.018,0.008202,3.549417,2.741242,101708.020108,0.076768,1.651758e-08,...,0.000003,1.355938e-07,0.000003,5.235811e-07,0.000013,2.029196e-07,0.000013,7.613112e-07,0.000009,0.000004
2010-12-31,33.49969,-85.361844,282.626793,289.849184,0.006259,0.018,0.007982,3.646357,2.731556,101706.897098,0.077289,1.692286e-08,...,0.000002,1.398471e-07,0.000003,5.251122e-07,0.000013,2.015854e-07,0.000013,7.537155e-07,0.000009,0.000004
2010-12-31,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-12-31,33.49470,-85.355252,282.631126,289.851267,0.006261,0.018,0.007985,3.651024,2.730142,101706.792842,0.077294,1.692273e-08,...,0.000002,1.399748e-07,0.000003,5.251665e-07,0.000013,2.016526e-07,0.000013,7.544613e-07,0.000009,0.000004
2010-12-31,33.49470,-85.314852,282.631558,289.840658,0.006273,0.018,0.007962,3.672853,2.721172,101706.382572,0.077433,1.696546e-08,...,0.000002,1.408846e-07,0.000003,5.255986e-07,0.000013,2.015287e-07,0.000013,7.538924e-07,0.000009,0.000004
2010-12-31,33.49470,-85.495289,282.629630,289.888040,0.006217,0.018,0.008064,3.575359,2.761236,101708.214934,0.076814,1.677462e-08,...,0.000003,1.368211e-07,0.000003,5.236686e-07,0.000013,2.020817e-07,0.000013,7.564334e-07,0.000009,0.000004
2010-12-31,33.49470,-85.724953,282.742573,289.944479,0.006202,0.018,0.008244,3.545817,2.735625,101707.906081,0.076745,1.644840e-08,...,0.000003,1.352932e-07,0.000003,5.235801e-07,0.000013,2.032377e-07,0.000013,7.635801e-07,0.000009,0.000004


In [17]:
## Average over the interpolated latitude/longitude points within each valid_time group.
## With only one time step the numbers match the table above; the step is still
## needed so the result can be merged with the shapefile.

summary = (
    year_avg_finer
    .groupby("valid_time")
    .mean(dim=["latitude", "longitude"])
)
summary.to_dataframe()

Unnamed: 0_level_0,d2m,t2m,bcaod550,chnk,duaod550,lai_hv,lai_lv,msl,omaod550,pm2p5,...,aermssdul,aermssbchphil,aermssomhphil,aermssbchphob,aermssomhphob,aermsssss,aermssssm,aermssssl,aermsssu,aermssso2
valid_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-12-31,282.576162,289.779605,0.006238,0.018,0.007921,3.563574,2.738629,101708.386647,0.077423,1.683235e-08,...,2e-06,1.382703e-07,3e-06,5.244256e-07,1.3e-05,1.999225e-07,1.3e-05,7.356056e-07,9e-06,4e-06


## In the next notebook, we will create a function to extract values in all counties.