In [1]:
## conda environment Weather_Prediction

import os
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr

### This notebook calculates the fraction of time the 2m temperature was above a certain threshold.

In [2]:
## Surface data: 3-hourly values for the entire year of 2010 of the
## variables 2m temperature (t2m) and 2m dew point temperature (d2m).
sfc_file = "data_sfc.nc"
data = xr.open_dataset(sfc_file)
data

In [3]:
## Flatten the Dataset into a table indexed by (valid_time, latitude, longitude) for a quick look
data.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d2m,t2m
valid_time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01 00:00:00,48.75,-125.00,277.681854,279.335510
2010-01-01 00:00:00,48.75,-124.25,276.716034,277.768127
2010-01-01 00:00:00,48.75,-123.50,276.760956,277.172424
2010-01-01 00:00:00,48.75,-122.75,276.804901,276.965393
2010-01-01 00:00:00,48.75,-122.00,274.138885,274.655823
...,...,...,...,...
2010-12-31 21:00:00,24.00,-69.50,287.732727,295.728790
2010-12-31 21:00:00,24.00,-68.75,287.549133,295.571564
2010-12-31 21:00:00,24.00,-68.00,287.462219,295.528595
2010-12-31 21:00:00,24.00,-67.25,287.574524,295.526642


In [4]:
## Let's load the county shapefile and preview its first rows

SHAPE_PATH = os.path.join("County_shapefile", "gz_2010_us_050_00_500k.shp")
county_gdf = gpd.read_file(SHAPE_PATH)

county_gdf.head()

Unnamed: 0,GEO_ID,STATE,COUNTY,NAME,LSAD,CENSUSAREA,geometry
0,0500000US01029,1,29,Cleburne,County,560.1,"POLYGON ((-85.38872 33.91304, -85.38088 33.873..."
1,0500000US01031,1,31,Coffee,County,678.972,"POLYGON ((-86.03044 31.61894, -86.00408 31.619..."
2,0500000US01037,1,37,Coosa,County,650.926,"POLYGON ((-86.00928 33.10164, -86.00917 33.090..."
3,0500000US01039,1,39,Covington,County,1030.456,"POLYGON ((-86.34851 30.99434, -86.35023 30.994..."
4,0500000US01041,1,41,Crenshaw,County,608.84,"POLYGON ((-86.14699 31.68045, -86.14711 31.663..."


In [5]:
## keep only the geometry column (one polygon / multipolygon per county)
geometry_column = county_gdf["geometry"]
geometry_column

0       POLYGON ((-85.38872 33.91304, -85.38088 33.873...
1       POLYGON ((-86.03044 31.61894, -86.00408 31.619...
2       POLYGON ((-86.00928 33.10164, -86.00917 33.090...
3       POLYGON ((-86.34851 30.99434, -86.35023 30.994...
4       POLYGON ((-86.14699 31.68045, -86.14711 31.663...
                              ...                        
3216    POLYGON ((-66.90748 18.25314, -66.90739 18.253...
3217    POLYGON ((-66.37968 17.94398, -66.38029 17.943...
3218    MULTIPOLYGON (((-66.13957 18.46232, -66.13956 ...
3219    POLYGON ((-66.02917 18.37590, -66.02828 18.376...
3220    POLYGON ((-66.85229 17.95500, -66.85280 17.955...
Name: geometry, Length: 3221, dtype: geometry

In [6]:
## Explode every geometry into its boundary vertices: x holds the longitude,
## y holds the latitude, and the index is (county row, vertex number).
lat_lon = geometry_column.get_coordinates(index_parts=True)
lat_lon

Unnamed: 0,Unnamed: 1,x,y
0,0,-85.388717,33.913044
0,1,-85.380885,33.873508
0,2,-85.379455,33.866291
0,3,-85.377426,33.856047
0,4,-85.376403,33.850656
...,...,...,...
3220,202,-66.833718,17.989763
3220,203,-66.835282,17.988274
3220,204,-66.835429,17.986323
3220,205,-66.836682,17.965971


In [7]:
## Draw a reproducible random sample of boundary vertices from one county
## and keep their coordinates as plain Python lists.
COUNTY_INDEX = 2000   ## row of county_gdf whose boundary vertices are sampled
N_SAMPLES = 10        ## number of vertices to draw
SAMPLE_SEED = 42      ## fixed seed so a fresh "Restart & Run All" yields the same points

## Sample whole rows (x and y together): sampling the two columns separately
## would pair the latitude of one vertex with the longitude of another.
county_vertices = lat_lon.loc[COUNTY_INDEX, ['x', 'y']]
sampled_vertices = county_vertices.sample(n=N_SAMPLES, random_state=SAMPLE_SEED)

longitude = sampled_vertices['x']  ## x column = longitude values
latitude = sampled_vertices['y']   ## y column = latitude values

lat_list = latitude.tolist()
print(lat_list)
lon_list = longitude.tolist()
print(lon_list)

[47.165901, 47.240452999999995, 46.629928, 47.066548999999995, 46.988997, 47.226476, 46.631209999999996, 46.916083, 46.683972999999995, 47.240235]
[-98.033862, -98.326816, -97.78337499999999, -97.681559, -97.68178499999999, -98.43942799999999, -97.961208, -98.46732399999999, -98.43914, -98.195405]


In [8]:
## Interpolate the gridded variables onto the sampled county coordinates.
## NOTE: passing plain lists makes the interpolation orthogonal, i.e. the result
## covers every (latitude, longitude) combination of the two lists (a grid),
## not just the element-wise pairs.

target_coords = {"longitude": lon_list, "latitude": lat_list}
year_avg_finer = data.interp(**target_coords)
year_avg_finer

In [9]:
## Tabular view of the interpolated values; rows are indexed by (valid_time, latitude, longitude)
year_avg_finer.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,d2m,t2m
valid_time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01 00:00:00,47.165901,-98.033862,250.642213,254.060783
2010-01-01 00:00:00,47.165901,-98.326816,250.640397,254.038372
2010-01-01 00:00:00,47.165901,-97.783375,250.725667,254.154624
2010-01-01 00:00:00,47.165901,-97.681559,250.764792,254.197513
2010-01-01 00:00:00,47.165901,-97.681785,250.764705,254.197418
...,...,...,...,...
2010-12-31 21:00:00,47.240235,-98.439428,249.208447,252.182526
2010-12-31 21:00:00,47.240235,-97.961208,249.638591,252.822415
2010-12-31 21:00:00,47.240235,-98.467324,249.185186,252.147991
2010-12-31 21:00:00,47.240235,-98.439140,249.208688,252.182882


In [10]:
## Average over the two spatial dimensions to get one value per time step.
## Only latitude and longitude are reduced, so valid_time is kept as-is and the
## result already has one entry per time stamp; grouping by valid_time first
## would only create one single-step group per time stamp without changing the result.

summary = year_avg_finer.mean(["latitude", "longitude"])
summary.to_dataframe()

Unnamed: 0_level_0,d2m,t2m
valid_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-01 00:00:00,250.838278,254.311444
2010-01-01 03:00:00,249.926309,253.025958
2010-01-01 06:00:00,248.917514,251.652985
2010-01-01 09:00:00,247.499077,250.511276
2010-01-01 12:00:00,245.926678,248.851010
...,...,...
2010-12-31 09:00:00,250.805628,253.950999
2010-12-31 12:00:00,249.674571,252.599750
2010-12-31 15:00:00,248.906027,252.028685
2010-12-31 18:00:00,249.264247,252.591976


In [11]:
## sanity check: confirm the object type of `summary` before converting it to pandas
type(summary)

xarray.core.dataset.Dataset

In [12]:
## pandas copy of the area-averaged time series (one row per valid_time)
df = summary.to_dataframe()
df

Unnamed: 0_level_0,d2m,t2m
valid_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-01 00:00:00,250.838278,254.311444
2010-01-01 03:00:00,249.926309,253.025958
2010-01-01 06:00:00,248.917514,251.652985
2010-01-01 09:00:00,247.499077,250.511276
2010-01-01 12:00:00,245.926678,248.851010
...,...,...
2010-12-31 09:00:00,250.805628,253.950999
2010-12-31 12:00:00,249.674571,252.599750
2010-12-31 15:00:00,248.906027,252.028685
2010-12-31 18:00:00,249.264247,252.591976


In [13]:
## number of time steps in the averaged series
total_rows = df.shape[0]
total_rows

2920

In [14]:
## Threshold temperature in Kelvin (the t2m values are in Kelvin as well).
threshold_temp = 290

In [15]:
## Count the time steps whose area-mean 2m temperature is strictly above the threshold.
## The comparison is vectorised over the whole column instead of looping row by row;
## missing values compare as False, so they are not counted (same as the loop).
c = int((df['t2m'] > threshold_temp).sum())
print(c)

743


In [16]:
## Express the exceedance count as a share of all time steps.
## Note: frac_time is scaled by 100, i.e. it is a percentage (0-100), not a 0-1 fraction.

share_above = c / total_rows
frac_time = share_above * 100
frac_time

25.445205479452053