In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import rioxarray
import rioxarray as rxr
import glob
from datetime import datetime
import pytz
from pyproj import Proj, Transformer
import os
from scipy import interpolate

In [2]:
edt = pytz.timezone('US/Eastern')
#mdt = pytz.timezone('US/Mountain')
az = pytz.timezone('US/Arizona')

def toTimezone(dt, to_timezone=pytz.utc, your_timezone=edt):
    return your_timezone.normalize(your_timezone.localize(dt)).astimezone(to_timezone)

In [4]:
times_GOES = pd.read_csv('../../heat_data/GOES_DMV/GOES_times_DMV.csv')

In [5]:
times_GOES = (times_GOES.value/1000).apply(datetime.fromtimestamp).apply(toTimezone)

In [6]:
times_GOES

0        2022-01-01 00:00:20.500000+00:00
1        2022-01-01 00:10:20.500000+00:00
2        2022-01-01 00:20:20.500000+00:00
3        2022-01-01 00:30:20.500000+00:00
4        2022-01-01 00:40:20.500000+00:00
                       ...               
104725   2023-12-31 23:10:20.500000+00:00
104726   2023-12-31 23:20:20.500000+00:00
104727   2023-12-31 23:30:20.500000+00:00
104728   2023-12-31 23:40:20.500000+00:00
104729   2023-12-31 23:50:20.500000+00:00
Name: value, Length: 104730, dtype: datetime64[ns, UTC]

In [7]:
times_Landsat = pd.read_csv('../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_times_DMV.csv')

In [8]:
times_Landsat = (times_Landsat.value/1000).apply(datetime.fromtimestamp)#.apply(toUTC)

In [9]:
times_Landsat

0    2022-01-08 10:46:41.472
1    2022-01-24 10:46:36.811
2    2022-02-09 10:46:33.801
3    2022-02-25 10:46:28.661
4    2022-03-13 11:46:24.066
5    2022-03-29 11:46:11.301
6    2022-04-14 11:46:17.412
7    2022-04-30 11:46:13.727
8    2022-05-16 11:46:23.184
9    2022-06-01 11:46:27.125
10   2022-06-17 11:46:38.602
11   2022-07-03 11:46:41.752
12   2022-07-19 11:46:43.635
13   2022-08-04 11:46:54.405
14   2022-08-20 11:46:59.186
15   2022-09-05 11:47:00.375
16   2022-09-21 11:47:06.017
17   2022-10-07 11:47:01.930
18   2022-10-23 11:47:03.286
19   2022-11-08 10:47:04.288
20   2022-11-24 10:47:00.122
21   2022-12-10 10:46:58.854
22   2022-12-26 10:46:51.072
23   2023-01-11 10:46:48.979
24   2023-02-28 10:46:30.544
25   2023-03-16 11:46:23.913
26   2023-04-01 11:46:13.064
27   2023-04-17 11:45:58.932
28   2023-05-03 11:45:53.830
29   2023-05-19 11:45:43.340
30   2023-06-04 11:45:48.086
31   2023-06-20 11:45:55.135
32   2023-07-06 11:46:07.055
33   2023-07-22 11:46:10.726
34   2023-08-0

In [10]:
GOES_tif_list = glob.glob('../../heat_data/GOES_DMV/*.tif')
GOES_tif_list

['../../heat_data/GOES_DMV/GOES_image_202201010010.tif',
 '../../heat_data/GOES_DMV/GOES_image_202201010000.tif']

In [11]:
def sort_func(s):
    return int(s.split('image_')[1].split('.tif')[0])

In [12]:
GOES_tif_list = sorted(GOES_tif_list, key=sort_func)
GOES_tif_list

['../../heat_data/GOES_DMV/GOES_image_202201010000.tif',
 '../../heat_data/GOES_DMV/GOES_image_202201010010.tif']

In [13]:
Landsat_tif_list = glob.glob('../../heat_data/Landsat_Sentinel_DMV_reflect/*.tif')
Landsat_tif_list

['../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202202251546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202203131546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202202091546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202203291546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202201081546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202201241546.tif']

In [14]:
Landsat_tif_list = sorted(Landsat_tif_list, key=sort_func)
Landsat_tif_list

['../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202201081546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202201241546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202202091546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202202251546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202203131546.tif',
 '../../heat_data/Landsat_Sentinel_DMV_reflect/Landsat_Sentinel_image_202203291546.tif']

In [15]:
def to_binary_string(num):
    if np.isnan(num):
        return np.NaN
    else:
        return f'{int(num):b}'

In [16]:
to_binary_string(55052)

'1101011100001100'

In [17]:
"""
Export coordinates for each urban area
KEY: [utm zone, T/F Northern Hemisphere]
"""
proj_zone = {
    'DMV':[18, True], 'NYC':[18, True], 'Phoenix':[12, True], 'Miami':[17, True], 'Chicago':[16, True], 'Denver':[13, True],
    'Seattle':[10, True], 'San_Francisco':[10, True], 'Los_Angeles':[11, True], 'Atlanta':[16, True], 'Toronto':[17, True],
    'Mexico_City':[14, True], 'Las_Vegas':[11, True], 'Salt_Lake_City':[12, True], 'Dallas':[14, True], 'Houston':[15, True],
    'New_Orleans':[15, True], 'St_Louis':[15, True], 'Minneapolis':[15, True], 'Jacksonville':[17, True], 'Charlotte':[17, True],
    'Philadelphia':[18, True], 'San_Diego':[11, True], 'San_Juan':[19, True], 'Montreal':[18, True], 'Guadalajara':[13, True],
    'Monterrey':[14, True], 'Cancun':[16, True], 'Billings':[12, True], 'Guatemala_City':[15, True], 'San_Jose':[16, True],
    'Havana':[17, True], 'Santo_Domingo':[19, True], 'Tegucigalpa':[16, True], 'Managua':[16, True], 'Panama_City':[17, True],
    'Bogota':[18, True], 'Lima':[18, False], 'Quito':[17, True], # Check hemisphere on this one because it's at the equator
    'Santiago':[19, False], 'Buenos_Aires':[21, False], 'Sao_Paulo':[23, False], 'Manaus':[20, False], 'Punta_Arenas':[19, False],
    'La_Paz':[19, False], 'Montevideo':[21, False], 'Brasilia':[22, False], 'Caracas':[19, True]
}

In [18]:
city = 'DMV'
city_zone = proj_zone[city]
city_zone

[18, True]

In [19]:
# Define UTM18N projection (WGS84 datum)
global utm_proj 
utm_proj = Proj(proj="utm", zone=city_zone[0], datum="WGS84", northern=city_zone[1])

# Sample Code

## Landsat

In [20]:
dsLS = rxr.open_rasterio('/Users/jonstar/Documents/heat_data/Landsat_Sentinel_DMV/Landsat_Sentinel_image_202201081546.tif')
geotiff_dsLS = dsLS.to_dataset('band')

In [21]:
geotiff_dsLS

In [23]:
#coords = geotiff_dsLS.stack(utm_coords=('x','y')).utm_coords.values
#coords

In [24]:
def stacked_to_latlon(pt):
    return utm_proj(pt[0], pt[1], inverse=True)

In [25]:
#latlon_pts = np.array(list(map(stacked_to_latlon, coords)))

In [26]:
#latlon_pts = latlon_pts.reshape((3000,3000,2))
#latlon_pts

In [27]:
#xr.DataArray(latlon_pts, dims=['utm_x','utm_y','latlon_pts']).to_netcdf('../../heat_data/latlons_processed/DMV_latlons.nc')

In [28]:
#ll = xr.open_dataarray('../../heat_data/latlons_processed/DMV_latlons.nc')

In [29]:
# For Landsat/Sentinel tifs
geotiff_dsLS = geotiff_dsLS.rename({1:'Blue_sfc_reflectance', 2:'Green_sfc_reflectance', 3:'Red_sfc_reflectance', 4:'NIR_sfc_reflectance',
                                    5:'SWIR1_sfc_reflectance', 6:'SWIR2_sfc_reflectance', 7:'Landsat_LST', 8:'Landsat_Cloud_Mask',
                                    9:'VV_SAR', 10:'VH_SAR', 11:'SAR_angle'})

In [32]:
geotiff_dsLS['VV_SAR'].values

array([[ -9.92071265, -10.84745936, -11.83182635, ..., -10.03776755,
         -9.29846014, -11.89384557],
       [-14.50690736, -14.34508118, -10.66607685, ..., -12.11965069,
         -9.99666216, -10.81896107],
       [-15.69751381, -16.04306312, -14.71314861, ..., -11.0500571 ,
        -13.63753225, -13.88801518],
       ...,
       [-10.02668964,  -9.24007812,  -7.98450085, ..., -20.62141482,
        -20.78696402, -18.63715653],
       [ -9.77686224,  -9.75350271,  -8.04629463, ..., -22.71200549,
        -19.72524323, -18.40786058],
       [ -9.46218831,  -7.93371171, -12.62253956, ..., -19.26779407,
        -21.56325403, -19.21791073]])

In [33]:
geotiff_dsLS['Landsat_Cloud_Mask'].values

array([[29986., 29986., 30242., ..., 24082., 22280., 22280.],
       [29986., 29986., 29986., ..., 23826., 24082., 24082.],
       [30048., 30048., 29986., ..., 24082., 22280., 22280.],
       ...,
       [22280., 24082., 22280., ..., 21952., 21952., 21952.],
       [23826., 24082., 24082., ..., 21952., 21952., 21952.],
       [24082., 30242., 30242., ..., 21952., 21952., 21952.]])

In [34]:
binary_vectorized = np.vectorize(to_binary_string)
mask = binary_vectorized(geotiff_dsLS.Landsat_Cloud_Mask.values)

In [35]:
mask

array([['111010100100010', '111010100100010', '111011000100010', ...,
        '101111000010010', '101011100001000', '101011100001000'],
       ['111010100100010', '111010100100010', '111010100100010', ...,
        '101110100010010', '101111000010010', '101111000010010'],
       ['111010101100000', '111010101100000', '111010100100010', ...,
        '101111000010010', '101011100001000', '101011100001000'],
       ...,
       ['101011100001000', '101111000010010', '101011100001000', ...,
        '101010111000000', '101010111000000', '101010111000000'],
       ['101110100010010', '101111000010010', '101111000010010', ...,
        '101010111000000', '101010111000000', '101010111000000'],
       ['101111000010010', '111011000100010', '111011000100010', ...,
        '101010111000000', '101010111000000', '101010111000000']],
      dtype='<U16')

In [39]:
geotiff_dsLS['Landsat_Cloud_Mask'] = (('y', 'x'), mask)

In [40]:
geotiff_dsLS['Landsat_Cloud_Mask'].attrs['bitmask_key'] = "Bit 0: Fill\nBit 1: Dilated Cloud\nBit 2: Cirrus (high confidence)\nBit 3: Cloud\nBit 4: Cloud Shadow\n\
Bit 5: Snow\nBit 6: Clear\n    0: Cloud or Dilated Cloud bits are set\n    1: Cloud and Dilated Cloud bits are not set\nBit 7: Water\n\
Bits 8-9: Cloud Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High\n\
Bits 10-11: Cloud Shadow Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High\n\
Bits 12-13: Snow/Ice Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High\n\
Bits 14-15: Cirrus Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High"

In [42]:
geotiff_dsLS

In [47]:
geotiff_dsLS = geotiff_dsLS.assign_coords({'datetime':str(times_Landsat[0])})

In [48]:
geotiff_dsLS

In [49]:
geotiff_dsLS.to_netcdf('../../heat_data/Landsat_Sentinel_DMV_processed/processed_Landsat_0.nc')

## GOES

In [119]:
dsG = rxr.open_rasterio('/Users/jonstar/Documents/heat_data/GOES_DMV/GOES_image_202201010000.tif')
#dsG = dsG.reindex(y=dsG.y[::-1])
geotiff_dsG = dsG.to_dataset('band')

In [121]:
geotiff_dsG

In [122]:
# For GOES tifs
geotiff_dsG = geotiff_dsG.rename({1:'GOES_C13_LWIR', 2:'GOES_C14_LWIR',
                                3:'GOES_C15_LWIR', 4:'GOES_C16_LWIR'})

In [123]:
geotiff_dsG['GOES_C16_LWIR'].values

array([[277.22007993, 276.74828503, 275.88332773, ..., 274.90042171,
        275.88332773, 276.86623376],
       [278.75341333, 277.69187482, 276.35512263, ..., 274.90042171,
        275.88332773, 276.86623376],
       [279.77563559, 278.6354646 , 278.16366971, ..., 275.41153284,
        277.69187482, 277.22007993],
       ...,
       [284.65084948, 284.13973834, 284.21837083, ..., 281.7021314 ,
        281.7021314 , 280.79785786],
       [284.65084948, 284.13973834, 283.78589217, ..., 281.7021314 ,
        281.7021314 , 280.79785786],
       [283.78589217, 283.58931097, 283.58931097, ..., 282.80298615,
        281.82008012, 281.15170403]])

In [50]:
#coords = geotiff_dsG.stack(utm_coords=('y','x')).utm_coords.values
#coords

In [51]:
#latlon_pts_2km_1d = np.array(list(map(stacked_to_latlon, coords)))

In [52]:
#latlon_pts_2km = latlon_pts_2km_1d.reshape((45,45,2))
#latlon_pts_2km

In [18]:
geotiff_dsG = geotiff_dsG.assign_coords({'datetime':str(times_GOES[0])})

In [19]:
geotiff_dsG

### Working microwave into GOES

In [20]:
dsMW = xr.open_dataset('/Users/jonstar/Documents/heat_data/mw_data/MW_LST_DTC_20230531_x1y.h5')

In [21]:
dsMW

In [22]:
dsMW = dsMW.assign_coords(
    datetime=(
        "phony_dim_0", pd.date_range(start="20230531", periods=96, freq="15min")
    ),
    longitude=(
        "phony_dim_1", np.arange(-180,180,0.25)
    ),
    latitude=(
        "phony_dim_2", np.arange(-60,90,0.25)[::-1]
    ))

In [23]:
#dsMW = dsMW.rename({'phony_dim_0':'datetime', 'phony_dim_1':'longitude', 'phony_dim_2':'latitude'})#.set_index(('datetime', 'longitude', 'latitude'))

In [23]:
dsMW['datetime'][75]

In [25]:
import matplotlib.pyplot as plt
for i in list(range(20)):
    #cb = plt.pcolormesh(dsMW['TB37V_LST_DTC'][i], vmin=13000, vmax=15000)
    dsMW['TB37V_LST_DTC'].isel(datetime=i).plot.pcolormesh("longitude", "latitude", vmin=13500, vmax=15500, figsize=(10,5))
    #plt.colorbar(cb)
    plt.title(dsMW['datetime'][i])
    #if i%24 == 0:
        #plt.savefig(f'../../photos/mw_{i}.jpg')
    plt.show()

ValueError: Dimensions {'datetime'} do not exist. Expected one or more of ('phony_dim_0', 'phony_dim_1', 'phony_dim_2')

In [24]:
np.max(latlon_pts_2km[:,:,0]), np.min(latlon_pts_2km[:,:,0]), np.max(latlon_pts_2km[:,:,1]), np.min(latlon_pts_2km[:,:,1])

(np.float64(-76.3682743333849),
 np.float64(-77.40623787635198),
 np.float64(39.48217480279245),
 np.float64(38.67320868993897))

In [25]:
def get_next_latlon_coord(n, above=True):
    if above:
        return np.ceil(n*4)/4
    else:
        return np.floor(n*4)/4

In [26]:
np.where(dsMW['latitude'] == 38.75)[0][0]

np.int64(204)

In [27]:
dsMW['TB37V_LST_DTC']

In [28]:
mw_dataArray = dsMW['TB37V_LST_DTC'][75,410:416,201:206] # DMV
mw_dataArray

In [145]:
#geotiff_dsG.coords['latlons'] = (('y', 'x', 'pts'), latlon_pts_2km)
geotiff_dsG

In [90]:
mw_latlons = np.array(list(mw_dataArray.stack(coords=('longitude','latitude'))['coords'].values))
#mw_latlons = np.array(list(mw_dataArray.stack(coords=('latitude','longitude'))['coords'].values))

In [139]:
latlon_pts_2km

array([[[-77.40623788,  39.46550554],
        [-77.38300815,  39.46598414],
        [-77.35977786,  39.4664581 ],
        ...,
        [-76.4301761 ,  39.48161212],
        [-76.40692784,  39.48189578],
        [-76.38367924,  39.4821748 ]],

       [[-77.40561771,  39.44750017],
        [-77.38239395,  39.44797846],
        [-77.35916964,  39.44845212],
        ...,
        [-76.42980722,  39.4635965 ],
        [-76.40656494,  39.46387998],
        [-76.38332234,  39.46415882]],

       [[-77.40499809,  39.42949473],
        [-77.3817803 ,  39.42997272],
        [-77.35856196,  39.43044608],
        ...,
        [-76.42943866,  39.44558082],
        [-76.40620237,  39.44586412],
        [-76.38296576,  39.44614278]],

       ...,

       [[-77.38066328,  38.70922486],
        [-77.35767988,  38.7096908 ],
        [-77.33469595,  38.71015222],
        ...,
        [-76.41496415,  38.72490543],
        [-76.39196297,  38.72518159],
        [-76.36896147,  38.72545323]],

       [[-77.38

In [170]:
y, x = np.meshgrid(mw_dataArray['latitude'], mw_dataArray['longitude'])
mw_latlons = np.stack((x,y)).T.reshape(-1,2)
mw_latlons

array([[-77.5 ,  39.5 ],
       [-77.25,  39.5 ],
       [-77.  ,  39.5 ],
       [-76.75,  39.5 ],
       [-76.5 ,  39.5 ],
       [-76.25,  39.5 ],
       [-77.5 ,  39.25],
       [-77.25,  39.25],
       [-77.  ,  39.25],
       [-76.75,  39.25],
       [-76.5 ,  39.25],
       [-76.25,  39.25],
       [-77.5 ,  39.  ],
       [-77.25,  39.  ],
       [-77.  ,  39.  ],
       [-76.75,  39.  ],
       [-76.5 ,  39.  ],
       [-76.25,  39.  ],
       [-77.5 ,  38.75],
       [-77.25,  38.75],
       [-77.  ,  38.75],
       [-76.75,  38.75],
       [-76.5 ,  38.75],
       [-76.25,  38.75],
       [-77.5 ,  38.5 ],
       [-77.25,  38.5 ],
       [-77.  ,  38.5 ],
       [-76.75,  38.5 ],
       [-76.5 ,  38.5 ],
       [-76.25,  38.5 ]])

In [175]:
mw_vals = mw_dataArray.T.values.reshape(-1)
mw_vals

array([14636, 14570, 14495, 14460, 14476, 14615, 14701, 14525, 14343,
       14313, 14461, 15071, 14595, 14453, 14068, 14062, 14456, 14739,
       14606, 14481, 13985,     0, 14507, 14647, 14590, 14406,     0,
           0,     0, 14875], dtype=uint16)

In [180]:
mw_interpolated = interpolate.griddata(mw_latlons, mw_vals, latlon_pts_2km)
mw_interpolated

array([[14605.03779733, 14598.99129599, 14592.94381475, ...,
        14548.3615804 , 14560.77021219, 14573.18750016],
       [14601.63310519, 14595.58812599, 14589.54216755, ...,
        14581.42717547, 14593.83280522, 14606.24708559],
       [14598.22854832, 14592.18508986, 14586.14065284, ...,
        14614.49269388, 14626.89532443, 14639.30660001],
       ...,
       [14543.72202898, 14532.26015057, 14520.79771881, ...,
        13098.43232082, 13127.33802609, 13155.98141645],
       [14542.27087355, 14530.81185264, 14519.35227884, ...,
        12053.07780414, 12081.97002085, 12110.60009041],
       [14540.81998086, 14529.36381492, 14517.90709651, ...,
        11007.7197192 , 11036.5984553 , 11065.21521194]])

In [181]:
geotiff_dsG['microwave_LST'] = (('y','x'), mw_interpolated)

In [182]:
geotiff_dsG = geotiff_dsG.assign_attrs(long_name='(\'CMI_C13\', \'CMI_C14\', \'CMI_C15\', \'CMI_C16\', \'mw_LST\')')

In [183]:
geotiff_dsG = geotiff_dsG.reindex(y=geotiff_dsG.y[::-1])

In [184]:
geotiff_dsG

In [94]:
geotiff_dsG.to_netcdf('../../heat_data/GOES_DMV_2km/processed_GOES_2km_0')

## Export and re-open

In [226]:
# Export the data
#geotiff_dsLS.rio.to_raster("../../Landsat_Sentinel_DMV_processed/Landsat_sample.tif")
#geotiff_dsG.rio.to_raster("../../Landsat_Sentinel_DMV_processed/GOES_sample.tif")
geotiff_dsG.to_netcdf("../../Landsat_Sentinel_DMV_processed/GOES_sample.nc")

In [95]:
ds = xr.open_dataset('../../heat_data/GOES_DMV_2km/processed_GOES_2km_0')
ds

In [63]:
rioxarray.open_rasterio("../../GOES_DMV_processed/processed_GOES_tif_0.tif")

## Processing Functions

In [25]:
"""
Processing of individual .tif files.

Performs a variety of tasks on the data to make it more easy to read and understand.

Attributes:
    tif (str): Path where tif file is located.
    time (datetime or str): Date and time of when the data was collected.
    name (str): Desired name of output file. Just the name, leave out the ".format" part at the end.
    coord_bounds (tuple or list, optional): Coordinate bounds if you wish to filter the data by location. Order should be
                                    (longitude minimum, longitude maximum, latitude minimum, latitude maximum).
    save_as (str, optional): Format to save the processed data in. For now, only 'nc' is built-in. Anything else
                                    simply returns the data.
"""
def process_Landsat_Sentinel_tif(tif, time, name, coord_bounds=None, save_as='nc'):
    #########################################################################################################
    # Open file and rename variables
    dsLS = rxr.open_rasterio(tif)
    dsLS = dsLS.reindex(y=dsLS.y[::-1])
    geotiff_ds = dsLS.to_dataset('band')

    geotiff_ds = geotiff_ds.rename({1:'Landsat_LST', 2:'Landsat_Cloud_Mask', 3:'VV_SAR', 4:'VH_SAR', 5:'SAR_angle'})

    #########################################################################################################
    # Convert cloud mask integers to binary strings
    binary_vectorized = np.vectorize(to_binary_string)
    mask = binary_vectorized(geotiff_ds.Landsat_Cloud_Mask.values)
    geotiff_ds['Landsat_Cloud_Mask'] = (('y', 'x'), mask)

    #cloud_conf_substring = lambda t: t[-10:-8]
    #vfunc = np.vectorize(cloud_conf_substring)
    #cloud_conf = vfunc(mask)
    #cloud_conf_int = (cloud_conf == '11').astype(int)*3
    #cloud_conf_int += (cloud_conf == '10')*2
    #cloud_conf_int += (cloud_conf == '01')
    #cloud_conf_str = cloud_conf_int.astype(str)

    #cloud_shadow_substring = lambda t: t[-12:-10]
    #vfunc = np.vectorize(cloud_shadow_substring)
    #cloud_shadow = vfunc(mask)
    #cloud_shadow_int = (cloud_shadow == '11').astype(int)*3
    #cloud_shadow_int += (cloud_shadow == '10')*2
    #cloud_shadow_int += (cloud_shadow == '01')
    #cloud_shadow_str = cloud_shadow_int.astype(str)

    #snow_ice_substring = lambda t: t[-14:-12]
    #vfunc = np.vectorize(snow_ice_substring)
    #snow_ice = vfunc(mask)
    #snow_ice_int = (snow_ice == '11').astype(int)*3
    #snow_ice_int += (snow_ice == '10')*2
    #snow_ice_int += (snow_ice == '01')
    #snow_ice_str = snow_ice_int.astype(str)

    #cirrus_conf_substring = lambda t: t[-16:-14]
    #vfunc = np.vectorize(cirrus_conf_substring)
    #cirrus_conf = vfunc(mask)
    #cirrus_conf_int = (cirrus_conf == '11').astype(int)*3
    #cirrus_conf_int += (cirrus_conf == '10')*2
    #cirrus_conf_int += (cirrus_conf == '01')
    #cirrus_conf_str = cirrus_conf_int.astype(str)

    #dilated_cloud_substring = lambda t: t[-2]
    #vfunc = np.vectorize(dilated_cloud_substring)
    #dilated_cloud = vfunc(mask)

    #clear_substring = lambda t: t[-7]
    #vfunc = np.vectorize(clear_substring)
    #clear = vfunc(mask)

    #geotiff_ds['Landsat_Cloud_Confidence'] = (('y', 'x'), cloud_conf_str)
    #geotiff_ds['Landsat_Cloud_Shadow_Confidence'] = (('y', 'x'), cloud_shadow_str)
    #geotiff_ds['Landsat_Snow_Ice_Confidence'] = (('y', 'x'), snow_ice_str)
    #geotiff_ds['Landsat_Cirrus_Confidence'] = (('y', 'x'), cirrus_conf_str)
    #geotiff_ds['Landsat_Dilated_Cloud'] = (('y', 'x'), dilated_cloud)
    #geotiff_ds['Landsat_Clear_Pixel'] = (('y', 'x'), clear)

    #geotiff_ds = geotiff_ds.drop_vars('Landsat_Cloud_Mask')

    #########################################################################################################
    # Assign descriptive attributes for the cloud mask and a new coordinate for the file datetime
    geotiff_ds['Landsat_Cloud_Mask'].attrs['bitmask_key'] = "Bit 0: Fill\nBit 1: Dilated Cloud\nBit 2: Cirrus (high confidence)\nBit 3: Cloud\nBit 4: Cloud Shadow\n\
    Bit 5: Snow\nBit 6: Clear\n    0: Cloud or Dilated Cloud bits are set\n    1: Cloud and Dilated Cloud bits are not set\nBit 7: Water\n\
    Bits 8-9: Cloud Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High\n\
    Bits 10-11: Cloud Shadow Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High\n\
    Bits 12-13: Snow/Ice Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High\n\
    Bits 14-15: Cirrus Confidence\n    0: None\n    1: Low\n    2: Medium\n    3: High"

    #geotiff_ds = geotiff_ds.assign_attrs(Datetime=str(time))
    geotiff_ds = geotiff_ds.assign_coords({'datetime':str(time)})
    #geotiff_ds = geotiff_ds.assign_attrs(long_name='(\'ST_B10\', \'VV\', \'VH\', \'angle\', \'QA_PIXEL\')')
    
    #########################################################################################################
    # Optional filtering by lat/lon
    if coord_bounds:
        geotiff_ds = geotiff_ds.sel(longitude=slice(coord_bounds[0], coord_bounds[1])).sel(latitude=slice(coord_bounds[3], coord_bounds[2]))

    #########################################################################################################
    # Export
    if save_as=='nc':
        # Convert file into netCDF
        geotiff_ds.to_netcdf(f'{name}.nc')
    else:
        #print(geotiff_ds)
        geotiff_ds.rio.to_raster(f'{name}.tif')

In [28]:
"""
Processing of individual .tif files.

Performs a variety of tasks on the data to make it more easy to read and understand.

Attributes:
    tif (str): Path where tif file is located.
    time (datetime or str): Date and time of when the data was collected.
    name (str): Desired name of output file. Just the name, leave out the ".format" part at the end.
    latlon_pts (float array): (3000,3000,2) Array of (longitude, latitude) points at each point on the utm grid.
    local_timezone (pytz timezone): Local timezone of the area you are creating data for
    coord_bounds (tuple or list, optional): Coordinate bounds if you wish to filter the data by location. Order should be
                                    (longitude minimum, longitude maximum, latitude minimum, latitude maximum).
    save_as (str, optional): Format to save the processed data in. For now, only 'nc' is built-in. Anything else
                                    simply returns the data.
"""
def process_GOES_tif(tif, time, name, latlon_pts, local_timezone, coord_bounds=None, save_as='nc'):
    #########################################################################################################
    # Open file and rename variables
    dsG = rxr.open_rasterio(tif)
    geotiff_ds = dsG.to_dataset('band')

    geotiff_ds = geotiff_ds.rename({1:'GOES_C13_LWIR', 2:'GOES_C14_LWIR',
                                      3:'GOES_C15_LWIR', 4:'GOES_C16_LWIR'})

    #geotiff_ds = geotiff_ds.assign_attrs(Datetime=str(time))
    geotiff_ds = geotiff_ds.assign_coords({'datetime':str(toTimezone(time, pytz.utc))})
    geotiff_ds = geotiff_ds.assign_attrs(long_name='(\'CMI_C13\', \'CMI_C14\', \'CMI_C15\', \'CMI_C16\', \'mw_LST\')')

    #########################################################################################################
    # Process microwave data
    local_dt = toTimezone(time, local_timezone)
    date_str = str(local_dt)[0:4] + str(local_dt)[5:7] + str(local_dt)[8:10]
    time_index = local_dt.hour*4 + round(local_dt.minute/15+local_dt.second/60) # Used in selection of datetime index from mw file
    
    dsMW = xr.open_dataset(f'/Users/jonstar/Documents/heat_data/mw_data/MW_LST_DTC_{date_str}_x1y.h5')
    dsMW = dsMW.assign_coords(
                datetime=("phony_dim_0", pd.date_range(start=date_str, periods=96, freq="15min")),
                longitude=("phony_dim_1", np.arange(-180,180,0.25)),
                latitude=("phony_dim_2", np.arange(-60,90,0.25)[::-1]))
    #dsMW = dsMW.rename({'phony_dim_0':'datetime', 'phony_dim_1':'longitude', 'phony_dim_2':'latitude'})

    def get_next_latlon_coord(n, above=True):
        if above:
            return np.ceil(n*4)/4
        else:
            return np.floor(n*4)/4

    max_lon_index = np.where(dsMW['longitude'] == get_next_latlon_coord(np.max(latlon_pts[:,:,0]), True))[0][0]
    min_lon_index = np.where(dsMW['longitude'] == get_next_latlon_coord(np.min(latlon_pts[:,:,0]), False))[0][0]
    max_lat_index = np.where(dsMW['latitude'] == get_next_latlon_coord(np.max(latlon_pts[:,:,1]), True))[0][0]
    min_lat_index = np.where(dsMW['latitude'] == get_next_latlon_coord(np.min(latlon_pts[:,:,1]), False))[0][0]

    # Create microwave array for specific area
    # Remember: latitude decreases with index
    mw_clipped = dsMW['TB37V_LST_DTC'][time_index,min_lon_index:max_lon_index+1,max_lat_index:min_lat_index+1]

    def interpolate_mw(mw_DataArray, latlon_pts):
        # Initialize array of zeros with shape of desired array
        mw_values = np.zeros((45,45)).astype('uint16')
    
        # Loop through points in microwave array and add values
        # based on their closeset lat/lon points in the utm array
        for x in list(range(len(mw_DataArray['longitude']))):
            for y in list(range(len(mw_DataArray['latitude']))):
                mw_values += np.logical_and(np.abs(latlon_pts[:,:,0] - mw_DataArray['longitude'][x].values) < 0.125,
                                            np.abs(latlon_pts[:,:,1] - mw_DataArray['latitude'][y].values) < 0.125)\
                                            *mw_DataArray[x,y].values
        return mw_values

    mw_vals = interpolate_mw(mw_clipped, latlon_pts)
    geotiff_ds['microwave_LST'] = (('y','x'), mw_vals.T)

    # Flip coordinates so latitude increases with index
    geotiff_ds = geotiff_ds.reindex(y=geotiff_ds.y[::-1])

    #########################################################################################################
    # Optional filtering by lat/lon
    if coord_bounds:
        geotiff_ds = geotiff_ds.sel(longitude=slice(coord_bounds[0], coord_bounds[1])).sel(latitude=slice(coord_bounds[3], coord_bounds[2]))

    #########################################################################################################
    if save_as=='nc':
        # Convert file into netCDF
        geotiff_ds.to_netcdf(f'{name}.nc')
    else:
        geotiff_ds.rio.to_raster(f'{name}.tif')

In [26]:
# Runs processing function on all the geotif files
#coord_bounds = [-76.761259, -76.404021, 39.202514, 39.422284]

for i in list(range(1)):
    i += 3
    num = i+8
    #print(i, num)
    time = toTimezone(times_Landsat.value[num], pytz.utc)
    process_Landsat_Sentinel_tif(Landsat_tif_list[i], time, f'../../heat_data/Landsat_Sentinel_DMV_processed/processed_Landsat_tif_{time.strftime('%Y%m%d%H%M')}')

In [41]:
# Runs processing function on all the geotif files
#coord_bounds = [-76.761259, -76.404021, 39.202514, 39.422284]

#for i in list(range(len(GOES_tif_list))):
for i in list(range(1)):
    #i += 170
    dt = times_GOES.value[i]
    process_GOES_tif(GOES_tif_list[i], dt,
                     f'../../heat_data/GOES_DMV_processed/processed_GOES_tif_{toTimezone(dt,pytz.utc).strftime('%Y%m%d%H%M')}',
                     latlon_pts_2km, az)

In [157]:
ds = xr.open_dataset('../../heat_data/GOES_DMV_processed/processed_GOES_tif_0.nc')

In [158]:
ds

In [None]:
######################################################
# End of tif processing part
# Below is gridding of Baltimore

In [31]:
grid_lats = [39.2576, 39.3147, 39.3718]
grid_lons = [-76.5299, -76.6204, -76.7108]

In [37]:
grid1 = geotiff_ds.sel(longitude=slice(grid_lons[2], grid_lons[1])).sel(latitude=slice(grid_lats[2], grid_lats[1]))
grid2 = geotiff_ds.sel(longitude=slice(grid_lons[1], grid_lons[0])).sel(latitude=slice(grid_lats[2], grid_lats[1]))
grid3 = geotiff_ds.sel(longitude=slice(grid_lons[2], grid_lons[1])).sel(latitude=slice(grid_lats[1], grid_lats[0]))
grid4 = geotiff_ds.sel(longitude=slice(grid_lons[1], grid_lons[0])).sel(latitude=slice(grid_lats[1], grid_lats[0]))

In [38]:
grid4