In [3]:
%%time 
import xarray as xr
fskin = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/ERA_skin/sfc_skin_unstructured_2018_01.grib'
fsfc = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/ERA_sfc/sfc_unstructured_2018_01.grib'
fskt = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/ERA_skt/skt_unstructured_2018_01.grib'


#Can I open all files at once?

ds_skin = xr.open_dataset(fskin,engine='cfgrib',filter_by_keys={'typeOfLevel': 'surface'},backend_kwargs={'indexpath': ''})
ds_sfc = xr.open_dataset(fsfc,engine='cfgrib',filter_by_keys={'typeOfLevel': 'surface'},backend_kwargs={'indexpath': ''})
ds_skt = xr.open_dataset(fskt,engine='cfgrib',filter_by_keys={'typeOfLevel': 'surface'},backend_kwargs={'indexpath': ''})


CPU times: user 1min 35s, sys: 30.6 s, total: 2min 6s
Wall time: 5min 33s


In [5]:
#Process a month of ERA data
ERA = xr.merge([ds_skin, ds_sfc,ds_skt]) #merge together
land_filter = (ERA.lsm > 0.5)
ERA_land = ERA.where(land_filter,drop=True)


In [6]:
#Relabel longitude coordinate to be consistent with MODIS
ERA_land = ERA_land.assign_coords({"longitude": (((ERA_land.longitude + 180) % 360) - 180)})

In [7]:
import pandas as pd
timestamps = pd.to_datetime(ERA_land.time)
ti = timestamps[56] #crop first values



time_filter = (ERA_land.time == ti)
ERA_land_snapshot = ERA_land.where(time_filter,drop=True)



In [8]:
ERA_land_snapshot

In [9]:
import geopandas as gpd
import matplotlib.pyplot as plt

def plot_map(ds,x,y,q): #slow plotting function

    #Takes a ds, converts to pandas df
    
    if isinstance(ds, pd.DataFrame):
        df = ds
    else:
        df = ds.to_dataframe().reset_index()#.dropna()
        
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[x], df[y]))
    
    #Plot the world
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
    ax=world.boundary.plot(figsize=(24,12))
    
    gdf.plot(ax=ax,column=q,cmap='plasma',markersize=1,legend=True)
    
    
    plt.show()


In [10]:
def process_MODIS_file(sat_xr,date_string,latitude_bound): 
    
    # Rename spatial dimensions
    sat_xr = sat_xr.rename({'x':'longitude','y':'latitude'})
    
    
    #Filter by latitude
    space_filter = np.expand_dims(np.abs(sat_xr.latitude) < latitude_bound,axis=(0,-1))
    mask = np.logical_and(np.isfinite(sat_xr),space_filter) #make it a 2d mask

    sat_xr = sat_xr.where(mask,drop=True)
    
    #Create time delta to change local to UTC
    time_delta = pd.to_timedelta(sat_xr.longitude.data/15,unit='H') 
    
    #Convert local satellite time to UTC and round to nearest hour
    time = (pd.to_datetime([date_string + " " + local_times[satellite]]*time_delta.shape[0]) - time_delta).round('H')
    

    # Also filter by latitude
    return sat_xr.where(mask,drop=True), time

In [13]:
%%time
import numpy as np
import pandas as pd
import os
satellite='aquaDay'
satellite_folder = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/MODIS'


local_times = {"aquaDay":"13:30",
               "terraDay":"10:30",
               "terraNight":"22:30",
               "aquaNight":"01:30"
              }

min_hours = {"aquaDay":2,
            "terraDay":-1,
            "aquaNight":-1,
            "terraNight":11}

max_hours = {"aquaDay":24,
            "terraDay":22,
            "aquaNight":13,
            "terraNight":24}

latitude_bound=70

timestamps = pd.to_datetime(ERA_land.time)

timestamps = timestamps[3:]

#timestamps = timestamps[5:] #crop first values

counter = 0
previous_datestring = None
for t in timestamps:
    
    
    utc_hour = t.hour
    
    
    #Due to crossing of the datetime, some times will be saved different date
    if utc_hour < min_hours[satellite]:
        file_date = t  - np.timedelta64(1,'D')
    elif utc_hour > max_hours[satellite]:
        file_date = t  + np.timedelta64(1,'D')
    else:
        file_date = t
        
        
        
    y = pd.to_datetime(file_date).year
    m = pd.to_datetime(file_date).month
    d = pd.to_datetime(file_date).day
    date_string = f'{y}-{m:02}-{d:02}'
    
    print(t, utc_hour, date_string, previous_datestring)
    
    if date_string != previous_datestring:
        #Close previous file
        try:
            MODIS_data.close()
            print ('Closing file')
        except:
            pass
        
        
        #Open a new file
        print('Opening new file')
        os.path.isfile(fskin)
        MODIS_data = xr.open_dataarray(f'{satellite_folder}/{satellite}_errorGTE03K_04km_{date_string}.tif',engine="rasterio")
        MODIS_data,time = process_MODIS_file(MODIS_data,date_string,latitude_bound)
              
    previous_datestring=date_string

    
    print ('Filtering MODIS')
    # What date/time does the user want?
    target_time = t #np.datetime64(f'{date} {utc_hour:02}:00:00')
        
    # Is this target time in this data array?
    time_filter = np.expand_dims(time == t,axis=(0,1))
    
    # Make this 1d time filter a 2d mask
    mask = np.logical_and(np.isfinite(MODIS_data),time_filter)
    
    #Apply mask to data array
    MODIS_data_snapshot= MODIS_data.where(mask,drop=True) 
    
    #display()
    #plot_map(MODIS_data_snapshot,'longitude', 'latitude', 'band_data')
    
    
    print ('Filtering ERA')
    time_filter = (ERA_land.time == t)
    ERA_land_snapshot = ERA_land.where(time_filter,drop=True)
    
  
    
    #Filter ERA data to those bounds
    delta = 1.0
    bounds = {"latitude_min" :MODIS_data_snapshot.latitude.data.min()-delta,
                  "latitude_max" :MODIS_data_snapshot.latitude.data.max()+delta,
                  "longitude_min":MODIS_data_snapshot.longitude.data.min()-delta,
                  "longitude_max":MODIS_data_snapshot.longitude.data.max()+delta
          }
    
     # Also filter by latitude/longtiude
    longitude_filter = (ERA_land_snapshot.longitude > bounds['longitude_min']) & (ERA_land_snapshot.longitude < bounds['longitude_max'])
    latitude_filter =  (ERA_land_snapshot.latitude > bounds['latitude_min']) & (ERA_land_snapshot.latitude < bounds['latitude_max'])
    ERA_land_snapshot = ERA_land_snapshot.where(longitude_filter & latitude_filter,drop=True)
    

    
    #Database
#     xb = database[["latitude", "longitude"]].to_numpy().astype('float32')
#     xb = xb.copy(order='C') #C-contigious
    
#     #Query
#     xq = query[["latitude", "longitude"]].to_numpy().astype('float32') 
#     xq = xq.copy(order='C')
    
    
    ERA_land_snapshot[["latitude", "longitude"]].to_numpy().astype('float32') 
    
    
    
    counter += 1
    if counter > 5: break

2018-01-01 03:00:00 3 2018-01-01 None
Opening new file
Filtering MODIS
Filtering ERA


AttributeError: 'Dataset' object has no attribute 'to_numpy'

In [51]:
MODIS_data.dropna()

TypeError: dropna() missing 1 required positional argument: 'dim'

In [52]:
MODIS_data

In [40]:
d = {'latitude': np.arange(10), 'longitude': np.arange(10)}
df = pd.DataFrame(data=d)

In [43]:
xb = df[["latitude", "longitude"]].to_numpy().astype('float32') 
display(xb.shape)

(10, 2)

In [44]:
#ERA_land_snapshot[["latitude", "longitude"]].to_numpy().astype('float32') 
xc = np.array([ERA_land_snapshot.latitude.data,ERA_land_snapshot.latitude.data]).astype('float32') 
display(xc.shape)

(2, 2006)

In [47]:
MODIS_data_snapshot.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,spatial_ref,band_data
band,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1
1,69.983333,150.016667,0,
1,69.983333,150.050000,0,
1,69.983333,150.083333,0,
1,69.983333,150.116667,0,
1,69.983333,150.150000,0,
1,...,...,...,...
1,-69.983333,164.850000,0,
1,-69.983333,164.883333,0,
1,-69.983333,164.916667,0,
1,-69.983333,164.950000,0,


In [45]:
ERA_land_snapshot.latitude.data.shape

(2006,)

In [38]:
#xb = database[["latitude", "longitude"]].to_numpy().astype('float32')
xb = xb.copy(order='C') #C-contigious

In [39]:
xb

array([[ 70.96014,  70.96014,  70.96014, ..., -70.96014, -70.96014,
        -70.96014],
       [ 70.96014,  70.96014,  70.96014, ..., -70.96014, -70.96014,
        -70.96014]], dtype=float32)

In [12]:
MODIS_data_snapshot,MODIS_data,MODIS_time,previous_datestring = (True,)*4

In [48]:
MODIS_data_snapshot

True

In [None]:

    
#     #...and add this condition to the mask
#     mask = np.logical_and(mask,space_filter)
    
    
#     #Check we have some true values in our mask
#     if mask.sum() == 0:
#         print('There is no appropriate data')
#         return 0
      
    #Apply mask to data array
     
    
    
    
    
    
    
#     #What date/time does the user want?
#     target_time = np.datetime64(f'{date} {utc_hour:02}:00:00')
        
#     #Is this target time in this data array?
#     time_filter = np.expand_dims(time == target_time,axis=(0,1))
    
#     # Make this 1d time filter a 2d mask
#     mask = np.logical_and(np.isfinite(sat_xr),time_filter)

In [63]:

#m0 = f'{m:02}'


In [64]:
date_string

'2018-01-01'

In [65]:
MODIS_data = 


In [66]:
MODIS_data

In [60]:
import xarray as xr
f = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/climate.v020/climate.v020/639l_2/clake'
ds = xr.open_dataset(f,engine='cfgrib',filter_by_keys={'typeOfLevel': 'surface'},backend_kwargs={'indexpath': ''})

In [65]:
ds

In [67]:
import xarray as xr
f = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/climate.v020/climate.v020/639l_2/sfc'
ds = xr.open_dataset(f,engine='cfgrib',filter_by_keys={'shortName': 'sr'},backend_kwargs={'indexpath': ''})

In [68]:
ds

In [66]:
import xarray as xr
f = '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/climate.v020/climate.v020/639l_2/sfc'
#ds = xr.open_dataset(f,engine='cfgrib',filter_by_keys={'typeOfLevel': 'surface'},backend_kwargs={'indexpath': ''})


ds = xr.open_dataset(f,
                     engine='cfgrib',
                     backend_kwargs={'filter_by_keys': {'typeOfLevel': 'surface'}})

Can't create file '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/climate.v020/climate.v020/639l_2/sfc.923a8.idx'
Traceback (most recent call last):
  File "/home/k/kimpson/bin/conda/envs/GPU/lib/python3.7/site-packages/cfgrib/messages.py", line 522, in from_indexpath_or_filestream
    with compat_create_exclusive(indexpath) as new_index_file:
  File "/home/k/kimpson/bin/conda/envs/GPU/lib/python3.7/contextlib.py", line 112, in __enter__
    return next(self.gen)
  File "/home/k/kimpson/bin/conda/envs/GPU/lib/python3.7/site-packages/cfgrib/messages.py", line 488, in compat_create_exclusive
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
PermissionError: [Errno 13] Permission denied: '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/climate.v020/climate.v020/639l_2/sfc.923a8.idx'
Can't read index file '/network/group/aopp/predict/TIP016_PAXTON_RPSPEEDY/ML4L/ECMWF_files/raw/climate.v020/climate.v020/639l_2/sfc.923a8.idx'
Traceb