## Create Seviri time series - full image 

Step by step full image prediction:
* get matching seviri files in time range
* read and concat seviri files

In [11]:
import warnings
warnings.filterwarnings('ignore')

In [12]:
import json
import sys
import os
from datetime import datetime, date, timedelta
import xarray as xr
import pandas as pd
import numpy as np
from pyproj import CRS, Transformer
from satpy import find_files_and_readers

In [13]:
import socket
import sys

# get host name to check if we are on euler/daint/iac
hostname = socket.gethostname()
print(hostname)
if hostname.startswith("eu"):
    host = "euler"
else:
    host = "iac"

if host == "euler":
    sys.path.append("/cluster/home/kjeggle/sat-cirrus")
    sys.path.append("/cluster/home/kjeggle/sat-cirrus/data")
else:
    sys.path.append("/home/kjeggle/sat-cirrus")
    sys.path.append("/home/kjeggle/sat-cirrus/data")

iacdipl-7


In [14]:
# todo - these functions are copied from code of the paper amell et al. 2021
from data.data_module import read_seviri_native_file, _resolve_serialisation

In [15]:
def read_and_crop_seviri_data(filepath, crop=None) -> xr.Dataset:
    """reads and crops spatial extent
    
    args:
        filepath (str):
        crop (tuple): (min, max)
    """
    ds = read_seviri_native_file(filepath)
    ds = _resolve_serialisation(ds)
    ds = add_lonlat(ds)
    if crop:
        lonmin,lonmax,latmin,latmax=crop
        ds = ds.sel(x=slice(lonmax,lonmin),y=slice(latmin,latmax))
        
    return ds
    

In [16]:
def add_lonlat(patch: xr.Dataset,
               hor_res: float = 0.01,
               transformer = None):
    """adds lon / lat to patch"""
    if transformer is None:
        # transform lon/lat to cartesian coordinates [m]
        transformer = Transformer.from_crs(seviri_proj, era5_proj, always_xy=True)
        
    patch_lons, patch_lats = transformer.transform(patch.x.values, patch.y.values)
    
    patch = patch.assign(lon=xr.DataArray(patch_lons, dims="x", coords={'x': patch.x.values}),
                         lat=xr.DataArray(patch_lats, dims="y", coords={'y': patch.y.values}))

    patch = patch.assign(latr=lambda x: np.round((np.round(x.lat * (1 / hor_res)) * hor_res).astype('float64'), 4))
    patch = patch.assign(lonr=lambda x: np.round((np.round(x.lon * (1 / hor_res)) * hor_res).astype('float64'), 4))
    
    return patch

In [17]:
def create_seviri_timeseries(start_time, 
                             end_time, 
                             domain=(-2.732e+06,2.732e+06,-2.996e+06,2.996e+06),
                             seviri_source_dir="/net/n2o/wolke_scratch2/kjeggle/SEVIRI"):
    """
    Args:
        start_time (datetime)
        end_time (datetime)
        domain (tuple): (lonmin,lonmax,latmin,latmax)
        seviri_source_dir (str)
    
    
    """   
    # get available seviri files
    my_files = find_files_and_readers(base_dir=seviri_source_dir,
                                      reader='seviri_l1b_native',
                                      start_time=start_time,
                                      end_time=end_time)["seviri_l1b_native"]
    my_files = np.sort(my_files)
    print(f"{len(my_files)} SEVIRI files found")
    
    # read seviri files and concat along sensing stop dimension
    ds_list = [read_and_crop_seviri_data(fpath, crop=domain) for fpath in my_files]
    print("read all seviri files")
    
    # concat files
    ds = xr.concat(ds_list,dim="sensing_stop")
    print("concatenated all files along time dim")
    
    # remove some parameters -> necessary to be able to save as .nc file
    for i in range(1,12):
        try:
            del ds[f"ch{i}"].attrs['orbital_parameters']
        except KeyError:
            pass
        try:
            del ds[f"ch{i}"].attrs['time_parameters']
            #print(f"deleted time_parameters attribute from ch{i}") 
        except KeyError:
            pass
    
    return ds

In [18]:
def create_and_save_seviri_timeseries(date_obj,
                                      domain=(-2.732e+06,2.732e+06,-2.996e+06,2.996e+06),
                                      seviri_source_dir="/net/n2o/wolke_scratch2/kjeggle/SEVIRI",
                                      target_dir="/net/n2o/wolke_scratch2/kjeggle/VerticalCloud/DataSmallDomain/SeviriWholeAreaInput/"):
    print("create seviri timeseries for", date_obj)
    
    start_time = datetime.combine(date_obj, datetime.min.time())
    end_time = start_time + timedelta(hours=23,minutes=59)
    
    ds = create_seviri_timeseries(start_time, end_time, domain, seviri_source_dir)
    
    date_str = datetime.strftime(start_time.date(),"%Y%m%d")
    fname = f"seviri_timeseries_{date_str}.nc"
    print(f"save seviri timeseries ({start_time} - {end_time}) to {fname}")
    ds.to_netcdf(os.path.join(target_dir,fname))
    print("\n")
    print("#############")
    print("\n")

In [19]:
era5_proj = CRS.from_string('EPSG:4326') # latlon
SEVIRI_PROJECTION_SOURCE = "./data/seviri_proj.json"

with open(SEVIRI_PROJECTION_SOURCE, "r") as json_file:
    data = json.load(json_file)
seviri_proj = CRS.from_json_dict(data)

In [20]:
# create and save dataset for range of dates
start_date = date(2010,6,1)
end_date = date(2010,12,31)
date_list = [start_date + timedelta(days=x) for x in range((end_date-start_date).days+1)]

for d in date_list:
    create_and_save_seviri_timeseries(d)

create seviri timeseries for 2010-06-01
96 SEVIRI files found
read all seviri files
concatenated all files along time dim
save seviri timeseries (2010-06-01 00:00:00 - 2010-06-01 23:59:00) to seviri_timeseries_20100601.nc


#############


create seviri timeseries for 2010-06-02
96 SEVIRI files found
read all seviri files
concatenated all files along time dim
save seviri timeseries (2010-06-02 00:00:00 - 2010-06-02 23:59:00) to seviri_timeseries_20100602.nc


#############


create seviri timeseries for 2010-06-03
96 SEVIRI files found
read all seviri files
concatenated all files along time dim
save seviri timeseries (2010-06-03 00:00:00 - 2010-06-03 23:59:00) to seviri_timeseries_20100603.nc


#############


create seviri timeseries for 2010-06-04
96 SEVIRI files found
read all seviri files
concatenated all files along time dim
save seviri timeseries (2010-06-04 00:00:00 - 2010-06-04 23:59:00) to seviri_timeseries_20100604.nc


#############


create seviri timeseries for 2010-06-05


OSError: [Errno 512] Unknown error 512: '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG2-SEVI-MSG15-0100-NA-20101124004242.462000000Z-NA.nat'

### prep seviri aux file for meta data retrieval

In [90]:
sev_aux = xr.open_dataset("/net/n2o/wolke_scratch2/kjeggle/VerticalCloud/DataSmallDomain/seviri_aux.nc",decode_times=False)

In [91]:
sev_aux = add_lonlat(sev_aux)

lonmin = -2.732e+06
lonmax = 2.732e+06
latmin = -2.996e+06
latmax = 2.996e+06

sev_aux = sev_aux.sel(x=slice(lonmax,lonmin),y=slice(latmin,latmax))

In [93]:
sev_aux.to_netcdf("/net/n2o/wolke_scratch2/kjeggle/VerticalCloud/DataSmallDomain/seviri_aux_latlon.nc")

### manual execution of seviri time series creation

In [12]:
seviri_source_dir = "/net/n2o/wolke_scratch2/kjeggle/SEVIRI"

In [15]:
# Niamey casestudy
#datetime(2006, 8, 22, 0, 0, 0)-datetime(2006, 8, 22, 23, 59, 0))["

In [13]:
my_files = find_files_and_readers(base_dir=seviri_source_dir,
                                  reader='seviri_l1b_native',
                                  start_time=datetime(2012, 12, 22, 0, 0, 0),
                                  end_time=datetime(2012, 12, 22, 23, 59, 0))["seviri_l1b_native"]

In [14]:
my_files

['/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222172743.073000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222012742.803000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222035742.811000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG2-SEVI-MSG15-0100-NA-20121222204241.923000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222141242.613000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222201243.407000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG2-SEVI-MSG15-0100-NA-20121222224241.204000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222071243.316000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG2-SEVI-MSG15-0100-NA-20121222115742.501000000Z-NA.nat',
 '/net/n2o/wolke_scratch2/kjeggle/SEVIRI/MSG3-SEVI-MSG15-0100-NA-20121222232742.655000000Z-NA.nat',


In [31]:
# my_files = np.sort([file for file in my_files if not "MSG1-" in file]) # use MSG2 only for those years

In [16]:
96*2

192

In [15]:
my_files = np.sort(my_files)
print(f"{len(my_files)} SEVIRI files found")

192 SEVIRI files found


In [45]:
lonmin = -2.732e+06
lonmax = 2.732e+06
latmin = -2.996e+06
latmax = 2.996e+06

crop_area = (lonmin,lonmax,latmin,latmax)

In [None]:
%%time
# read seviri files and concat along sensing stop dimension
ds_list = [read_and_crop_seviri_data(fpath, crop=crop_area) for fpath in my_files]

  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  self._read_header()
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)
  proj = self.

In [None]:
ds = xr.concat(ds_list,dim="sensing_stop")

In [None]:
ds

In [38]:
for i in range(1,12):
    try:
        del ds[f"ch{i}"].attrs['orbital_parameters']
    except KeyError:
        pass
    try:
        del ds[f"ch{i}"].attrs['time_parameters']
        #print(f"deleted time_parameters attribute from ch{i}") 
    except KeyError:
        pass

In [39]:
%%time
ds.to_netcdf("/net/n2o/wolke_scratch2/kjeggle/VerticalCloud/DataSmallDomain/SeviriWholeAreaInput/seviri_whole_area_sample_full_day_20240116.nc")

CPU times: user 313 ms, sys: 12 s, total: 12.3 s
Wall time: 28.9 s


In [None]:
# Niamey station location
niamey_lat= 13.4773
niamey_lon = 2.1758

# niamey_lat_id = 1485

In [None]:
niamey_x_id = np.argmin(np.abs((ds.isel(sensing_stop=0).lon.values - niamey_lon)))
niamey_y_id = np.argmin(np.abs((ds.isel(sensing_stop=0).lat.values - niamey_lat)))

In [None]:
print(f"identified lon {ds.isel(sensing_stop=1,x=niamey_x_id).lon.values:.4f} vs actual lon: {niamey_lon} | index in seviri grid: {niamey_x_id}")
print(f"identified lat {ds.isel(sensing_stop=1,y=niamey_y_id).lat.values:.4f} vs actual lat: {niamey_lat} | index in seviri grid: {niamey_y_id}")

In [None]:
lat_to_km = Transformer.from_crs(era5_proj,seviri_proj, always_xy=True)

In [None]:
era5_proj

In [None]:
x,y = lat_to_km.transform(niamey_lon,niamey_lat)

In [None]:
x

In [None]:
y

In [None]:
ds.sel(x=x,y=y,method="nearest")