# How to create monthly observations datasets (Port Barcelona)

In [1]:
import nes
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1. Collect data

We have two dataframes: the first one gives us the hourly NO2 data at some stations, and the second one gives us the locations of the stations.

### NO2

In [3]:
# Excel workbook with the hourly Port of Barcelona observations
file_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/Dades_Port_Barcelona_2017-2021_corr.xlsx'

# The column names are on spreadsheet row index 3 and the time column becomes the index;
# the SO2 columns are discarded so that only the NO2 series remain.
df_no2 = (
    pd.read_excel(file_path, header=3, index_col='Horario: UTC')
    .drop(columns=['SO2-UM', 'SO2-Darsena'])
)
df_no2

Unnamed: 0_level_0,NO2-UM,NO2-ZAL Prat
Horario: UTC,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-01 00:00:00,64.64,49.08
2017-01-01 01:00:00,68.16,53.00
2017-01-01 02:00:00,68.29,46.75
2017-01-01 03:00:00,62.91,39.65
2017-01-01 04:00:00,47.13,28.86
...,...,...
2021-12-31 13:00:00,35.38,24.96
2021-12-31 14:00:00,35.80,24.16
2021-12-31 15:00:00,29.10,25.79
2021-12-31 16:00:00,9.24,29.82


The timesteps above are slightly wrong (an error from the data provider). Next, we rebuild the hourly time index to make sure that the microseconds are saved as 0.

In [4]:
# Rebuild the time index as exact hourly steps starting on 2017-01-01 00:00:00.
# NOTE(review): this assumes the rows are consecutive hours with no gaps — confirm against the source file.
first_hour = datetime(year=2017, month=1, day=1, minute=0, second=0, microsecond=0)
n_hours = len(df_no2)
df_no2.index = pd.Index([first_hour + timedelta(hours=step) for step in range(n_hours)])
df_no2

Unnamed: 0,NO2-UM,NO2-ZAL Prat
2017-01-01 00:00:00,64.64,49.08
2017-01-01 01:00:00,68.16,53.00
2017-01-01 02:00:00,68.29,46.75
2017-01-01 03:00:00,62.91,39.65
2017-01-01 04:00:00,47.13,28.86
...,...,...
2021-12-31 13:00:00,35.38,24.96
2021-12-31 14:00:00,35.80,24.16
2021-12-31 15:00:00,29.10,25.79
2021-12-31 16:00:00,9.24,29.82


### Stations information

In [5]:
# CSV file with the station metadata (code, latitude, longitude)
path = '/gpfs/projects/bsc32/models/NES_tutorial_data/estaciones.csv'

# Drop the area classification column and skip the first row of the file.
# NOTE(review): presumably the first station has no NO2 series in the Excel file — confirm.
df_stations = (
    pd.read_csv(path)
    .drop(columns=['standardised_network_provided_area_classification'])
    .iloc[1:]
)
df_stations

Unnamed: 0,station.code,lat,lon
1,Unitat Mobil,41.373777,2.184514
2,ZAL Prat,41.317277,2.134501


## 2. Create dataset with all timesteps

### Define coordinates

In [6]:
# Time axis: one Python datetime per row of the NO2 dataframe
times = df_no2.index.to_pydatetime()

# Station coordinates: the first two rows (by position) of the stations dataframe
lat = df_stations['lat'].iloc[0:2].to_numpy()
lon = df_stations['lon'].iloc[0:2].to_numpy()

In [7]:
# Create the NES object from the station coordinates and the hourly time axis
# (no communicator and no projection are given)
nessy = nes.create_nes(
    comm=None,
    info=False,
    projection=None,
    parallel_method='X',
    lat=lat,
    lon=lon,
    times=times,
)

### Add data

In [8]:
# Variables to store in the dataset.
# 'station_name' has one entry per station, so its data are the labels of the two
# NO2 columns (not the concentration values, which belong to 'sconcno2').
variables = {'station_name': {'data': df_no2.columns[0:2].to_numpy(),
                              'dimensions': ('station',),
                              'dtype': str},
             # Hourly NO2 values, shaped (time, station)
             'sconcno2': {'data': df_no2.iloc[:, 0:2].to_numpy(),
                          'dimensions': ('time', 'station',),
                          'dtype': float}}

In [9]:
# Attach the variables dictionary (station names and NO2 values) to the NES object
nessy.variables = variables

### Write dataset

In [10]:
# NOTE(review): presumably the maximum length used for string variables such as
# 'station_name' — confirm in the NES documentation.
nessy.set_strlen(75)

# Write the full-period dataset; info=True prints the progress messages shown below
nessy.to_netcdf(
    'points_port_barcelona_no2.nc',
    info=True,
)

Rank 000: Creating points_port_barcelona_no2.nc
Rank 000: NetCDF ready to write


  warn(msg)


Rank 000: Dimensions done
Rank 000: Writing station_name var (1/2)
Rank 000: Var station_name created (1/2)
Rank 000: Filling station_name)
Rank 000: Var station_name data (1/2)
Rank 000: Var station_name completed (1/2)
Rank 000: Writing sconcno2 var (2/2)
Rank 000: Var sconcno2 created (2/2)
Rank 000: Filling sconcno2)
Rank 000: Var sconcno2 data (2/2)
Rank 000: Var sconcno2 completed (2/2)


## 3. Create one dataset per month (Ready for Providentia)

### Add columns with month and year

In [11]:
# Append the calendar month and year of every timestep as helper columns;
# they are used afterwards to group the observations by month.
df_no2 = df_no2.assign(month=df_no2.index.month,
                       year=df_no2.index.year)
df_no2

Unnamed: 0,NO2-UM,NO2-ZAL Prat,month,year
2017-01-01 00:00:00,64.64,49.08,1,2017
2017-01-01 01:00:00,68.16,53.00,1,2017
2017-01-01 02:00:00,68.29,46.75,1,2017
2017-01-01 03:00:00,62.91,39.65,1,2017
2017-01-01 04:00:00,47.13,28.86,1,2017
...,...,...,...,...
2021-12-31 13:00:00,35.38,24.96,12,2021
2021-12-31 14:00:00,35.80,24.16,12,2021
2021-12-31 15:00:00,29.10,25.79,12,2021
2021-12-31 16:00:00,9.24,29.82,12,2021


### Iterate through each month

Altitude is added (filled with NaNs) because Providentia cannot read observational networks without it.

In [12]:
# Output directory (the same for every month, so it is created once before the loop).
# To run Providentia, this folder should be moved to:
# '/esarchive/obs/' as in '/esarchive/obs/port_barcelona/port-barcelona/hourly/sconcno2'
netcdf_path = 'port_barcelona/port-barcelona/hourly/sconcno2/'
os.makedirs(os.path.dirname(netcdf_path), exist_ok=True)

# Write one NetCDF file per (year, month) group of hourly observations
for (year, month), current in df_no2.groupby(['year', 'month']):

    # Read time
    times = current.index.to_pydatetime()

    # The first two columns hold the NO2 series of the stations;
    # the remaining columns are the 'month' and 'year' helpers.
    station_columns = current.columns[0:2]

    # Fill altitude with nans (one value per station)
    altitude = np.full(len(station_columns), np.nan)

    # Read metadata
    variables = {'station_name': {'data': station_columns.to_numpy(),
                                  'dimensions': ('station',),
                                  'dtype': str,
                                  'standard_name': ''},
                 'altitude': {'data': altitude,
                              'dimensions': ('station',),
                              'units': 'meters',
                              'standard_name': 'altitude'},
                 'sconcno2': {'data': current[station_columns].to_numpy(),
                              'units': 'µg m-3',
                              'dimensions': ('time', 'station',),
                              'long_name': ''}
               }

    # Create object
    nessy = nes.create_nes(comm=None, info=False, projection=None, parallel_method='X',
                           lat=lat, lon=lon, times=times)

    # Assign metadata
    nessy.variables = variables
    nessy.set_strlen(75)

    # Save files (file name: sconcno2_YYYYMM.nc)
    filename = 'sconcno2_{0}{1}.nc'.format(year, str(month).zfill(2))
    nessy.to_netcdf(netcdf_path + filename)

    del nessy
    print('Done ' + filename)

  warn(msg)
  warn(msg)


Done sconcno2_201701.nc
Done sconcno2_201702.nc


  warn(msg)
  warn(msg)


Done sconcno2_201703.nc
Done sconcno2_201704.nc


  warn(msg)
  warn(msg)


Done sconcno2_201705.nc
Done sconcno2_201706.nc


  warn(msg)
  warn(msg)


Done sconcno2_201707.nc
Done sconcno2_201708.nc


  warn(msg)
  warn(msg)


Done sconcno2_201709.nc


  warn(msg)


Done sconcno2_201710.nc
Done sconcno2_201711.nc


  warn(msg)


Done sconcno2_201712.nc


  warn(msg)


Done sconcno2_201801.nc


  warn(msg)


Done sconcno2_201802.nc


  warn(msg)


Done sconcno2_201803.nc


  warn(msg)


Done sconcno2_201804.nc


  warn(msg)


Done sconcno2_201805.nc


  warn(msg)


Done sconcno2_201806.nc

  warn(msg)



Done sconcno2_201807.nc


  warn(msg)
  warn(msg)


Done sconcno2_201808.nc
Done sconcno2_201809.nc


  warn(msg)
  warn(msg)


Done sconcno2_201810.nc
Done sconcno2_201811.nc


  warn(msg)
  warn(msg)


Done sconcno2_201812.nc
Done sconcno2_201901.nc


  warn(msg)
  warn(msg)


Done sconcno2_201902.nc
Done sconcno2_201903.nc


  warn(msg)
  warn(msg)


Done sconcno2_201904.nc
Done sconcno2_201905.nc


  warn(msg)
  warn(msg)


Done sconcno2_201906.nc
Done sconcno2_201907.nc


  warn(msg)
  warn(msg)


Done sconcno2_201908.nc
Done sconcno2_201909.nc


  warn(msg)
  warn(msg)


Done sconcno2_201910.nc


  warn(msg)


Done sconcno2_201911.nc
Done sconcno2_201912.nc


  warn(msg)
  warn(msg)


Done sconcno2_202001.nc
Done sconcno2_202002.nc


  warn(msg)
  warn(msg)


Done sconcno2_202003.nc
Done sconcno2_202004.nc


  warn(msg)
  warn(msg)


Done sconcno2_202005.nc
Done sconcno2_202006.nc


  warn(msg)
  warn(msg)


Done sconcno2_202007.nc
Done sconcno2_202008.nc


  warn(msg)
  warn(msg)


Done sconcno2_202009.nc
Done sconcno2_202010.nc


  warn(msg)
  warn(msg)


Done sconcno2_202011.nc
Done sconcno2_202012.nc


  warn(msg)
  warn(msg)


Done sconcno2_202101.nc
Done sconcno2_202102.nc


  warn(msg)


Done sconcno2_202103.nc


  warn(msg)


Done sconcno2_202104.nc


  warn(msg)


Done sconcno2_202105.nc


  warn(msg)


Done sconcno2_202106.nc


  warn(msg)


Done sconcno2_202107.nc


  warn(msg)


Done sconcno2_202108.nc


  warn(msg)
  warn(msg)


Done sconcno2_202109.nc
Done sconcno2_202110.nc


  warn(msg)
  warn(msg)


Done sconcno2_202111.nc
Done sconcno2_202112.nc
