# Cross-match L1 and L2 at the TCCONN site in Paris 

- Cross-match between L1 features and L2 $X_{co2}$ lables 

In [1]:
import numpy as np
import pandas as pd
import glob
import sys
import h5py
#from netCDF4 import Dataset
from datetime import datetime
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree

import pyarrow as pa
import pyarrow.parquet as pq

from functools import reduce
import operator
import gc

In [2]:
# plot settings
plt.rc('font', family='serif') 
plt.rc('font', serif='Times New Roman') 
plt.rcParams.update({'font.size': 16})
plt.rcParams['mathtext.fontset'] = 'stix'

### Read DataFrames

In [3]:
onepdf = pd.read_parquet('/home/shong/work/oco2/data/paris_L1_isite_pandas.parquet.snappy')
twopdf = pd.read_parquet('/home/shong/work/oco2/data/paris_L2_iall_pandas.parquet.snappy')

In [4]:
onepdf.head()

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested
0,0,42.051094,2.3677,48.852226,189.712494,0.490401,207.242752,30.967089,2021-07-22T12:55:08.644Z,715921
1,1,42.971222,2.367236,48.854809,195.084808,0.10498,207.23999,30.96925,2021-07-22T12:55:08.646Z,715921
2,2,43.062939,2.36746,48.855442,188.615356,0.108523,207.239822,30.969864,2021-07-22T12:55:08.636Z,715921
3,0,51.605095,2.390275,48.850983,225.645187,1.106636,207.284897,30.973173,2021-07-22T12:55:08.950Z,715921
4,1,53.896103,2.389698,48.853352,245.7854,1.118489,207.28212,30.975107,2021-07-22T12:55:08.953Z,715921


In [5]:
np.unique(onepdf.ipx1024nested.values)

array([715921], dtype=int32)

In [6]:
twopdf.head()

Unnamed: 0,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,xco2,time_str,ipx1024nested
0,57.66209,2.33008,48.842953,357.957825,1.240499,206.971252,30.910488,0.000409,2021-07-22T12:54:39.029Z,715923
1,56.111057,2.311695,48.840572,326.19809,1.413464,206.941391,30.902916,0.000409,2021-07-22T12:54:39.056Z,715922
2,43.401897,2.293364,48.838219,301.024231,0.864409,206.911591,30.895386,0.00041,2021-07-22T12:54:39.083Z,715922
3,60.11515,2.424551,48.853153,185.509872,0.913878,207.128494,30.948095,0.000409,2021-07-22T12:54:39.228Z,715924
4,58.226757,2.406187,48.850311,165.121994,0.548199,207.099075,30.94009,0.000409,2021-07-22T12:54:39.255Z,715924


In [7]:
np.unique(twopdf.ipx1024nested.values)

array([715834, 715835, 715838, 715920, 715921, 715922, 715923, 715924,
       715926], dtype=int32)

- As explained before, L1 data at a `isite` tile will match L2's xco2 in `iall` 9 tiles
- `millisecond` value from `time_str` will be used to match $X_{co2}$ to `L1` data. 

### Datetime for L1 and L2

In [8]:
onepdf['time'] = pd.to_datetime(onepdf['time_str'])

In [9]:
twopdf['time'] = pd.to_datetime(twopdf['time_str'])

#### L1 day: hour: minutes

In [11]:
np.unique(onepdf['time'].dt.day)

array([22])

In [12]:
np.unique(onepdf['time'].dt.hour)

array([12])

In [13]:
np.unique(onepdf['time'].dt.minute)

array([51, 52, 53, 54, 55])

#### L2 day: hour: minutes

In [15]:
np.unique(twopdf['time'].dt.day)

array([22, 31])

In [16]:
np.unique(twopdf['time'].dt.hour)

array([12])

In [17]:
np.unique(twopdf['time'].dt.minute)

array([47, 52, 53, 54, 55])

#### Remove day == 31

In [18]:
len(twopdf.index)

4237

In [19]:
twopdf = twopdf[twopdf['time'].dt.day == 22]

In [20]:
len(twopdf.index)

4236

#### Match in MilliSecond Scale

In [23]:
onepdf['millisec'] = np.int64(onepdf['time'].dt.day * 24 * 60 * 60000 \
                              + onepdf['time'].dt.hour * 60 * 60000 \
                              + onepdf['time'].dt.minute * 60000 \
                              + onepdf['time'].dt.second * 1000 \
                              + onepdf['time'].dt.microsecond / 1000)

In [24]:
twopdf['millisec'] = np.int64(twopdf['time'].dt.day * 24 * 60 * 60000 \
                              + twopdf['time'].dt.hour * 60 * 60000 \
                              + twopdf['time'].dt.minute * 60000 \
                              + twopdf['time'].dt.second * 1000 \
                              + twopdf['time'].dt.microsecond / 1000)

In [25]:
onepdf[['time_str','millisec']].head()

Unnamed: 0,time_str,millisec
0,2021-07-22T12:55:08.644Z,1947308644
1,2021-07-22T12:55:08.646Z,1947308646
2,2021-07-22T12:55:08.636Z,1947308636
3,2021-07-22T12:55:08.950Z,1947308950
4,2021-07-22T12:55:08.953Z,1947308953


In [26]:
twopdf[['xco2','millisec']].head()

Unnamed: 0,xco2,millisec
0,0.000409,1947279029
1,0.000409,1947279056
2,0.00041,1947279083
3,0.000409,1947279228
4,0.000409,1947279255


In [27]:
def find_closest_row(df, millisec):
    inearest = df['millisec'].abs().idxmin()
    timediff = np.abs(millisec - df.loc[inearest].millisec)
    return [inearest, millisec,timediff, df.loc[inearest].xco2]

In [28]:
onepdf.head()

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested,time,millisec
0,0,42.051094,2.3677,48.852226,189.712494,0.490401,207.242752,30.967089,2021-07-22T12:55:08.644Z,715921,2021-07-22 12:55:08.644000+00:00,1947308644
1,1,42.971222,2.367236,48.854809,195.084808,0.10498,207.23999,30.96925,2021-07-22T12:55:08.646Z,715921,2021-07-22 12:55:08.646000+00:00,1947308646
2,2,43.062939,2.36746,48.855442,188.615356,0.108523,207.239822,30.969864,2021-07-22T12:55:08.636Z,715921,2021-07-22 12:55:08.636000+00:00,1947308636
3,0,51.605095,2.390275,48.850983,225.645187,1.106636,207.284897,30.973173,2021-07-22T12:55:08.950Z,715921,2021-07-22 12:55:08.950000+00:00,1947308950
4,1,53.896103,2.389698,48.853352,245.7854,1.118489,207.28212,30.975107,2021-07-22T12:55:08.953Z,715921,2021-07-22 12:55:08.953000+00:00,1947308953


In [29]:
find_closest_row(twopdf,1947308644)

[124, 1947308644, 172416, 0.00040828524]

In [30]:
numonepdf = len(onepdf.index)

In [31]:
for ione in range(numonepdf):
    eachmil = onepdf['millisec'].loc[ione]
    print([ione]+find_closest_row(twopdf,eachmil))

[0, 124, 1947308644, 172416, 0.00040828524]
[1, 124, 1947308646, 172418, 0.00040828524]
[2, 124, 1947308636, 172408, 0.00040828524]
[3, 124, 1947308950, 172722, 0.00040828524]
[4, 124, 1947308953, 172725, 0.00040828524]
[5, 124, 1947308942, 172714, 0.00040828524]
[6, 124, 1947308977, 172749, 0.00040828524]
[7, 124, 1947308980, 172752, 0.00040828524]
[8, 124, 1947308969, 172741, 0.00040828524]
[9, 124, 1947309004, 172776, 0.00040828524]
[10, 124, 1947309283, 173055, 0.00040828524]
[11, 124, 1947309286, 173058, 0.00040828524]
[12, 124, 1947309275, 173047, 0.00040828524]
[13, 124, 1947309310, 173082, 0.00040828524]
[14, 124, 1947309313, 173085, 0.00040828524]
[15, 124, 1947309302, 173074, 0.00040828524]
[16, 124, 1947309337, 173109, 0.00040828524]
[17, 124, 1947309340, 173112, 0.00040828524]
[18, 124, 1947309329, 173101, 0.00040828524]
[19, 124, 1947309617, 173389, 0.00040828524]
[20, 124, 1947309619, 173391, 0.00040828524]
[21, 124, 1947309609, 173381, 0.00040828524]
[22, 124, 1947309644

[1564, 124, 1947288286, 152058, 0.00040828524]
[1565, 124, 1947288275, 152047, 0.00040828524]
[1566, 124, 1947288563, 152335, 0.00040828524]
[1567, 124, 1947288567, 152339, 0.00040828524]
[1568, 124, 1947288555, 152327, 0.00040828524]
[1569, 124, 1947288590, 152362, 0.00040828524]
[1570, 124, 1947288593, 152365, 0.00040828524]
[1571, 124, 1947288582, 152354, 0.00040828524]
[1572, 124, 1947288617, 152389, 0.00040828524]
[1573, 124, 1947288619, 152391, 0.00040828524]
[1574, 124, 1947288609, 152381, 0.00040828524]
[1575, 124, 1947288897, 152669, 0.00040828524]
[1576, 124, 1947288900, 152672, 0.00040828524]
[1577, 124, 1947288888, 152660, 0.00040828524]
[1578, 124, 1947288923, 152695, 0.00040828524]
[1579, 124, 1947288926, 152698, 0.00040828524]
[1580, 124, 1947288915, 152687, 0.00040828524]
[1581, 124, 1947288950, 152722, 0.00040828524]
[1582, 124, 1947288953, 152725, 0.00040828524]
[1583, 124, 1947288942, 152714, 0.00040828524]
[1584, 124, 1947289230, 153002, 0.00040828524]
[1585, 124, 1

[2716, 124, 1947278950, 142722, 0.00040828524]
[2717, 124, 1947278953, 142725, 0.00040828524]
[2718, 124, 1947278942, 142714, 0.00040828524]
[2719, 124, 1947278977, 142749, 0.00040828524]
[2720, 124, 1947278980, 142752, 0.00040828524]
[2721, 124, 1947278969, 142741, 0.00040828524]
[2722, 124, 1947279004, 142776, 0.00040828524]
[2723, 124, 1947279007, 142779, 0.00040828524]
[2724, 124, 1947278996, 142768, 0.00040828524]
[2725, 124, 1947279283, 143055, 0.00040828524]
[2726, 124, 1947279286, 143058, 0.00040828524]
[2727, 124, 1947279275, 143047, 0.00040828524]
[2728, 124, 1947279310, 143082, 0.00040828524]
[2729, 124, 1947279313, 143085, 0.00040828524]
[2730, 124, 1947279302, 143074, 0.00040828524]
[2731, 124, 1947279337, 143109, 0.00040828524]
[2732, 124, 1947279340, 143112, 0.00040828524]
[2733, 124, 1947279329, 143101, 0.00040828524]
[2734, 124, 1947279590, 143362, 0.00040828524]
[2735, 124, 1947279617, 143389, 0.00040828524]
[2736, 124, 1947279619, 143391, 0.00040828524]
[2737, 124, 1

In [32]:
twopdf.loc[124]

altitude                                37.827778
longitude                                2.470435
latitude                                48.752361
aspect                                 227.742523
slope                                   10.013433
sol_az                                 206.248337
sol_zn                                  30.695034
xco2                                     0.000408
time_str                 2021-07-22T12:52:16.228Z
ipx1024nested                              715834
time             2021-07-22 12:52:16.228000+00:00
millisec                               1947136228
Name: 124, dtype: object

In [33]:
len(onepdf.index)

3081

In [34]:
onepdf.loc[3078:3080]

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested,time,millisec
3078,0,36.222748,2.442394,48.778557,338.065582,0.101995,206.153351,30.705957,2021-07-22T12:52:12.590Z,715921,2021-07-22 12:52:12.590000+00:00,1947132590
3079,1,35.885571,2.441211,48.781685,203.345032,0.167397,206.148987,30.708429,2021-07-22T12:52:12.593Z,715921,2021-07-22 12:52:12.593000+00:00,1947132593
3080,2,36.213043,2.442527,48.782467,211.480377,0.138892,206.15062,30.709497,2021-07-22T12:52:12.582Z,715921,2021-07-22 12:52:12.582000+00:00,1947132582


In [35]:
twopdf.describe()

Unnamed: 0,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,xco2,ipx1024nested,millisec
count,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0
mean,56.776127,2.358214,48.844494,189.494919,1.888441,206.756409,30.87579,0.000409,715920.575779,1947243000.0
std,19.899004,0.048171,0.040322,107.175545,4.423931,0.422832,0.078647,1e-06,12.783315,56823.88
min,29.466667,2.249182,48.743355,0.213229,0.045934,205.859238,30.673103,0.000398,715834.0,1947136000.0
25%,41.463314,2.32029,48.808303,86.21907,0.528287,206.421947,30.818332,0.000408,715921.0,1947196000.0
50%,52.162025,2.35838,48.845816,204.867706,0.975038,206.764824,30.877131,0.000409,715922.0,1947244000.0
75%,66.526728,2.395922,48.880304,284.799088,1.810062,207.094391,30.933205,0.00041,715924.0,1947291000.0
max,121.429054,2.484542,48.92812,359.987976,89.395584,207.753464,31.055525,0.000418,715926.0,1947344000.0
