# Cross-match L1 and L2 at the TCCON site in Paris

- Cross-match between L1 features and L2 $X_{\mathrm{CO_2}}$ labels

In [1]:
import numpy as np
import pandas as pd
import glob
import sys
import h5py
#from netCDF4 import Dataset
from datetime import datetime
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree

import pyarrow as pa
import pyarrow.parquet as pq

from functools import reduce
import operator
import gc

In [2]:
# Global matplotlib style: serif text in Times New Roman, 16 pt base size,
# and the STIX font set for math text.
plt.rc('font', family='serif', serif='Times New Roman')
plt.rcParams.update({
    'font.size': 16,
    'mathtext.fontset': 'stix',
})

#### Read DataFrames

In [3]:
# Load the Paris L1 (`isite`) and L2 (`iall`) tables from their parquet files;
# the first path feeds `onepdf`, the second feeds `twopdf`.
onepdf, twopdf = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        '/home/shong/work/oco2/data/paris_L1_isite_pandas.parquet.snappy',
        '/home/shong/work/oco2/data/paris_L2_iall_pandas.parquet.snappy',
    )
)

In [4]:
# Preview the first five rows of the L1 table.
onepdf.head(n=5)

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested
0,0,42.051094,2.3677,48.852226,189.712494,0.490401,207.242752,30.967089,2021-07-22T12:55:08.644Z,715921
1,1,42.971222,2.367236,48.854809,195.084808,0.10498,207.23999,30.96925,2021-07-22T12:55:08.646Z,715921
2,2,43.062939,2.36746,48.855442,188.615356,0.108523,207.239822,30.969864,2021-07-22T12:55:08.636Z,715921
3,0,51.605095,2.390275,48.850983,225.645187,1.106636,207.284897,30.973173,2021-07-22T12:55:08.950Z,715921
4,1,53.896103,2.389698,48.853352,245.7854,1.118489,207.28212,30.975107,2021-07-22T12:55:08.953Z,715921


In [5]:
# Distinct `ipx1024nested` tile ids present in the L1 table.
np.unique(onepdf['ipx1024nested'].to_numpy())

array([715921], dtype=int32)

In [6]:
# Preview the first five rows of the L2 table (includes the `xco2` column).
twopdf.head(n=5)

Unnamed: 0,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,xco2,time_str,ipx1024nested
0,57.66209,2.33008,48.842953,357.957825,1.240499,206.971252,30.910488,0.000409,2021-07-22T12:54:39.029Z,715923
1,56.111057,2.311695,48.840572,326.19809,1.413464,206.941391,30.902916,0.000409,2021-07-22T12:54:39.056Z,715922
2,43.401897,2.293364,48.838219,301.024231,0.864409,206.911591,30.895386,0.00041,2021-07-22T12:54:39.083Z,715922
3,60.11515,2.424551,48.853153,185.509872,0.913878,207.128494,30.948095,0.000409,2021-07-22T12:54:39.228Z,715924
4,58.226757,2.406187,48.850311,165.121994,0.548199,207.099075,30.94009,0.000409,2021-07-22T12:54:39.255Z,715924


In [7]:
# Distinct `ipx1024nested` tile ids present in the L2 table.
np.unique(twopdf['ipx1024nested'].to_numpy())

array([715834, 715835, 715838, 715920, 715921, 715922, 715923, 715924,
       715926], dtype=int32)

- As explained before, the L1 data in the single `isite` tile will be matched against the L2 `xco2` values in the 9 `iall` tiles

#### Convert time strings to timestamps (datetime)

In [8]:
# Parse the L1 `time_str` strings (e.g. 2021-07-22T12:55:08.644Z) into a
# datetime column named `time`.
onepdf['time'] = pd.to_datetime(onepdf.time_str)

In [12]:
# Parse the L2 `time_str` strings (e.g. 2021-07-22T12:54:39.029Z) into a
# datetime column named `time`.
twopdf['time'] = pd.to_datetime(twopdf.time_str)

In [11]:
# Day-of-month of every L1 sample, as a quick check on the acquisition date.
onepdf.loc[:, 'time'].dt.day

0       22
1       22
2       22
3       22
4       22
        ..
3076    22
3077    22
3078    22
3079    22
3080    22
Name: time, Length: 3081, dtype: int64

In [13]:
# Day-of-month of every L2 sample, as a quick check on the acquisition date.
# NOTE(review): the saved output shows day 31 for the last row while the other
# displayed rows are day 22 — confirm whether L2 mixes more than one date.
twopdf.loc[:, 'time'].dt.day

0       22
1       22
2       22
3       22
4       22
        ..
4232    22
4233    22
4234    22
4235    22
4236    31
Name: time, Length: 4237, dtype: int64