# Info
Purpose:

    Describe the details ...

Input:

    arguments

Output:

    Figure and save files

Keywords:

    none

Dependencies:

    - load_utils.py
    - matplotlib
    - numpy
    - Sp_parameters
    - write_utils
    - path_utils
    - hdf5storage
    - scipy

Needed Files:
  - file.rc : for consistent creation of look of matplotlib figures
  - ...

Modification History:

    Written: Samuel LeBlanc, Santa Cruz, CA, 2022-03-17
    Modified:


# Prepare python environment

In [1]:
import numpy as np
import Sp_parameters as Sp
import load_utils as lu
import write_utils as wu
from path_utils import getpath
import hdf5storage as hs
import scipy.io as sio
import matplotlib.pyplot as plt
%matplotlib notebook
import os
import glob

In [2]:
from datetime import datetime,timedelta
import scipy.stats as st
import pandas as pd
import plotting_utils as pu

In [3]:
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO, BytesIO
else:
    from io import StringIO, BytesIO

In [4]:
name = 'rooftop'
vv = 'v1'
fp = getpath(name)


Return path named:  rooftop /data/sunsat/rooftop/


# Load files

## Load 4STAR gas summary

In [5]:
files = glob.glob(fp + '/gas_summary_v2_post202208/**/[!4STARB]*gas_summary*.mat', recursive=True)

In [6]:
files.sort()
files

['/data/sunsat/rooftop//gas_summary_v2_post202208/20180209_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20180210_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20180211_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20180212_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200624_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200630_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200701_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200708_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200805_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200819_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200824_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200918_gas_summary.mat',
 '/data/sunsat/rooftop//gas_summary_v2_post202208/20200929_gas_summary.mat',

In [7]:
gas = {}
for f in files:
    g = sio.loadmat(f)
    daystr = f.replace('/','_').split('_')[-3]
    gas[daystr] = g

In [8]:
gas['20210330'].keys()

dict_keys(['__header__', '__version__', '__globals__', 'no2_molec_cm2', 'no2err_molec_cm2', 'no2DU', 'no2resiDU', 'o3DU', 'o3resiDU', 'hcoh_DU', 'hcohresi', 'cwv', 'cwv_std', 'lat', 'lon', 'alt', 'pst', 'sza', 'm_aero', 'm_o3', 'm_no2', 'tUTC'])

In [9]:
gas['20210330']['tUTC']

array([[15.47163194],
       [15.4721875 ],
       [15.47249111],
       ...,
       [22.16018639],
       [22.16074194],
       [22.16132361]])

In [10]:
kg = list(gas.keys())
kg.sort()

In [11]:
s = {'day':[],'time':[],'tUTC':[],'no2DU':[],'no2resiDU':[],'o3DU':[],'o3resiDU':[]}
for d in kg:
    for kk in ['tUTC','no2DU','no2resiDU','o3DU','o3resiDU']:
        s[kk] = np.append(s[kk],gas[d][kk][:,0])
    s['day'] = np.append(s['day'],[d]*len(gas[d][kk][:,0]))

In [12]:
s['time'] = pd.to_datetime(s['day']).to_numpy() + [np.timedelta64(timedelta(hours=tt)) for tt in s['tUTC']]

In [13]:
s['time']

array(['2018-02-09T16:49:17.578000000', '2018-02-09T16:49:24.764996000',
       '2018-02-09T16:49:25.858995000', ...,
       '2022-04-06T00:37:33.437003000', '2022-04-06T00:37:35.546004000',
       '2022-04-06T00:37:37.639997000'], dtype='datetime64[ns]')

In [14]:
s['no2DU'] = s['no2DU']/10.0
s['no2resiDU'] = s['no2resiDU']/10.0

### Filter out bad data and back interpolate for easier handling

In [161]:
ibad = (s['no2DU']<0.05) | (s['no2DU']>1.0) 

In [148]:
from Sp_parameters import smooth

In [162]:
s['no2DU'][ibad] = np.nan

In [163]:
s_no2DU = smooth(s['no2DU'],1,nan=True,old=True)

In [164]:
plt.figure()
plt.plot(s_no2DU,'.')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fad7d383fa0>]

### Load previous 4STAR analysis

In [15]:
files2 = glob.glob(fp + '/gas_summary_v1*/**/[!4STARB]*gas_summary*.mat', recursive=True)
files2.sort()
files2

['/data/sunsat/rooftop/gas_summary_v1_pre2022/20190821_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200623_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200624_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200630_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200701_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200708_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200819_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200918_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20200929_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20201001_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20201006_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20201027_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20201104_gas_summary.mat',
 '/data/sunsat/rooftop/gas_summary_v1_pre2022/20201

In [16]:
gas2 = {}
for f in files2:
    g2 = sio.loadmat(f)
    daystr = f.replace('/','_').split('_')[-3]
    gas2[daystr] = g2

In [17]:
s2 = {'day':[],'time':[],'tUTC':[],'no2DU':[],'no2resiDU':[],'o3DU':[],'o3resiDU':[]}
kg = list(gas2.keys())
kg.sort()
for d in kg:
    for kk in ['tUTC','no2DU','no2resiDU','o3DU','o3resiDU']:
        s2[kk] = np.append(s2[kk],gas2[d][kk][:,0])
    s2['day'] = np.append(s2['day'],[d]*len(gas2[d][kk][:,0]))

In [18]:
s2['time'] = pd.to_datetime(s2['day']).to_numpy() + [np.timedelta64(timedelta(hours=tt)) for tt in s2['tUTC']]

## Load Pandora

### load files from internet

In [19]:
import requests

In [20]:
url_base = 'http://data.pandonia-global-network.org/MountainViewCA/Pandora34s1/L2/'
url_O3 = url_base+'Pandora34s1_MountainViewCA_L2Tot_rout0p1-7.txt'
url_NO2 = url_base+'Pandora34s1_MountainViewCA_L2Tot_rnvs1p1-7.txt'
url_NO2_trop = url_base+'Pandora34s1_MountainViewCA_L2Trop_rnvh1p1-7.txt'

In [21]:
ro3 = requests.get(url_O3,stream=True)
fo3 = []
for chunk in ro3.iter_content(chunk_size=1024): 
    fo3.append(chunk)

In [22]:
rno2 = requests.get(url_NO2,stream=True)
fno2 = []
for chunk in rno2.iter_content(chunk_size=1024): 
    fno2.append(chunk)

### convert files to something easier to handle

#### Ozone

In [23]:
o3data = BytesIO(b''.join(fo3))

In [24]:
o3data.seek(0)
o3header = []
for i in range(66):
    o3header.append(o3data.readline())
print(*o3header,sep='\n')

b'File name: Pandora34s1_MountainViewCA_L2Tot_rout0p1-7.txt\n'
b'File generation date: 20220831T081508Z\n'
b'Data description: Level 2 total columns file\n'
b'Data file version: rout0p1-7\n'
b'Data product status: Ozone data are official, Sulfur dioxide data are unusable\n'
b'Local principal investigator: Nader Abuhassan\n'
b'Network principal investigator: Alexander Cede\n'
b'Instrument type: Pandora\n'
b'Instrument number: 34\n'
b'Spectrometer number: 1\n'
b'Processing software version used: BlickP v1.7.16\n'
b'Full location name: Ames Research Center\n'
b'Short location name: MountainViewCA\n'
b'Country of location: United States\n'
b'Location latitude [deg]: 37.4200\n'
b'Location longitude [deg]: -122.0568\n'
b'Location altitude [m]: 50\n'
b'Data start time: 20191121T221159Z\n'
b'Data end time: NONE\n'
b'Data caveats: None\n'
b'---------------------------------------------------------------------------------------\n'
b'Column 1: UT date and time for center of measurement, yyyymmddT

In [25]:
col_name = ['Time','doy_jan2000','duration_time','sza','saz','lza','laz','o3_du','unc_o3_du','m_o3','diff_corr','qa_o3','sumi2_dq1',
            'sumi_dq2','so2_du','unc_so2_du','m_so2','diff_corr_so2','qa_no2','sumi2_so2_dq1','sumi2_so2_dq2',
            'fit_resi','resi_rms','expmeas_resi_rms','expinst_resi_rms','mean_val','Pres_mbar','dat_proc',
            'cal_version','cal_val_date','L2fit_QA','sumi2_L2fit_dq1','sumi2_L2fit_dq2','L1_QA','sumi2_L1_dq1',
            'sumi2_L1_dq2','effT_wav','resi_straylight','L1_wv_shift','wv_shift_fit','int','darkcount','pos_filter1','pos_filter2']

In [26]:
col_name_decription = {}
j = 0
header_start = False
for i,h in enumerate(o3header):
    if h.strip().startswith(b'----'): 
        header_start = ~header_start
        continue
    if header_start:
        print(j,'\033[1m'+col_name[j]+'\033[0m',h)
        col_name_decription[col_name[j]] = h
        j = j+1

0 [1mTime[0m b'Column 1: UT date and time for center of measurement, yyyymmddThhmmssZ (ISO 8601)\n'
1 [1mdoy_jan2000[0m b'Column 2: Fractional days since 1-Jan-2000 UT midnight for center of measurement\n'
2 [1mduration_time[0m b'Column 3: Effective duration of measurement in seconds\n'
3 [1msza[0m b'Column 4: Solar zenith angle for center of measurement in degree\n'
4 [1msaz[0m b'Column 5: Solar azimuth for center of measurement in degree, 0=north, increases clockwise\n'
5 [1mlza[0m b'Column 6: Lunar zenith angle for center of measurement in degree\n'
6 [1mlaz[0m b'Column 7: Lunar azimuth for center of measurement in degree, 0=north, increases clockwise\n'
7 [1mo3_du[0m b'Column 8: Ozone total vertical column amount [Dobson Units], -9e99=retrieval not successful\n'
8 [1munc_o3_du[0m b'Column 9: Uncertainty of ozone total vertical column amount [Dobson Units] based on measured uncertainty, -8=retrieval not successful, -1=cross section is zero in this wavelength range,

In [27]:
o3data.seek(0)
pdo3 = pd.read_csv(o3data,encoding='unicode_escape',header=66,delimiter=' ',names=col_name)

In [28]:
pdo3

Unnamed: 0,Time,doy_jan2000,duration_time,sza,saz,lza,laz,o3_du,unc_o3_du,m_o3,...,sumi2_L1_dq1,sumi2_L1_dq2,effT_wav,resi_straylight,L1_wv_shift,wv_shift_fit,int,darkcount,pos_filter1,pos_filter2
0,20191121T221326Z,7264.92600,26.09,66.12,215.93,89.91,276.83,293.36,0.11227,2.428,...,0,0,25.68,0.23,-0.01140,-0.04927,20.9,237,1,5
1,20191121T221453Z,7264.92701,26.62,66.29,216.25,90.19,277.04,293.04,0.11232,2.444,...,0,0,25.68,0.25,-0.01147,-0.04921,21.2,234,1,5
2,20191121T221618Z,7264.92800,25.86,66.46,216.57,90.47,277.24,292.97,0.11349,2.460,...,0,0,25.68,0.23,-0.01111,-0.04953,21.5,232,1,5
3,20191121T222329Z,7264.93298,26.80,67.33,218.14,91.85,278.27,292.34,0.11513,2.544,...,0,0,25.68,0.25,-0.01178,-0.05119,22.6,223,1,5
4,20191121T222455Z,7264.93398,27.79,67.50,218.46,92.12,278.48,292.33,0.11538,2.563,...,0,0,25.68,0.24,-0.01158,-0.05101,22.7,222,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143841,20220831T012504.2Z,8278.05908,49.28,76.09,270.38,57.10,227.20,284.96,0.15658,3.944,...,0,0,27.05,0.23,-0.00357,-0.10056,61.4,156,1,5
143842,20220831T013026.2Z,8278.06280,49.45,77.15,271.19,57.89,228.43,284.39,0.17940,4.227,...,0,0,27.05,0.26,-0.00367,-0.09883,74.7,129,1,5
143843,20220831T013603.8Z,8278.06671,49.57,78.27,272.04,58.73,229.69,283.43,0.21512,4.568,...,0,0,27.05,0.23,-0.00381,-0.10118,96.0,100,1,5
143844,20220831T015712.6Z,8278.08140,49.94,82.47,275.20,62.03,234.22,279.26,0.74438,6.465,...,0,0,26.94,0.39,-0.00382,-0.10531,399.4,24,1,5


In [29]:
pdo3['datetime'] = pd.to_datetime(pdo3['Time'])

#### NO2

In [30]:
no2data = BytesIO(b''.join(fno2))

In [31]:
no2data.seek(0)
no2header = []
for i in range(59):
    no2header.append(no2data.readline())
print(*no2header,sep='\n')

b'File name: Pandora34s1_MountainViewCA_L2Tot_rnvs1p1-7.txt\n'
b'File generation date: 20220831T081508Z\n'
b'Data description: Level 2 total columns file\n'
b'Data file version: rnvs1p1-7\n'
b'Data product status: Nitrogen dioxide data are official\n'
b'Local principal investigator: Nader Abuhassan\n'
b'Network principal investigator: Alexander Cede\n'
b'Instrument type: Pandora\n'
b'Instrument number: 34\n'
b'Spectrometer number: 1\n'
b'Processing software version used: BlickP v1.7.16\n'
b'Full location name: Ames Research Center\n'
b'Short location name: MountainViewCA\n'
b'Country of location: United States\n'
b'Location latitude [deg]: 37.4200\n'
b'Location longitude [deg]: -122.0568\n'
b'Location altitude [m]: 50\n'
b'Data start time: 20191121T221109Z\n'
b'Data end time: NONE\n'
b'Data caveats: None\n'
b'---------------------------------------------------------------------------------------\n'
b'Column 1: UT date and time for center of measurement, yyyymmddThhmmssZ (ISO 8601)\n'
b

In [32]:
col_name_no2 = ['Time','doy_jan2000','duration_time','sza','saz','lza','laz',
                'no2_du','unc_no2_du','m_no2','diff_corr_no2','qa_no2','sumi2_no2_dq1','sumi_no2_dq2',
                'fit_resi','resi_rms','expmeas_resi_rms','expinst_resi_rms','mean_val','Pres_mbar','dat_proc',
            'cal_version','cal_val_date','L2fit_QA','sumi2_L2fit_dq1','sumi2_L2fit_dq2','L1_QA','sumi2_L1_dq1',
            'sumi2_L1_dq2','effT_wav','resi_straylight','L1_wv_shift','wv_shift_fit','int','darkcount','pos_filter1','pos_filter2']

In [33]:
col_name_no2_decription = {}
j = 0
header_start = False
for i,h in enumerate(no2header):
    if h.strip().startswith(b'----'): 
        header_start = ~header_start
        continue
    if header_start:
        print(j,'\033[1m'+col_name_no2[j]+'\033[0m',h)
        col_name_no2_decription[col_name_no2[j]] = h
        j = j+1

0 [1mTime[0m b'Column 1: UT date and time for center of measurement, yyyymmddThhmmssZ (ISO 8601)\n'
1 [1mdoy_jan2000[0m b'Column 2: Fractional days since 1-Jan-2000 UT midnight for center of measurement\n'
2 [1mduration_time[0m b'Column 3: Effective duration of measurement in seconds\n'
3 [1msza[0m b'Column 4: Solar zenith angle for center of measurement in degree\n'
4 [1msaz[0m b'Column 5: Solar azimuth for center of measurement in degree, 0=north, increases clockwise\n'
5 [1mlza[0m b'Column 6: Lunar zenith angle for center of measurement in degree\n'
6 [1mlaz[0m b'Column 7: Lunar azimuth for center of measurement in degree, 0=north, increases clockwise\n'
7 [1mno2_du[0m b'Column 8: Nitrogen dioxide total vertical column amount [Dobson Units], -9e99=retrieval not successful\n'
8 [1munc_no2_du[0m b'Column 9: Uncertainty of nitrogen dioxide total vertical column amount [Dobson Units] based on measured uncertainty, -8=retrieval not successful, -1=cross section is zero i

In [34]:
no2data.seek(0)
pdno2 = pd.read_csv(no2data,encoding='unicode_escape',header=59,delimiter=' ',names=col_name_no2)

In [35]:
pdno2['datetime'] = pd.to_datetime(pdno2['Time'])

In [36]:
pdno2

Unnamed: 0,Time,doy_jan2000,duration_time,sza,saz,lza,laz,no2_du,unc_no2_du,m_no2,...,sumi2_L1_dq2,effT_wav,resi_straylight,L1_wv_shift,wv_shift_fit,int,darkcount,pos_filter1,pos_filter2,datetime
0,20191121T221239Z,7264.92546,19.47,66.03,215.76,89.76,276.72,0.44002,0.000918,2.444,...,0,25.68,1.00,-0.00199,-0.00149,8.4,433,3,4,2019-11-21 22:12:39+00:00
1,20191121T221407Z,7264.92648,21.01,66.20,216.08,90.05,276.93,0.44762,0.000932,2.461,...,0,25.68,1.00,-0.00151,-0.00081,8.5,429,3,4,2019-11-21 22:14:07+00:00
2,20191121T221532Z,7264.92746,19.35,66.37,216.40,90.32,277.13,0.43500,0.000917,2.477,...,0,25.68,1.00,-0.00142,-0.00097,8.5,429,3,4,2019-11-21 22:15:32+00:00
3,20191121T221659Z,7264.92847,20.78,66.54,216.72,90.60,277.34,0.44617,0.000913,2.493,...,2,25.68,0.93,-0.00136,-0.00088,8.6,0,3,4,2019-11-21 22:16:59+00:00
4,20191121T222242Z,7264.93244,19.78,67.23,217.97,91.70,278.16,0.44165,0.000996,2.564,...,0,25.68,1.00,-0.00140,-0.00117,8.7,424,3,4,2019-11-21 22:22:42+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551314,20220831T022335.4Z,8278.09972,18.37,87.67,279.15,66.40,239.43,0.26142,-3.000000,15.322,...,0,26.73,1.90,-0.02858,-0.00541,34.5,103,1,4,2022-08-31 02:23:35.400000+00:00
551315,20220831T022354.3Z,8278.09993,18.34,87.73,279.20,66.45,239.49,0.26419,-3.000000,15.488,...,0,26.73,1.95,-0.02251,-0.00519,34.5,103,1,4,2022-08-31 02:23:54.300000+00:00
551316,20220831T022635.4Z,8278.10180,49.16,88.25,279.60,66.91,239.99,0.28370,-3.000000,16.935,...,0,26.73,2.54,-0.02431,-0.02096,56.1,171,1,4,2022-08-31 02:26:35.400000+00:00
551317,20220831T022939.6Z,8278.10393,49.62,88.86,280.07,67.44,240.56,0.26629,-3.000000,18.572,...,0,26.73,3.61,0.00884,-0.05434,105.3,92,1,4,2022-08-31 02:29:39.600000+00:00


# Plot out data

## Plot time series

In [236]:
plt.figure()
plt.plot(s['time'],s['no2DU'],'.')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fad9c868f70>]

In [132]:
plt.figure()
plt.plot(s['time'],s['no2DU'],'.',label='4STAR')
plt.plot(s2['time'],s2['no2DU'],'+',label='4STAR pre')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fad842c3f70>

In [237]:
plt.figure()
plt.plot(pdno2['datetime'],pdno2['no2_du'],'.',label='Pandora')
plt.plot(s['time'],s['no2DU'],'.',label='4STAR')
plt.ylim(0,1)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fad9c84d790>

In [43]:
plt.figure()
plt.plot(pdo3['datetime'],pdo3['o3_du'],'.',label='Pandora')
plt.plot(s['time'],s['o3DU'],'.',label='4STAR')
plt.ylim(0,500)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7f73008a8a00>

## Match in time to compare each measurement

### Time delta fast version - but buggy?

In [95]:
time_diff = pdno2['datetime'].dt.tz_localize(None).to_numpy()-np.append(s['time'],s['time'][np.zeros((len(pdno2['datetime'])-len(s['time']),1)).astype(int)])

In [126]:
time_diff = time_diff.astype(float)/1E9 #convert to seconds

In [130]:
imatch = time_diff<600

In [165]:
plt.figure()
plt.plot(pdno2['no2_du'][imatch],s_no2DU[imatch[0:len(s['no2DU'])]],'.')


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fad7d3751c0>]

In [117]:
imatch.sum()

5990

### Match in time, slow loop version

In [178]:
def get_unixtime(dt64):
    return dt64.astype('datetime64[ms]').astype('float')/1000.0

In [168]:
t = s['time'][10000]

In [180]:
get_unixtime(t)

1518206526.062

In [170]:
t0 = pdno2['datetime'].dt.tz_localize(None).to_numpy()

In [179]:
get_unixtime(t0[10000])

1577039984.0

In [184]:
from tqdm import tqdm_notebook as tqdm 

In [199]:
np.diff(unix_t[190000:191000:100])

array([110.00099993, 110.01600003, 109.98300004, 110.00099993,
       110.        , 109.99900007, 110.10899997, 110.00099993,
       110.        ])

In [205]:
time_span = 600 #in seconds
match_pandora_no2 = []
match_4star_no2 = []

unix_t0 = get_unixtime(t0)
unix_t = get_unixtime(s['time'])

pbar = tqdm(total = len(unix_t))

for t in unix_t[::100]:
    imatch = (unix_t0<=t+time_span) & (unix_t0>=t-time_span)
    if any(imatch):
        match_pandora_no2.append(np.nanmean(pdno2['no2_du'][imatch]))
        match_4star_no2.append(np.nanmean(s_no2DU[(unix_t<=t+time_span) & (unix_t>=t-time_span)]))
    else:
        match_pandora_no2.append(np.nan)
        match_4star_no2.append(np.nan)
    pbar.update(100)
match_pandora_no2 = np.array(match_pandora_no2)
match_4star_no2 = np.array(match_4star_no2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  pbar = tqdm(total = len(unix_t))


  0%|          | 0/505275 [00:00<?, ?it/s]

In [228]:
days_since_2020 = (unix_t-get_unixtime(np.datetime64('2020-01-01T00:00:00.0')))/60.0/60.0/24.0

In [234]:
plt.figure()
plt.plot(match_pandora_no2,match_4star_no2,'.',label='averaged 10minute\n'+pu.stats_label(match_pandora_no2,match_4star_no2))
plt.plot([0,1],[0,1],'--',color='lightgrey',label='1:1')
pu.plot_lin(match_pandora_no2,match_4star_no2,x_err=match_pandora_no2*0.2,y_err=match_4star_no2*0.2,use_method='york')
plt.legend()
plt.scatter(match_pandora_no2,match_4star_no2,s=20,c=days_since_2020[::100],marker='o',zorder=10)
plt.colorbar(label='Days since 2020-01-01 [day]')
plt.ylabel('4STAR NO2 [DU]')
plt.xlabel('Pandora NO2 [DU]')
plt.title('NASA Ames rooftop comparison of 4STAR and Pandora NO2\nfrom 2020 to 2022')
plt.ylim(-0.05,1)
plt.xlim(-0.05,1)
plt.savefig(fp+'4STAR_pandora_NO2_comparison.png',dpi=600,transparent=True)

<IPython.core.display.Javascript object>

## one to one plot, matching measurement times

### Ozone

In [48]:
spd = pd.DataFrame(s)
spd['datetime'] = pd.to_datetime(s['time'],utc=True)
spd2 = spd.sort_values(by='datetime')

In [49]:
fullpd = pd.merge_asof(spd2,pdo3,direction='nearest')

In [50]:
fullpd['o3DU'][fullpd['o3DU']<0.0] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fullpd['o3DU'][fullpd['o3DU']<0.0] = np.nan


In [51]:
fullpd['o3_du'][fullpd['qa_o3']>100] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fullpd['o3_du'][fullpd['qa_o3']>100] = np.nan


In [52]:
fullpd['o3_du'] = fullpd['o3_du']/10.0

In [53]:
plt.figure()
plt.plot(fullpd['datetime'],fullpd['o3_du'],'.',label='Pandora')
plt.plot(fullpd['datetime'],fullpd['o3DU'],'.',label='4STAR')
plt.ylim(0,500)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7f72ff96fdf0>

In [337]:
import plotting_utils as pu
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [343]:
fl = np.isfinite(fullpd['o3_du']) & np.isfinite(fullpd['o3DU'])

In [364]:
r = st.spearmanr(fullpd['o3_du'],fullpd['o3DU'],nan_policy='omit')

In [367]:
r.correlation

0.5375012765846725

In [372]:
rmse = mean_squared_error(fullpd['o3_du'][fl],fullpd['o3DU'][fl],squared=True)
rmse

In [375]:
mae = mean_absolute_error(fullpd['o3_du'][fl],fullpd['o3DU'][fl])
mae

In [451]:
import importlib
importlib.reload(pu)

<module 'plotting_utils' from '/home/sam/python_codes/plotting_utils.py'>

In [387]:
from Sp_parameters import doublenanmask, nanmasked
from scipy import odr
from linfit import linfit

In [388]:
x = fullpd['o3_du']
y = fullpd['o3DU']
x_err = fullpd['unc_o3_du']
y_err = fullpd['o3resiDU']

In [389]:
xn,yn,mask = doublenanmask(x,y,return_mask=True)

In [390]:
model = odr.Model(lin)
dat = odr.RealData(xn,yn,sx=x_err[mask],sy=y_err[mask])

In [391]:
c,cm = linfit(xn,yn)
p = np.array([c[1],c[0]])

In [417]:
imask = mask & (y_err>0)

In [435]:
ri = np.corrcoef(x_err[imask],y_err[imask])[0,1]

In [442]:
a_bivar, b_bivar, S, cov = pu.bivariate_fit(xn,yn,x_err[imask],y_err[imask],b0=p[1],ri=ri**2)

In [443]:
a_bivar

-239.62155478503934

In [444]:
b_bivar

1.9896480005361032

In [445]:
S

2784015541.484368

In [449]:
np.sqrt(cov[1,1])*b_bivar

0.03470018645758508

In [441]:
plt.figure()
plt.errorbar(fullpd['o3_du'],fullpd['o3DU'],xerr=fullpd['unc_o3_du'],yerr=fullpd['o3resiDU'],marker='.',ls='')
plt.plot(fullpd['o3_du'],fullpd['o3DU'],'.',
         label='all data\nR$_{{spearman}}$={:1.3f}\nRMSE={:1.3f}\nMAE={:1.3f}'.format(r.correlation,rmse,mae))
plt.plot([0,500],[0,500],'--',color='lightgrey',label='1:1')
pu.plot_lin(fullpd['o3_du'],fullpd['o3DU'],x_err=fullpd['unc_o3_du'],y_err=fullpd['o3resiDU'],use_method='york')
plt.legend()
plt.ylabel('4STAR Ozone [DU]')
plt.xlabel('Pandora Ozone [DU]')
plt.ylim(200,400)
plt.xlim(200,400)
plt.savefig(fp+'Winter_2022/plots/4STAR_to_Pandora_O3.png',dpi=600,transparent=True)

<IPython.core.display.Javascript object>

In [401]:
plt.figure()
plt.errorbar(fullpd['o3_du'],fullpd['o3DU'],xerr=fullpd['unc_o3_du'],yerr=fullpd['o3resiDU'],marker='.',ls='')
plt.plot(fullpd['o3_du'],fullpd['o3DU'],'.',
         label='all data\nR$_{{spearman}}$={:1.3f}\nRMSE={:1.3f}\nMAE={:1.3f}'.format(r.correlation,rmse,mae))
plt.plot([0,500],[0,500],'--',color='lightgrey',label='1:1')
pu.plot_lin(fullpd['o3_du'],fullpd['o3DU'],x_err=fullpd['unc_o3_du'],y_err=fullpd['o3resiDU'],use_method='york')
plt.legend()
plt.ylabel('4STAR Ozone [DU]')
plt.xlabel('Pandora Ozone [DU]')
plt.ylim(200,400)
plt.xlim(200,400)
plt.savefig(fp+'Winter_2022/plots/4STAR_to_Pandora_O3.png',dpi=600,transparent=True)

<IPython.core.display.Javascript object>

### NO2

In [40]:
spd = pd.DataFrame(s)
spd['datetime'] = pd.to_datetime(s['time'],utc=True)
spd2 = spd.sort_values(by='datetime')

In [41]:
fullpdn = pd.merge_asof(spd2,pdno2,direction='nearest')

In [52]:

fullpdn['no2DU'][fullpdn['no2DU']<0.02] = np.nan
fullpdn['no2DU'][fullpdn['no2DU']>0.8] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fullpdn['no2DU'][fullpdn['no2DU']<0.02] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fullpdn['no2DU'][fullpdn['no2DU']>0.8] = np.nan


In [43]:
fullpdn['no2_du'][fullpdn['qa_no2']>10] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fullpdn['no2_du'][fullpdn['qa_no2']>10] = np.nan


In [44]:
plt.figure()
plt.plot(fullpdn['datetime'],fullpdn['no2_du'],'.',label='Pandora')
plt.plot(fullpdn['datetime'],fullpdn['no2DU'],'.',label='4STAR')
plt.ylim(0,10)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fadc14a9730>

In [54]:
fln = np.isfinite(fullpdn['no2_du']) & np.isfinite(fullpdn['no2DU'])

In [57]:
plt.figure()
plt.plot(fullpdn['no2_du'],fullpdn['no2DU'],'.',
         label='all data\nR={:1.3f}'.format(np.corrcoef(fullpdn['no2_du'][fln],fullpdn['no2DU'][fln])[0,1]))
plt.plot([0,10],[0,10],'--',color='lightgrey',label='1:1')
pu.plot_lin(fullpdn['no2_du'],fullpdn['no2DU'])
plt.legend()
plt.ylabel('4STAR NO2 [DU]')
plt.xlabel('Pandora NO2 [DU]')
plt.ylim(0,0.8)
plt.xlim(0,0.8)

#plt.savefig(fp+'Winter_2022/plots/4STAR_to_Pandora_NO2.png',dpi=600,transparent=True)

<IPython.core.display.Javascript object>

(0.0, 0.8)

In [55]:
plt.figure()
plt.plot(fullpdn['no2_du'],fullpdn['no2DU'],'.',
         label='all data\n'+pu.stats_label(fullpdn['no2_du'],fullpdn['no2DU']))
plt.plot([0,10],[0,10],'--',color='lightgrey',label='1:1')
pu.plot_lin(fullpdn['no2_du'],fullpdn['no2DU'],x_err=fullpdn['unc_no2_du'],y_err=fullpdn['no2resiDU'],use_method='york')
plt.legend()
plt.ylabel('4STAR NO2 [DU]')
plt.xlabel('Pandora NO2 [DU]')
plt.ylim(0,2)
plt.xlim(0,2)

#plt.savefig(fp+'Winter_2022/plots/4STAR_to_Pandora_NO2.png',dpi=600,transparent=True)

<IPython.core.display.Javascript object>

(0.0, 2.0)