# 1 Importing necessary libraries

In [10]:
import netCDF4 as nc  # needed to read TEMPO data

import os
import sys
import glob

from scipy import stats  # needed for linear regression analysis
import statsmodels.api as sm

import requests  # needed to search for and download Pandora data
import codecs  # needed to read Pandora data
import numpy as np

from urllib.request import urlopen  # needed to search for and download Pandora data
from pathlib import Path  # needed to check whether a needed data file is already downloaded
from datetime import datetime  # needed to work with time in plotting time series

# 2 Defining functions to work with Pandora and TEMPO data

## 2.1 functions to work with Pandora web site and original data

### 2.1.1 function creating the list of available Pandora sites

In [2]:
# function read_pandora_web returns the list of available Pandora sites
def read_pandora_web():
    """Return the directory names listed on the Pandonia data server root page.

    The root index page is downloaded and scanned line by line. A line
    contributes a name when it contains ``href``, its first double quote is
    followed by ``.`` and its last double quote is preceded by ``/`` (i.e. a
    relative directory link such as ``"./Name/"``). The text between the
    leading ``./`` and the trailing ``/`` is returned.

    Returns:
        list[str]: directory names in page order. Non-site directories that
        follow the same link pattern are included as well.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    url = "https://data.pandonia-global-network.org/"
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(url) as page:
        html = page.read().decode("utf-8")

    refs = []
    for line in html.split("\n"):
        if "href" not in line:
            continue
        pos1 = line.find('"')
        pos2 = line.rfind('"')
        # Keep only relative directory links: "./<name>/"
        if pos1 > 0 and pos2 > pos1 and line[pos2 - 1] == "/" and line[pos1 + 1] == ".":
            refs.append(line[pos1 + 3 : pos2 - 1])

    return refs

### 2.1.2 functions allowing user to choose a Pandora site of interest

In [3]:
# function check_site checks whether user entered site is in the list of available Pandora sites
def check_site(site_name, refs):
    """Return every entry of ``refs`` that contains ``site_name`` as a substring.

    The match is case-sensitive and the order of ``refs`` is preserved.
    An empty list means no entry matched.
    """
    return [candidate for candidate in refs if site_name in candidate]


# function take_pandora_sites takes user input and checks whether the site is in the list of available Pandora sites
def take_pandora_sites(refs):
    """Interactively ask the user for one Pandora site and return its name.

    All names in ``refs`` are printed first. The user then types a (partial)
    name, which is matched against ``refs`` with ``check_site``:

    * no match       - a message is printed and the user is asked again;
    * exactly one    - that site is returned;
    * more than one  - the matches are listed and the user must type one of
      them exactly; anything else terminates the program via ``sys.exit()``.
    """
    print("please select a Pandora site name from the list")
    for known_site in refs:
        print(known_site)

    while True:
        typed = input("Enter a name of a Pandora site: ")
        print(typed)
        candidates = check_site(typed, refs)
        n_candidates = len(candidates)

        if n_candidates == 0:
            print("site ", typed, "was not found")
            continue

        if n_candidates == 1:
            chosen = candidates[0]
            print("site ", chosen, "was found and added to the list of sites ")
            return chosen

        # Ambiguous input: require an exact name from the shortlist.
        print("there are ", n_candidates, " site names, select one from")
        for candidate in candidates:
            print(candidate)

        typed = input("Enter a name of a Pandora site: ")
        if candidates.count(typed) != 1:
            print("Entered name is not the exact match of one of the following sites")
            for candidate in candidates:
                print(candidate)
            print("program terminated")
            sys.exit()

        print("site ", typed, "was found and added to the list of sites ")
        return typed

### 2.1.3 function creating the list of links to tropospheric NO2 data files at the selected Pandora sites and downloading the data files

In [4]:
# Pandora site may have several instruments. In this case each instrument has its own directory.
# However, the most recent version of the tropospheric NO2 data, rnvh3p1-8, is available only in one of these directories.
# The function creates all possible links, but some of them may be non-existing. This is checked and cleared later.
def instrument_path(site):
    """Build candidate URLs of rnvh3p1-8 NO2 files for every instrument at ``site``.

    The site's index page is downloaded and scanned for directory links whose
    name starts with ``Pandora`` (one directory per instrument). For each such
    directory ``<instr>`` the URL
    ``<site url>/<instr>/L2/<instr>_<site>_L2_rnvh3p1-8.txt`` is composed,
    printed and collected. The URLs are not checked here; some may not exist.

    Args:
        site: site directory name as listed on the data server.

    Returns:
        list[str]: candidate file URLs, in page order.

    Raises:
        urllib.error.URLError: if the site page cannot be fetched.
    """
    url = "https://data.pandonia-global-network.org/" + site + "/"
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(url) as page:
        html = page.read().decode("utf-8")

    links = []
    for line in html.split("\n"):
        if "href" not in line:
            continue
        pos1 = line.find('"')
        pos2 = line.rfind('"')
        is_instrument_dir = (
            pos1 > 0
            and pos2 > pos1
            and line[pos2 - 1] == "/"
            and line[pos1 + 3 : pos1 + 10] == "Pandora"
        )
        if not is_instrument_dir:
            continue

        # Text between the leading "./" and the trailing "/" of the link.
        instrument = line[pos1 + 3 : pos2 - 1]
        link = f"{url}{instrument}/L2/{instrument}_{site}_L2_rnvh3p1-8.txt"
        print(link)
        links.append(link)

    return links


# function downloading Pandora data file with given url
def download(url, timeout=60):
    """Download ``url`` into the current directory.

    The local file name is the last path component of ``url``. The file is
    written only when the server answers with HTTP status 200.

    Args:
        url: address of the file to fetch.
        timeout: seconds to wait for the server (connect / between bytes)
            before giving up; passed straight to ``requests.get``. Without a
            timeout a stalled connection would block the program forever.

    Returns:
        tuple[str, int]: ``(file_name, http_status_code)``.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
    """
    response = requests.get(url, timeout=timeout)
    response_code = response.status_code

    file_name = url.split("/")[-1]

    if response_code == 200:
        Path(file_name).write_bytes(response.content)

    return file_name, response_code

### 2.1.4 function reading Pandora NO2 data files rnvh3p1-8

In [5]:
# function reading Pandora NO2 data file rnvh3p1-8
#
# This version selects observations by quality flag instead of only discarding
# negative columns. QF == 0 was found not to occur often enough, so QF == 10
# (not-assured high quality) is accepted as well.
def read_Pandora_NO2_rnvh3p1_8(fname):
    """Read a Pandora L2 file ending with ``rnvh3p1-8``.

    File layout expected by this reader: a metadata header terminated by a
    line containing ``--------``, a second section terminated by another such
    line, then one whitespace-separated observation per line.

    From the header the short location name, latitude and longitude are
    taken (and printed). From each data row the following 0-based fields are
    used: 0 - timestamp string, 52 - quality flag, 55 - surface concentration
    (Csfc), 61 - tropospheric NO2 vertical column, 62 - its independent
    uncertainty. A row is kept only if the quality flag is 0 or 10 and
    neither Csfc nor the column is negative. Values are returned in the
    units of the file; no unit conversion is applied.

    Args:
        fname: name of the file to be read.

    Returns:
        tuple: ``(lat, lon, loc_name, timestamp, data)`` where ``timestamp``
        is a 1-D object array of timestamp strings and ``data`` is a float
        array of shape ``(n, 3)`` with columns Csfc, column, column
        uncertainty. If a header item is missing, ``lat``/``lon`` stay at
        ``-999.0`` and ``loc_name`` at ``""``; if the file ends before the
        data section, the arrays are empty.
    """
    # Sentinel defaults: callers test `lat == -999.0` to detect a failed read.
    lat = -999.0
    lon = -999.0
    loc_name = ""

    # Accumulate in lists; np.append inside the loop copies the whole array
    # on every row, which is quadratic for large files.
    rows = []
    stamps = []

    with codecs.open(fname, "r", encoding="utf-8", errors="ignore") as f:
        separators_seen = 0
        for line in f:  # iteration stops at EOF, so a truncated file cannot hang
            if separators_seen == 0:
                if line.find("Short location name:") >= 0:
                    loc_name = line.split()[-1]  # location name, to be used in the output file name
                    print("location name ", loc_name)

                if line.find("Location latitude [deg]:") >= 0:
                    lat = float(line.split()[-1])  # location latitude
                    print("location latitude ", lat)

                if line.find("Location longitude [deg]:") >= 0:
                    lon = float(line.split()[-1])  # location longitude
                    print("location longitude ", lon)

            if separators_seen < 2:
                # Still in the header: only look for the next dashed separator.
                if line.find("--------") >= 0:
                    separators_seen += 1
                continue

            fields = line.split()
            if not fields:  # tolerate blank lines in the data section
                continue

            quality_flag = int(fields[52])
            surface_conc = float(fields[55])
            # Nitrogen dioxide tropospheric vertical column amount [moles per square meter]
            column = float(fields[61])
            if quality_flag not in (0, 10) or surface_conc < 0.0 or column < 0.0:
                continue

            # Independent uncertainty of the tropospheric column [moles per square meter]
            column_unc = float(fields[62])
            rows.append((surface_conc, column, column_unc))
            stamps.append(fields[0])

    data = np.array(rows, dtype=float).reshape(-1, 3)
    timestamp = np.array(stamps, dtype=object)

    return lat, lon, loc_name, timestamp, data

# 3 functions writing and reading Pandora NO2 data to and from netCDF files

## 3.1 function writing Pandora NO2 data to netCDF files

In [6]:
def write_Pandora_nc(lat, lon, POI_name, Pandora_dates, Pandora_data):
    """Write filtered Pandora NO2 data to a new netCDF4 file in the current directory.

    The file name is ``<POI_name>_<lat>N_<-lon>W_created_<YYYYmmddTHHMMSS>Z.nc``.
    The file stores the location as global attributes and five variables
    along the dimension ``ndata``: ``date`` (integer YYYYMMDD), ``time``
    (seconds from the beginning of that day), ``NO2_Csfc``,
    ``NO2_trop_column`` and ``NO2_trop_column_unc``.

    Args:
        lat: site latitude, written as attribute "latitude, deg N".
        lon: site longitude, written as attribute "longitude, deg E".
        POI_name: site name, used in the file name and the "place" attribute.
        Pandora_dates: sequence of timestamp strings; characters 0-7 are read
            as the date (YYYYMMDD) and characters 9-16 as hhmmss.s.
        Pandora_data: array of shape (n, 3) with columns Csfc, tropospheric
            column and column uncertainty.

    Returns:
        str: name of the file that was written.
    """
    # NOTE(review): datetime.now() is local time although the stamp ends with
    # "Z"; left unchanged so new files keep sorting consistently with old ones.
    created = datetime.now()
    fname = (
        f"{POI_name}_{lat:7.4f}N_{-lon:08.4f}W_created_"
        + created.strftime("%Y%m%dT%H%M%SZ")
        + ".nc"
    )

    n_obs = len(Pandora_dates)
    dates = np.empty([n_obs], dtype=int)
    times = np.empty([n_obs], dtype=np.float32)
    for i, stamp in enumerate(Pandora_dates):
        dates[i] = int(stamp[0:8])
        # hhmmss.s packed into one number; split into hours, minutes, seconds
        hhmmss = np.float32(stamp[9:17])
        hh = int(hhmmss / 10000.0)
        mm = int(hhmmss / 100.0) - hh * 100
        ss = hhmmss - hh * 10000 - mm * 100
        times[i] = 3600.0 * hh + mm * 60.0 + ss

    # The context manager closes the dataset even when a write below fails,
    # so no half-open file handle is left behind.
    with nc.Dataset(fname, "w", format="NETCDF4") as ds:
        ds.setncattr("place", POI_name)
        ds.setncattr("latitude, deg N", np.float32(lat))
        ds.setncattr("longitude, deg E", np.float32(lon))
        dim = ds.createDimension("ndata", n_obs)

        var_new = ds.createVariable("date", "i4", dim, fill_value=-1, compression="zlib")
        var_new[:] = dates

        var_new = ds.createVariable("time", "f4", dim, fill_value=-1.0, compression="zlib")
        var_new[:] = times
        var_new.setncattr("meaning", "seconds from the beginning of the day, see date")
        var_new.units = "seconds"

        var_new = ds.createVariable("NO2_Csfc", "f4", dim, fill_value=-1.0, compression="zlib")
        var_new[:] = Pandora_data[:, 0]
        var_new.units = "moles/m^3"

        var_new = ds.createVariable(
            "NO2_trop_column", "f4", dim, fill_value=-1.0, compression="zlib"
        )
        var_new[:] = Pandora_data[:, 1]
        var_new.units = "moles/m^2"

        var_new = ds.createVariable(
            "NO2_trop_column_unc", "f4", dim, fill_value=-1.0, compression="zlib"
        )
        var_new[:] = Pandora_data[:, 2]
        var_new.units = "moles/m^2"

    return fname

## 3.2 function reading Pandora NO2 data from netCDF files

In [7]:
def read_Pandora_nc(fname):
    """Read a Pandora netCDF file that stores NO2 data under the names used below.

    The global attributes "place", "latitude, deg N" and "longitude, deg E"
    are read and printed (together with latitude + 1 and longitude + 1), then
    the three NO2 variables are loaded in full.

    Returns:
        tuple: ``(lat, lon, POI_name, Csfc, NO2_trop_col, NO2_trop_col_unc)``.
    """
    attr_names = ("place", "latitude, deg N", "longitude, deg E")
    var_names = ("NO2_Csfc", "NO2_trop_column", "NO2_trop_column_unc")

    with nc.Dataset(fname) as dataset:
        POI_name, lat, lon = [dataset.getncattr(key) for key in attr_names]
        print(POI_name, lat, lon, lat + 1, lon + 1)

        # [:] materialises the data before the file is closed
        Csfc, NO2_trop_col, NO2_trop_col_unc = [dataset.variables[key][:] for key in var_names]

    return lat, lon, POI_name, Csfc, NO2_trop_col, NO2_trop_col_unc

# 4 Working with Pandora data

## 4.1 Discovering existing Pandora stations and selecting one of them

In [11]:
# Interactive driver: list the Pandora sites published on the data server, let
# the user pick one, make sure its rnvh3p1-8 file is available locally
# (downloading it if needed), read it and write the filtered data to netCDF.
# The loop repeats until the user answers exactly "N" to the final prompt.
# Please bear in mind that some sites do not have tropospheric NO2 data files
print("gathering Pandora sites information")
refs = read_pandora_web()

answer = "Y"
while answer != "N":
    pandora_site = take_pandora_sites(refs)  # a single site name chosen by the user
    print("the following sites were selected")
    print(pandora_site)
    print("from the list of existing Pandora sites")

    # create a list of !AVAILABLE! Pandora files for the Pandora site
    pandora_files = []

    # candidate URLs, one per instrument directory; some may not exist
    links = instrument_path(pandora_site)

    npfiles = 0

    for link in links:
        pandora_fname = link.split("/")[-1]

        # check if file exists in the local directory, if not download from Pandora site
        if not os.path.exists(pandora_fname):
            print(pandora_fname, " does not exit in local directory, downloading from the web")
            print(link)

            pandora_fname, response_code = download(link)

            # only a 200 response means the file was actually written
            if response_code == 200:
                print("Pandora L2 file ", pandora_fname, " has been downloaded")
                npfiles = npfiles + 1
                pandora_files.append(pandora_fname)
            else:
                print("Pandora L2 file ", link, " does not exist")

        else:
            print(pandora_fname, " exits in local directory")
            npfiles = npfiles + 1
            pandora_files.append(pandora_fname)

    # NOTE: despite the "program terminated" message, `continue` restarts the
    # loop with a new site selection; the "process another site?" prompt is skipped.
    if npfiles == 0:  # no files were found for this site
        print("no files were found for Pandora site ", pandora_site, "program terminated")
        continue
    if npfiles > 1:  # normally there should be only one file per site; more needs investigation
        print(
            "there are too many files for site ",
            pandora_site,
            "- STOP and investigate file names below, cycling to another Pandora site",
        )
        for pandora_fname in pandora_files:
            print(pandora_fname)
        continue

    pandora_file = pandora_files[0]
    # Reading Pandora file
    lat, lon, POI_name, Pandora_dates, Pandora_data = read_Pandora_NO2_rnvh3p1_8(pandora_file)
    # NOTE(review): relies on the reader reporting a failed read as lat == -999.0 - confirm
    if lat == -999.0:  # check whether the read was successful
        print("error reading pandora file ", pandora_file, "cycling to another Pandora site")
        continue

    # writing Pandora netCDF4 file, only data
    write_Pandora_nc(lat, lon, POI_name, Pandora_dates, Pandora_data)

    # any answer other than exactly "N" starts another round
    answer = input("would you like to process another Pandora site? Y or N")

gathering Pandora sites information
please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
In

Enter a name of a Pandora site:  Richm


Richm
site  RichmondCA was found and added to the list of sites 
the following sites were selected
RichmondCA
from the list of existing Pandora sites
https://data.pandonia-global-network.org/RichmondCA/Pandora52s1/L2/Pandora52s1_RichmondCA_L2_rnvh3p1-8.txt
Pandora52s1_RichmondCA_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/RichmondCA/Pandora52s1/L2/Pandora52s1_RichmondCA_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora52s1_RichmondCA_L2_rnvh3p1-8.txt  has been downloaded
location name  RichmondCA
location latitude  37.913
location longitude  -122.336


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  Wrigh


Wrigh
site  WrightwoodCA was found and added to the list of sites 
the following sites were selected
WrightwoodCA
from the list of existing Pandora sites
https://data.pandonia-global-network.org/WrightwoodCA/Pandora68s1/L2/Pandora68s1_WrightwoodCA_L2_rnvh3p1-8.txt
Pandora68s1_WrightwoodCA_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/WrightwoodCA/Pandora68s1/L2/Pandora68s1_WrightwoodCA_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora68s1_WrightwoodCA_L2_rnvh3p1-8.txt  has been downloaded
location name  WrightwoodCA
location latitude  34.3819
location longitude  -117.6813


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  Manh


Manh
there are  2  site names, select one from
ManhattanKS
ManhattanNY-CCNY


Enter a name of a Pandora site:  ManhattanNY-CCNY


site  ManhattanNY-CCNY was found and added to the list of sites 
the following sites were selected
ManhattanNY-CCNY
from the list of existing Pandora sites
https://data.pandonia-global-network.org/ManhattanNY-CCNY/Pandora135s1/L2/Pandora135s1_ManhattanNY-CCNY_L2_rnvh3p1-8.txt
Pandora135s1_ManhattanNY-CCNY_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/ManhattanNY-CCNY/Pandora135s1/L2/Pandora135s1_ManhattanNY-CCNY_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora135s1_ManhattanNY-CCNY_L2_rnvh3p1-8.txt  has been downloaded
location name  ManhattanNY-CCNY
location latitude  40.8153
location longitude  -73.9505


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  Wash


Wash
site  WashingtonDC was found and added to the list of sites 
the following sites were selected
WashingtonDC
from the list of existing Pandora sites
https://data.pandonia-global-network.org/WashingtonDC/Pandora140s1/L2/Pandora140s1_WashingtonDC_L2_rnvh3p1-8.txt
Pandora140s1_WashingtonDC_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/WashingtonDC/Pandora140s1/L2/Pandora140s1_WashingtonDC_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora140s1_WashingtonDC_L2_rnvh3p1-8.txt  has been downloaded
location name  WashingtonDC
location latitude  38.9218
location longitude  -77.0124


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  MexicoCity-UNAM


MexicoCity-UNAM
site  MexicoCity-UNAM was found and added to the list of sites 
the following sites were selected
MexicoCity-UNAM
from the list of existing Pandora sites
https://data.pandonia-global-network.org/MexicoCity-UNAM/Pandora142s1/L2/Pandora142s1_MexicoCity-UNAM_L2_rnvh3p1-8.txt
Pandora142s1_MexicoCity-UNAM_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/MexicoCity-UNAM/Pandora142s1/L2/Pandora142s1_MexicoCity-UNAM_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora142s1_MexicoCity-UNAM_L2_rnvh3p1-8.txt  has been downloaded
location name  MexicoCity-UNAM
location latitude  19.3262
location longitude  -99.1761


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  BoulderCO-


BoulderCO-
site  BoulderCO-NCAR was found and added to the list of sites 
the following sites were selected
BoulderCO-NCAR
from the list of existing Pandora sites
https://data.pandonia-global-network.org/BoulderCO-NCAR/Pandora204s1/L2/Pandora204s1_BoulderCO-NCAR_L2_rnvh3p1-8.txt
Pandora204s1_BoulderCO-NCAR_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/BoulderCO-NCAR/Pandora204s1/L2/Pandora204s1_BoulderCO-NCAR_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora204s1_BoulderCO-NCAR_L2_rnvh3p1-8.txt  has been downloaded
location name  BoulderCO-NCAR
location latitude  40.0375
location longitude  -105.242


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  Buff


Buff
site  BuffaloNY was found and added to the list of sites 
the following sites were selected
BuffaloNY
from the list of existing Pandora sites
https://data.pandonia-global-network.org/BuffaloNY/Pandora206s1/L2/Pandora206s1_BuffaloNY_L2_rnvh3p1-8.txt
Pandora206s1_BuffaloNY_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/BuffaloNY/Pandora206s1/L2/Pandora206s1_BuffaloNY_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora206s1_BuffaloNY_L2_rnvh3p1-8.txt  has been downloaded
location name  BuffaloNY
location latitude  43.0015
location longitude  -78.7869


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  Corp


Corp
site  CorpusChristiTX was found and added to the list of sites 
the following sites were selected
CorpusChristiTX
from the list of existing Pandora sites
https://data.pandonia-global-network.org/CorpusChristiTX/Pandora258s1/L2/Pandora258s1_CorpusChristiTX_L2_rnvh3p1-8.txt
Pandora258s1_CorpusChristiTX_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/CorpusChristiTX/Pandora258s1/L2/Pandora258s1_CorpusChristiTX_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora258s1_CorpusChristiTX_L2_rnvh3p1-8.txt  has been downloaded
location name  CorpusChristiTX
location latitude  27.715
location longitude  -97.3287


would you like to process another Pandora site? Y or N Y


please select a Pandora site name from the list
Agam
AldineTX
AliceSprings
Altzomoni
ArlingtonTX
Athens-NOA
AtlantaGA-Conyers
AtlantaGA-GATech
AtlantaGA-SouthDeKalb
AtlantaGA
AustinTX
Bandung
Bangkok
Banting
BayonneNJ
Beijing-RADI
BeltsvilleMD
Berlin
BlueHillMA
BostonMA
BoulderCO-NCAR
BoulderCO
Bremen
BristolPA
BronxNY
Brussels-Uccle
Bucharest
BuenosAires
BuffaloNY
Busan
Cabauw
Calakmul
calibrationfiles
CambridgeBay
CambridgeMA
CameronLA
CapeElizabethME
Cebu
ChapelHillNC
CharlesCityVA
ChelseaMA
ChiangMai
ChicagoIL
Cologne
ComodoroRivadavia
Cordoba
CornwallCT
CorpusChristiTX
Daegu
Dalanzadgad
DaNang
Davos
DearbornMI
DeBilt
Dhaka
Downsview
EastProvidenceRI
EdwardsCA
Egbert
EssexMD
Eureka-0PAL
Eureka-PEARL
FairbanksAK
Fajardo
FortMcKay
FortYatesND
Fukuoka
Gongju-KNU
Granada
GrandForksND
GreenbeltMD
Haldwani-ARIES
HamptonVA-HU
HamptonVA
HanoiCity-NCEM
Heidelberg
Helsinki
HoChiMinhCity-SCEM
HoustonTX-SanJacinto
HoustonTX
HuntsvilleAL
Ilocos
Incheon-ESC
Innsbruck
IowaCityIA-WHS
Islamabad-NUS

Enter a name of a Pandora site:  Hous


Hous
there are  2  site names, select one from
HoustonTX-SanJacinto
HoustonTX


Enter a name of a Pandora site:  HoustonTX-SanJacinto


site  HoustonTX-SanJacinto was found and added to the list of sites 
the following sites were selected
HoustonTX-SanJacinto
from the list of existing Pandora sites
https://data.pandonia-global-network.org/HoustonTX-SanJacinto/Pandora261s1/L2/Pandora261s1_HoustonTX-SanJacinto_L2_rnvh3p1-8.txt
Pandora261s1_HoustonTX-SanJacinto_L2_rnvh3p1-8.txt  does not exit in local directory, downloading from the web
https://data.pandonia-global-network.org/HoustonTX-SanJacinto/Pandora261s1/L2/Pandora261s1_HoustonTX-SanJacinto_L2_rnvh3p1-8.txt
Pandora L2 file  Pandora261s1_HoustonTX-SanJacinto_L2_rnvh3p1-8.txt  has been downloaded
location name  HoustonTX-SanJacinto
location latitude  29.579
location longitude  -95.2045


would you like to process another Pandora site? Y or N N


## selecting the latest Pandora netCDF files

In [14]:
#
# There can be several runs reading Pandora data. Only the latest one for each location is needed.
# Code below creates list of all .nc Pandora files and, for every location,
# picks up the latest one by the timestamp embedded in its name.
#


def select_latest_files(names):
    """Return the most recently created file name for every location.

    Each name is split at "_created_": the part before it identifies the
    location (root), the part after it is parsed as a "%Y%m%dT%H%M%S"
    timestamp once its last four characters (e.g. "Z.nc") are dropped.

    Parameters
    ----------
    names : iterable of str
        File names of the form "<root>_created_<timestamp>Z.nc".

    Returns
    -------
    list of str
        One name per distinct root, ordered by the first appearance of the
        root in `names`. If several files of one root share the latest
        timestamp, the first of them is kept. Empty input gives an empty list.

    Raises
    ------
    ValueError
        If a timestamp does not match the expected format.
    """
    latest = {}  # root -> (creation time, file name); insertion order is preserved
    for name in names:
        root = name.split("_created_")[0]
        created = datetime.strptime(name.split("_created_")[-1][:-4], "%Y%m%dT%H%M%S")
        # keep the entry only if this root is new or this file is strictly newer
        if root not in latest or created > latest[root][0]:
            latest[root] = (created, name)

    return [name for _, name in latest.values()]


names = sorted(glob.glob("*_created_*.nc"))
unique_list = select_latest_files(names)

In [15]:
# display the list of Pandora netCDF files selected in the previous cell
unique_list

['BoulderCO-NCAR_40.0375N_105.2420W_created_20250314T203547Z.nc',
 'BuffaloNY_43.0015N_078.7869W_created_20250314T203608Z.nc',
 'CorpusChristiTX_27.7150N_097.3287W_created_20250314T203637Z.nc',
 'HoustonTX-SanJacinto_29.5790N_095.2045W_created_20250314T203710Z.nc',
 'ManhattanNY-CCNY_40.8153N_073.9505W_created_20250314T203304Z.nc',
 'MexicoCity-UNAM_19.3262N_099.1761W_created_20250314T203445Z.nc',
 'RichmondCA_37.9130N_122.3360W_created_20250314T203211Z.nc',
 'WashingtonDC_38.9218N_077.0124W_created_20250314T203425Z.nc',
 'WrightwoodCA_34.3819N_117.6813W_created_20250314T203228Z.nc']

In [16]:
def fit_and_print_wls(NO2_col, Csfc, weights):
    """Fit Csfc = intercept + slope * NO2_col by weighted least squares and print a summary line.

    Parameters
    ----------
    NO2_col : array-like
        Independent variable (NO2 tropospheric column).
    Csfc : array-like
        Dependent variable (near surface NO2 concentration).
    weights : array-like
        Per-observation weights passed to statsmodels WLS.

    Returns
    -------
    tuple
        (slope, intercept, r2) of the fit.
    """
    X = sm.add_constant(NO2_col)  # adds the column of ones for the intercept term
    results = sm.WLS(Csfc, X, weights=weights).fit()
    [intercept, slope] = results.params
    r2 = results.rsquared
    print(
        f"\t# of points: {len(Csfc):3d}, slope: {slope: 8.2e}, intercept: {intercept: 8.2e}, R^2 = {r2:7.4f}"
    )
    return slope, intercept, r2


def report_regressions(NO2_col, Csfc, NO2_col_unc):
    """Run and print the three regressions of Csfc against NO2_col.

    The three printed lines are, in order:
      1. standard linear regression (scipy.stats.linregress), with standard errors;
      2. weighted least squares with all weights set to 1, which serves as a check
         that the standard and weighted regressions match;
      3. weighted least squares with weights 1 / NO2_col_unc, i.e., smaller
         uncertainty gives greater weight to the observation.

    Parameters
    ----------
    NO2_col : array-like
        NO2 tropospheric column values.
    Csfc : array-like
        Near surface NO2 concentration values, same length as NO2_col.
    NO2_col_unc : array-like
        Uncertainties of the NO2 column retrievals, same length as NO2_col.

    Returns
    -------
    tuple
        (slope, intercept, r2) of the uncertainty-weighted regression.
    """
    n_points = len(Csfc)

    # standard linear regression, no weights
    regress = stats.linregress(NO2_col, Csfc)
    slope = regress.slope
    intercept = regress.intercept
    r2 = regress.rvalue**2
    stderr = regress.stderr
    intercept_stderr = regress.intercept_stderr
    print(
        f"\t# of points: {n_points:3d}, slope: {slope: 8.2e} \u00b1 {stderr:8.2e}, intercept: {intercept: 8.2e} \u00b1 {intercept_stderr: 8.2e}, R^2 = {r2:7.4f}"
    )

    # preparation for the final weighted linear regression run:
    # with unit weights the weighted regression must reproduce the standard one.
    fit_and_print_wls(NO2_col, Csfc, np.ones(n_points))

    # weighted linear regression, weights are inversely proportional to the uncertainties
    # of Pandora column retrievals.
    # NOTE(review): statsmodels documents WLS weights as proportional to the inverse
    # variance (1 / sigma**2); the 1 / sigma weighting is kept unchanged here - confirm
    # it is intended.
    weights = np.empty(n_points)
    weights[:] = 1.0 / NO2_col_unc  # slice assignment keeps a plain float array of length n_points
    return fit_and_print_wls(NO2_col, Csfc, weights)


# observations accumulated over all stations
Csfc_all = np.empty(0)
NO2_trop_col_all = np.empty(0)
NO2_trop_col_unc_all = np.empty(0)

for fname in unique_list:
    lat, lon, POI_name, Csfc, NO2_trop_col, NO2_trop_col_unc = read_Pandora_nc(fname)
    print(lat, lon, POI_name)

    # accumulate observations in arrays
    Csfc_all = np.append(Csfc_all, Csfc)
    NO2_trop_col_all = np.append(NO2_trop_col_all, NO2_trop_col)
    NO2_trop_col_unc_all = np.append(NO2_trop_col_unc_all, NO2_trop_col_unc)

    # per-station statistics
    report_regressions(NO2_trop_col, Csfc, NO2_trop_col_unc)

print("\nOverall statistics for ", len(unique_list), "Pandora stations")
# slope, intercept and r2 hold the result of the uncertainty-weighted regression over all stations
slope, intercept, r2 = report_regressions(NO2_trop_col_all, Csfc_all, NO2_trop_col_unc_all)

BoulderCO-NCAR 40.0375 -105.242 41.0375 -104.242
40.0375 -105.242 BoulderCO-NCAR
	# of points: 18516, slope:  1.13e-03 ± 2.16e-05, intercept:  2.94e-08 ±  1.04e-09, R^2 =  0.1290
	# of points: 18516, slope:  1.13e-03, intercept:  2.94e-08, R^2 =  0.1290
	# of points: 18516, slope:  1.31e-03, intercept:  2.61e-08, R^2 =  0.1967
BuffaloNY 43.0015 -78.7869 44.0015 -77.7869
43.0015 -78.7869 BuffaloNY
	# of points: 3912, slope:  1.27e-03 ± 3.12e-05, intercept:  1.82e-08 ±  1.25e-09, R^2 =  0.2986
	# of points: 3912, slope:  1.27e-03, intercept:  1.82e-08, R^2 =  0.2986
	# of points: 3912, slope:  1.36e-03, intercept:  1.68e-08, R^2 =  0.3039
CorpusChristiTX 27.715 -97.3287 28.715 -96.3287
27.715 -97.3287 CorpusChristiTX
	# of points: 10263, slope:  1.37e-03 ± 2.30e-05, intercept:  2.38e-08 ±  9.27e-10, R^2 =  0.2551
	# of points: 10263, slope:  1.37e-03, intercept:  2.38e-08, R^2 =  0.2551
	# of points: 10263, slope:  1.45e-03, intercept:  2.09e-08, R^2 =  0.3118
HoustonTX-SanJacinto 29.579

# Final results

## In order to convert NO2 tropospheric column, moles/m^2, into near surface concentration of NO2, moles/m^3, the following transfer function needs to be used
## Csfc = 1.34e-3 x NO2_trop_col + 5.65e-08
## these statistics were obtained on March 14, 2025 and are based on the most up-to-date Pandora data available from 8 Pandora stations

### since the regression varies from station to station, adding or removing stations from this sort of analysis may significantly affect the final results