In [2]:
import re
from os.path import join
import numpy as np
import pandas as pd
import xarray as xr
from tqdm.notebook import tqdm
from joblib import Parallel, delayed
from glob import glob
import matplotlib.pyplot as plt

In [None]:
# Per-location data directories; each one is handed to `process_file` by the
# parallel run further down. Sorted because `glob` returns paths in arbitrary
# order, and the seeded shuffle applied later is only reproducible when it
# starts from a stable ordering.
location_dirs = sorted(glob("/home/patel_zeel/OpenAQ/PurpleAir/data/*"))
# Every gzipped CSV anywhere below the data root (recursive search).
csv_files = glob("/home/patel_zeel/OpenAQ/PurpleAir/data/**/*.csv.gz", recursive=True)

In [None]:
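# Alternative: merge all gzipped CSV files in the current directory into a single file with a shell command.
# Note: the files are concatenated as-is, so each file's header row (if present) is repeated in the combined data.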
# cat *.csv.gz > combined.csv.gz

In [2]:
def process_file(location_dir):
    """Build one xarray Dataset per distinct (lat, lon) pair in a location directory.

    Every ``*.csv.gz`` file below ``location_dir`` (searched recursively) is read
    and concatenated. Rows are grouped by their exact ``lat``/``lon`` pair and each
    group is pivoted so that ``datetime`` is the index and every ``parameter``
    becomes a column holding the mean ``value``.

    Parameters
    ----------
    location_dir : str
        Directory holding the gzipped CSV files of a single ``location_id``.

    Returns
    -------
    list of xarray.Dataset
        One dataset per distinct lat/lon pair, in order of first appearance. Each
        has a length-1 ``latlon_id`` dimension carrying ``location_id``,
        ``timezone``, ``location``, ``lat`` and ``lon`` coordinates. The list is
        empty when the directory contains no ``*.csv.gz`` files.

    Raises
    ------
    AssertionError
        If the files do not contain exactly one ``location_id``.
    IndexError
        If the first ``datetime`` of a group carries no ``+HH:MM``/``-HH:MM`` offset.
    """
    csv_files = glob(join(location_dir, "**/*.csv.gz"), recursive=True)
    if not csv_files:
        # pd.concat raises ValueError on an empty list; a directory without data
        # contributes no datasets instead of aborting the whole parallel run.
        return []

    combo_df = pd.concat([pd.read_csv(file) for file in csv_files])
    location_ids = combo_df["location_id"].unique()
    assert len(location_ids) == 1, f"location_id assert: {location_ids}"
    location_id = location_ids[0]

    # String key that identifies an exact coordinate pair; a location whose
    # reported position changed is split into one dataset per position.
    combo_df["lat_lon"] = combo_df["lat"].astype(str) + "_" + combo_df["lon"].astype(str)

    ds_list = []
    # sort=False keeps groups in order of first appearance, so the suffixes match
    # what iterating over combo_df["lat_lon"].unique() would produce.
    for lat_lon_suffix, (_, df) in enumerate(combo_df.groupby("lat_lon", sort=False)):
        # NOTE(review): plain digit concatenation can collide across locations
        # (location 1 + suffix 10 and location 11 + suffix 0 both give 110).
        # Confirm the suffix never reaches 10 or switch to a fixed-width suffix.
        latlon_id = int(f"{location_id}{lat_lon_suffix}")

        # Check the grouping invariants before doing the expensive pivot.
        assert len(df.lat.unique()) == 1, df[["lat", "lon", "location_id", "location"]].drop_duplicates()
        assert len(df.lon.unique()) == 1, np.unique(df[["lat", "lon"]])

        first_row = df.iloc[0]
        # UTC offset (e.g. "-08:00") is taken from the first timestamp of the group only.
        timezone = re.findall(r"[-+]\d{2}:\d{2}", first_row["datetime"])[0]

        # `datetime` stays exactly as read from the CSV (no datetime parsing here);
        # duplicate (datetime, parameter) readings are averaged.
        new_df = df.pivot_table(index=["datetime"], columns="parameter", values="value", aggfunc="mean")
        ds = new_df.to_xarray().expand_dims("latlon_id")
        ds["latlon_id"] = [latlon_id]

        # Per-site metadata stored as coordinates along the length-1 latlon_id
        # dimension. `location` is not checked for uniqueness; the first row's
        # value is used.
        ds.coords["location_id"] = ("latlon_id", [first_row["location_id"]])
        ds.coords["timezone"] = ("latlon_id", [timezone])
        ds.coords["location"] = ("latlon_id", [first_row["location"]])
        ds.coords["lat"] = ("latlon_id", [first_row["lat"]])
        ds.coords["lon"] = ("latlon_id", [first_row["lon"]])
        ds_list.append(ds)

    return ds_list
    
# Seed NumPy's global RNG, then shuffle the directory list in place; for a given
# input order the shuffled order is the same on every run.
np.random.seed(0)
np.random.shuffle(location_dirs)

# Fan the directories out over joblib workers. Each task returns the list of
# datasets produced by `process_file`, so `ds_list` is a list of lists.
n_processes = 48
ds_list = Parallel(n_jobs=n_processes)(
    map(delayed(process_file), tqdm(location_dirs))
)

  0%|          | 0/22498 [00:00<?, ?it/s]



In [3]:
# Open the `.nc` file as an xarray Dataset and show its summary as the cell output.
# NOTE(review): absolute, user-specific path; the cell only runs where this file exists.
ds = xr.open_dataset("/home/patel_zeel/OpenAQ/PurpleAir/purpleair_california.nc")
ds

## Appendix

In [5]:
# Open the zipped Zarr store as an xarray Dataset and show its summary as the cell output.
# This rebinds `ds`, replacing the dataset opened from purpleair_california.nc above.
# NOTE(review): the store lives under /tmp, so it may not exist in a fresh session.
ds = xr.open_zarr("/tmp/zasdjsadelhi/cpcb_camx_ds.zarr.zip")
ds

Unnamed: 0,Array,Chunk
Bytes,46.02 kiB,46.02 kiB
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 46.02 kiB 46.02 kiB Shape (63,) (63,) Dask graph 1 chunks in 2 graph layers Data type",63  1,

Unnamed: 0,Array,Chunk
Bytes,46.02 kiB,46.02 kiB
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,4.43 kiB,4.43 kiB
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 4.43 kiB 4.43 kiB Shape (63,) (63,) Dask graph 1 chunks in 2 graph layers Data type",63  1,

Unnamed: 0,Array,Chunk
Bytes,4.43 kiB,4.43 kiB
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,504 B,504 B
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 504 B 504 B Shape (63,) (63,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",63  1,

Unnamed: 0,Array,Chunk
Bytes,504 B,504 B
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,504 B,504 B
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 504 B 504 B Shape (63,) (63,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",63  1,

Unnamed: 0,Array,Chunk
Bytes,504 B,504 B
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.18 kiB,4.18 kiB
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 4.18 kiB 4.18 kiB Shape (63,) (63,) Dask graph 1 chunks in 2 graph layers Data type",63  1,

Unnamed: 0,Array,Chunk
Bytes,4.18 kiB,4.18 kiB
Shape,"(63,)","(63,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 MiB,273.75 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.11 MiB 273.75 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float32 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,2.11 MiB,273.75 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.11 MiB,273.75 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.11 MiB 273.75 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float32 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,2.11 MiB,273.75 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.21 MiB 547.50 kiB Shape (8760, 63) (2190, 32) Dask graph 8 chunks in 2 graph layers Data type float64 numpy.ndarray",63  8760,

Unnamed: 0,Array,Chunk
Bytes,4.21 MiB,547.50 kiB
Shape,"(8760, 63)","(2190, 32)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [12]:
# Flatten one station's data for the "2023-12" selection into a DataFrame and add
# the gap between the two PM2.5 columns.
# NOTE(review): presumably "PM2.5" is the measured and "P25" the modelled value; confirm.
df = (
    ds.sel(Timestamp="2023-12")
    .isel(station=0)  # first station along the `station` dimension
    .to_dataframe()
    .reset_index()
    .assign(diff=lambda frame: frame["PM2.5"] - frame["P25"])
)
# Correlation of every numeric column with that gap.
df.corr(numeric_only=True)["diff"]

AT            -0.006902
BP             0.068563
Benzene        0.255110
CO             0.391459
Eth-Benzene         NaN
MP-Xylene           NaN
NH3            0.426099
NO             0.403536
NO2            0.323284
NOx            0.436226
O Xylene            NaN
Ozone         -0.150440
P10           -0.149375
P25           -0.142300
PM10           0.772530
PM2.5          0.866415
RF                  NaN
RH             0.231162
SO2            0.074136
SR             0.087294
TOT-RF              NaN
Toluene        0.182347
VWS                 NaN
WD            -0.151958
WS                  NaN
Xylene         0.321350
latitude            NaN
longitude           NaN
diff           1.000000
Name: diff, dtype: float64