In [None]:
import numpy as np
import os
import ast
import pickle as pkl
import pandas as pd
import yaml
import json
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Input/output locations, all relative to the notebook's working directory.
MERGED_DIR = Path('data/merged')
# One sub-directory per site. Non-directory entries (e.g. editor or OS droppings)
# are ignored, and the names are sorted so every run sees the same order.
SITES = sorted(entry.name for entry in MERGED_DIR.iterdir() if entry.is_dir())
MODIS_A4_DIR = Path('data/raw/modis_a4')
MODIS_A2_DIR = Path('data/raw/modis_a2')

In [3]:
# MCD43A2 land/water type code -> binary water flag (1 = water, 0 = not water).
# Shorelines and the fill value are deliberately counted as "not water".
water_dict = dict(
    [
        (0, 1),    # shallow ocean
        (1, 0),    # land
        (2, 0),    # ocean coastlines and lake shorelines
        (3, 1),    # shallow inland water
        (4, 1),    # ephemeral water
        (5, 1),    # deep inland water
        (6, 1),    # moderate or continental ocean
        (7, 1),    # deep ocean
        (255, 0),  # fill value, treated as land for simplicity
    ]
)

# Every MODIS band uses -1 as its fill value once cleaned.
def clean_a4_data(arr):
    """Rescale a raw A4 pixel stack and mark unusable pixels with -1.

    Raw values below 0 or above 30000 are considered fill. Remaining values
    above 10000 are capped at 10000. Everything is then divided by 10000, so
    valid pixels land in [0, 1] and fill pixels become exactly -1.

    Parameters
    ----------
    arr : np.ndarray
        Raw values with at least three axes; the function indexes it as
        ``[:, 1:9, 1:9]`` (assumed to be band x row x col -- TODO confirm).

    Returns
    -------
    np.ndarray
        float32 array holding indices 1..8 of the second and third axes.
    """
    is_fill = (arr > 30000) | (arr < 0)
    capped = np.where(arr > 10000, 10000, arr)
    # Fill is written as -10000 so that the division below turns it into -1.
    scaled = np.where(is_fill, -10000, capped) / 10000.0
    cropped = scaled[:, 1:9, 1:9]
    return cropped.astype(np.float32)

def clean_a2_data(arr):
    """Build the snow and water layers from a raw A2 pixel stack.

    Parameters
    ----------
    arr : array-like, indexable by band
        ``arr[0]`` is read as the snow flag (0 = no snow, 1 = snow,
        255 = fill) and ``arr[2]`` as the land/water type code understood by
        ``water_dict``. ``arr[1]`` is not used.

    Returns
    -------
    np.ndarray
        float32 array of shape (2, ...) -- snow first, water second -- cropped
        to indices 1..8 of the two pixel axes. Snow fill is re-coded to -1;
        water is 1 where ``water_dict`` maps the code to 1, else 0.

    Raises
    ------
    ValueError
        If ``arr[2]`` contains a code that is not a key of ``water_dict``.
    """
    # Snow: 0 = no snow, 1 = snow, 255 = fill.
    # Cast to float first and patch the fill afterwards, so writing -1 never
    # depends on how NumPy promotes a negative scalar against an unsigned array.
    snow_raw = np.asarray(arr[0])
    snow_arr = snow_raw.astype(np.float32)
    snow_arr[snow_raw == 255] = -1

    # Water: one vectorised comparison per known code instead of a per-pixel
    # dict lookup; this also lets us detect codes the mapping does not cover.
    water_codes = np.asarray(arr[2])
    water_arr = np.zeros(water_codes.shape, dtype=np.float32)
    recognised = np.zeros(water_codes.shape, dtype=bool)
    for code, is_water in water_dict.items():
        hits = water_codes == code
        water_arr[hits] = is_water
        recognised |= hits
    if not recognised.all():
        unknown = np.unique(water_codes[~recognised]).tolist()
        raise ValueError(f'Unmapped land/water type code(s) in A2 data: {unknown}')

    return np.stack((snow_arr, water_arr), axis=0)[:,1:9,1:9]


In [4]:
# Rebuild each site's combined MODIS file: cleaned A4 bands stacked on top of
# the A2 snow/water layers, keyed by timestamp.
# Only real site directories are visited; a stray file inside MERGED_DIR would
# otherwise fail when modis.pkl is opened underneath it.
sites = sorted(entry.name for entry in MERGED_DIR.iterdir() if entry.is_dir())
for site in tqdm(sites):
    site_dir = MERGED_DIR / site
    a2_file = MODIS_A2_DIR / f'{site}.pkl'
    a4_file = MODIS_A4_DIR / f'{site}.pkl'
    modis_processed = {}

    # Remove the per-product files (presumably left by an earlier version of
    # this step and superseded by modis.pkl -- confirm).
    for stale_name in ('modis_a2.pkl', 'modis_a4.pkl'):
        stale_file = site_dir / stale_name
        if stale_file.exists():
            stale_file.unlink()

    if a2_file.exists() and a4_file.exists():
        # NOTE: pickle.load can execute arbitrary code; only feed it trusted files.
        with open(a2_file, 'rb') as f:
            a2_raw = pkl.load(f)
        with open(a4_file, 'rb') as f:
            a4_raw = pkl.load(f)
        a2_by_ts = a2_raw['pixel_values']
        a4_by_ts = a4_raw['pixel_values']

        # Both products are needed to build a stacked sample. Report, rather
        # than crash on, A2 timestamps that have no A4 counterpart.
        unmatched = [ts for ts in a2_by_ts if ts not in a4_by_ts]
        if unmatched:
            tqdm.write(f'{site}: skipped {len(unmatched)} A2 timestamp(s) with no A4 counterpart')

        for ts, a2_values in a2_by_ts.items():
            if ts not in a4_by_ts:
                continue
            a4_pixels = clean_a4_data(a4_by_ts[ts])
            a2_pixels = clean_a2_data(a2_values)
            modis_processed[ts] = np.concatenate((a4_pixels, a2_pixels), axis=0)

    # A site missing either raw file still gets a modis.pkl holding an empty dict.
    with open(site_dir / 'modis.pkl', 'wb') as f:
        pkl.dump(modis_processed, f)

  0%|          | 0/417 [00:00<?, ?it/s]

100%|██████████| 417/417 [01:01<00:00,  6.75it/s]


# Tarring it up
# starting in the data dir (the directory that contains merged/ and phenocam/)
# Stage a carbonsense_v2/ folder whose entries are symlinks to the real data.
mkdir carbonsense_v2
# Symlink targets are resolved relative to carbonsense_v2/, hence the leading ../
ln -s ../merged carbonsense_v2/site_data
ln -s ../phenocam carbonsense_v2/phenocam
# h = follow symlinks, so the archive stores the linked files rather than the links
tar hcvf carbonsense_v2.tar carbonsense_v2