In [None]:
import glob 
import os 
import pandas as pd 
import numpy as np 
import rasterio
import pickle
from tqdm.notebook import tqdm 
from datetime import datetime

## Create dataset for DDM 

DDM is a temporal model, meaning it should be trained for a specific period of time. Because our imagery cadence is inconsistent (e.g., we don't reliably have one image per day or per week for each location), we reorganize the dataset by week. Each location should have one image per week, 52 images in total. If a location has more than one image in a given week, we keep a single one (the code below takes the first in sorted filename order). If it has no image in a given week, we use the image from the previous week; weeks before a location's first available week are filled with its first image. 

The resulting dataset allows us to train a DDM model for each week of the year. 

In [None]:
# Load the ground-truth table. The dataset-building cell reads its
# `label`, `location_name` and `dir` columns.
TRUTH_CSV = '../local/truth.csv'
truth_df = pd.read_csv(TRUTH_CSV)
truth_df.head()

In [None]:
def extract_week(img):
    """Return ``(week, month)`` parsed from the date prefix of an image path.

    The last ``os.sep``-separated component of ``img`` is taken as the file
    name, and the text before its first underscore is parsed as ``%Y%m%d``.

    Parameters
    ----------
    img : str
        Path (or bare file name) of an image, e.g. ``"20200115_scene.tif"``.

    Returns
    -------
    tuple of (int, int)
        The ISO calendar week number and the calendar month of that date.
        Being an ISO week, the first value can be 53, and dates close to
        New Year may belong to a week of the adjacent ISO year.

    Raises
    ------
    ValueError
        If the file-name prefix is not a valid ``%Y%m%d`` date.
    """
    filename = img.rpartition(os.sep)[2]
    stamp = filename.partition('_')[0]
    taken = datetime.strptime(stamp, '%Y%m%d')
    return taken.isocalendar()[1], taken.month


In [None]:
# Build the weekly dataset: for every kept location, map each week of the year
# ("week_1" .. "week_52") to the r/g/b bands of exactly one image, and keep the
# list of images that were used under the "images" key.
N_WEEKS = 52        # weekly slots stored per location
EXCLUDED_LABEL = 2  # rows with this label are skipped (meaning not visible here -- TODO confirm)

time_periods = {}

# `total` gives tqdm a length; the iterrows() generator alone has none.
for _, row in tqdm(truth_df.iterrows(), total=len(truth_df)):

    if row.label == EXCLUDED_LABEL:
        continue

    ims = sorted(glob.glob(os.path.join(row.dir, '*.tif')))
    if not ims:
        # An empty directory used to crash the whole run with IndexError on
        # ims[0]; report it and move on so the other locations still build.
        print(f"Skipping {row.location_name}: no .tif images found in {row.dir}")
        continue
    im0 = ims[0]

    # Placeholder slots; every one of them is overwritten by the fill loop below.
    time_periods[row.location_name] = {f"week_{w}": {} for w in range(1, N_WEEKS + 1)}

    weeks = [extract_week(im)[0] for im in ims]

    # np.unique sorts the week numbers, and return_index points at the FIRST
    # image (in sorted filename order) of each week, so that one is kept.
    weeks, inds = np.unique(weeks, return_index=True)
    ims = np.array(ims)[inds]

    # No image in week 1: back-fill the start of the year with the first image.
    if weeks[0] != 1:
        weeks = np.insert(weeks, 0, 1)
        ims = np.insert(ims, 0, im0)

    # NOTE(review): extract_week returns ISO week numbers, so an image can land
    # in week 53. Such an image is listed under "images" but fills no weekly
    # slot, because the last segment below is range(53, 53).
    loaded_path = None
    for idx, path in enumerate(ims):

        # The back-filled week-1 entry is usually the same file as the next
        # entry; do not read the same raster twice in a row.
        if path != loaded_path:
            with rasterio.open(path) as src:
                # Read only bands 1-3, taken as b, g, r exactly as before.
                # Unlike unpacking src.read(), this does not require the
                # raster to have exactly four bands.
                b, g, r = src.read([1, 2, 3])
            loaded_path = path

        # Forward-fill: this image covers its own week up to (not including)
        # the next week that has an image, or the end of the year.
        end = N_WEEKS + 1 if idx == len(ims) - 1 else weeks[idx + 1]
        for w in range(weeks[idx], end):

            time_periods[row.location_name][f"week_{w}"] = {
                "r": r,
                "g": g,
                "b": b
            }

    time_periods[row.location_name]['images'] = ims

    


In [None]:
# Serialize `time_periods` to `time_periods.p` in the current working directory.
PICKLE_PATH = 'time_periods.p'
with open(PICKLE_PATH, 'wb') as out_file:
    pickle.dump(time_periods, out_file)

In [None]:
# Inspection only: `weeks` is whatever the dataset-building loop left behind,
# i.e. the week numbers of the last location it processed.
last_weeks = np.unique(weeks)
last_weeks