In [None]:
# default_exp historical

# Create Historical Dataset (2012 onwards)
> This module has functions to generate burned area predictions for an extended historical period.

**Note:** This module is currently being tested.

In [None]:
# export
import pandas as pd
import scipy.io as sio
import requests
import IPython
import matplotlib.pyplot as plt
from nbdev.imports import test_eq
import datetime
from geoget.download import run_all
from banet.core import filter_files, ls, Path, InOutPath, ProjectPath
from banet.geo import Region
from banet.data import *
from banet.predict import predict_time
Path.ls = ls

In [None]:
# hide
from nbdev.showdoc import show_doc
from nbdev.export import notebook2script
from IPython.core.debugger import set_trace

In [None]:
# export
class RunManager():
    "Manage the download, pre-processing and BA-Net inference steps for a historical period."
    def __init__(self, project_path:ProjectPath, region:str, times:pd.DatetimeIndex,
                 product:str='VIIRS750', max_size=2000):
        """
        project_path: banet.core.ProjectPath object
        region: name of the region
        times: dates for the first day of month for each month to use
        product: VIIRS750 or VIIRS375
        max_size: tile size to use on inference to reduce memory usage
        """
        self.path    = project_path
        self.times   = self.init_times(times)
        self.product = product
        self.region  = region
        # Keep the value given by the caller (it used to be overwritten with 2000).
        self.max_size= max_size

    def init_times(self, times):
        """Expand `times` into a daily `pd.DatetimeIndex`.

        The range starts on the first day of the month that contains
        `times[0]` minus 15 days and ends on the last day of the month
        that precedes `times[-1]` plus 75 days.
        """
        tstart = times[0] - pd.Timedelta(days=15)
        tstart = pd.Timestamp(f'{tstart.year}-{tstart.month}-01')
        tend = times[-1] + pd.Timedelta(days=75)
        # First day of that month minus one day gives the end of the previous month.
        tend = pd.Timestamp(f'{tend.year}-{tend.month}-01') - pd.Timedelta(days=1)
        return pd.date_range(tstart, tend, freq='D')

    def check_data(self):
        """Check existing and missing files in dataset folder.

        Returns a dict with the keys 'files' and 'missing_files'; both lists
        follow the order of `self.times` and hold the expected
        `{product}{region}_{YYYYMMDD}.nc` paths.
        """
        times = self.times
        files, missing_files = [], []
        for t in times:
            tstr = t.strftime('%Y%m%d')
            file = self.path.dataset/f'{self.product}{self.region}_{tstr}.nc'
            if file.is_file():
                files.append(file)
            else:
                missing_files.append(file)
        return {'files': files, 'missing_files': missing_files}

    def get_download_dates(self):
        """Find for which new dates the files need to be downloaded.

        Returns a `(start, end)` tuple of 'YYYY-MM-DD HH:MM:SS' strings. `start`
        is the first day of `self.times` when no dataset file exists, otherwise
        the day after the date of the last existing file; `end` is the last day
        of `self.times`.
        """
        files = self.check_data()['files']
        if len(files) == 0:
            start = self.times[0]
        else:
            # The date is the last '_'-separated token of the file stem.
            start = pd.Timestamp(files[-1].stem.split('_')[-1])+pd.Timedelta(days=1)
        start = start.strftime('%Y-%m-%d 00:00:00')
        end = self.times[-1].strftime('%Y-%m-%d 23:59:59')
        return start, end

    def download_viirs(self):
        """Download viirs data needed for the dataset.

        Raises `NotImplementedError` when `self.product` is neither
        'VIIRS750' nor 'VIIRS375'.
        """
        tstart, tend = self.get_download_dates()
        region = Region.load(f'{self.path.config}/R_{self.region}.json')

        if self.product == 'VIIRS750':
            viirs_downloader = VIIRS750_download(region, tstart, tend)
            viirs_downloader_list = viirs_downloader.split_times()

        elif self.product == 'VIIRS375':
            viirs_downloader1 = VIIRS375_download(region, tstart, tend)
            # The pixel size is changed only after the 375 downloader was built.
            region.pixel_size = 0.1 # Angles can be interpolated later
            viirs_downloader2 = VIIRS750_download(region, tstart, tend,
                                bands=['SolarZenithAngle', 'SatelliteZenithAngle'])
            viirs_downloader_list1 = viirs_downloader1.split_times()
            viirs_downloader_list2 = viirs_downloader2.split_times()
            viirs_downloader_list = [*viirs_downloader_list1, *viirs_downloader_list2]

        else: raise NotImplementedError(f'Not implemented for {self.product}.')

        run_all(viirs_downloader_list, self.path.ladsweb)

    def preprocess_dataset_750(self):
        "Apply pre-processing to the rawdata and saves results in dataset directory."
        paths = InOutPath(f'{self.path.ladsweb}', f'{self.path.dataset}')
        R = Region.load(f'{self.path.config}/R_{self.region}.json')
        bands = ['Reflectance_M5', 'Reflectance_M7', 'Reflectance_M10', 'Radiance_M12',
                 'Radiance_M15', 'SolarZenithAngle', 'SatelliteZenithAngle']
        print('\nPre-processing data...')
        viirs = Viirs750Dataset(paths, R, bands=bands)
        merge_tiles = MergeTiles('SatelliteZenithAngle')
        mir_calc = MirCalc('SolarZenithAngle', 'Radiance_M12', 'Radiance_M15')
        rename = BandsRename(['Reflectance_M5', 'Reflectance_M7'], ['Red', 'NIR'])
        bfilter = BandsFilter(['Red', 'NIR', 'MIR'])
        act_fires = ActiveFiresLog(f'{self.path.hotspots}/hotspots{self.region}.csv')
        viirs.process_all(proc_funcs=[merge_tiles, mir_calc, rename, bfilter, act_fires])

    def preprocess_dataset_375(self):
        "Apply pre-processing to the rawdata and saves results in dataset directory."
        paths = InOutPath(f'{self.path.ladsweb}', f'{self.path.dataset}')
        R = Region.load(f'{self.path.config}/R_{self.region}.json')
        bands = ['Reflectance_I1', 'Reflectance_I2', 'Reflectance_I3',
                 'Radiance_I4', 'Radiance_I5', 'SolarZenithAngle', 'SatelliteZenithAngle']
        print('\nPre-processing data...')
        viirs = Viirs375Dataset(paths, R, bands=bands)
        # The angle bands are downloaded with pixel_size=0.1 (see download_viirs).
        interpAng = InterpolateAngles(R.new(pixel_size=0.1), R,
                      ['SolarZenithAngle', 'SatelliteZenithAngle'])
        merge_tiles = MergeTiles('SatelliteZenithAngle')
        mir_calc = MirCalc('SolarZenithAngle', 'Radiance_I4', 'Radiance_I5')
        rename = BandsRename(['Reflectance_I1', 'Reflectance_I2'], ['Red', 'NIR'])
        bfilter = BandsFilter(['Red', 'NIR', 'MIR'])
        act_fires = ActiveFiresLog(f'{self.path.hotspots}/hotspots{self.region}.csv')
        viirs.process_all(proc_funcs=[interpAng, BandsAssertShape(), merge_tiles,
                                      mir_calc, rename, bfilter, act_fires])

    def preprocess_dataset(self):
        "Run the pre-processing that matches `self.product` (VIIRS750 or VIIRS375)."
        if self.product == 'VIIRS750':
            self.preprocess_dataset_750()
        elif self.product == 'VIIRS375':
            self.preprocess_dataset_375()
        else: raise NotImplementedError(f'Not implemented for {self.product}.')

    def init_model_weights(self, weight_files:list):
        """Downloads model weights if they don't exist yet on config directory.

        Returns the list of local paths, in the order of `weight_files`.
        Raises `requests.HTTPError` when a download answers with an error status.
        """
        local_files = []
        for w in weight_files:
            file_save = self.path.config/w
            if not file_save.is_file():
                print(f'Downloading model weights {w}')
                url = f'https://github.com/mnpinto/banet_weights/raw/master/model/{w}'
                response = requests.get(url)
                # Fail loudly instead of caching an HTTP error page as a weights file,
                # which would never be re-downloaded because the file then exists.
                response.raise_for_status()
                with open(str(file_save), 'wb') as f:
                    f.write(response.content)
            local_files.append(file_save)
        return local_files

    def get_preds(self, weight_files:list, threshold=0.5, save=True, max_size=None,
                  filename='data'):
        """Computes BA-Net predictions ensembling the models in the weight_files list.

        `max_size` defaults to the value given to the constructor (2000 unless
        overridden); pass it explicitly to use a different tile size for this call.
        """
        if max_size is None: max_size = self.max_size
        local_files = self.init_model_weights(weight_files)
        iop = InOutPath(self.path.dataset, self.path.outputs, mkdir=False)
        region = Region.load(f'{self.path.config}/R_{self.region}.json')
        predict_time(iop, self.times, local_files, region, threshold=threshold,
                     save=save, max_size=max_size, product=self.product, output=filename)

In [None]:
# Show the documentation of the main RunManager entry points.
show_doc(RunManager.preprocess_dataset)
show_doc(RunManager.init_model_weights)
show_doc(RunManager.get_preds)

<h4 id="RunManager.preprocess_dataset" class="doc_header"><code>RunManager.preprocess_dataset</code><a href="__main__.py#L104" class="source_link" style="float:right">[source]</a></h4>

> <code>RunManager.preprocess_dataset</code>()



<h4 id="RunManager.init_model_weights" class="doc_header"><code>RunManager.init_model_weights</code><a href="__main__.py#L111" class="source_link" style="float:right">[source]</a></h4>

> <code>RunManager.init_model_weights</code>(**`weight_files`**:`list`)

Downloads model weights if they don't exist yet on config directory.

<h4 id="RunManager.get_preds" class="doc_header"><code>RunManager.get_preds</code><a href="__main__.py#L124" class="source_link" style="float:right">[source]</a></h4>

> <code>RunManager.get_preds</code>(**`weight_files`**:`list`, **`threshold`**=*`0.5`*, **`save`**=*`True`*, **`max_size`**=*`2000`*, **`filename`**=*`'data'`*)

Computes BA-Net predictions ensembling the models in the weight_files list.

Running all processes looks like this:

```python
region = 'BR'
paths = ProjectPath('../hide/historical_test')
weight_files = ['banetv0.20-val2017-fold0.pth']

times = pd.date_range('2015-08-01', '2015-09-01', freq='MS')
# Save R_{region}.json file in config folder
manager = RunManager(paths, region, times, product='VIIRS375')
manager.download_viirs()
# Save the hotspots{region}.csv file in the hotspots folder
manager.preprocess_dataset()
manager.get_preds(weight_files, threshold=0.01, filename='ba100m')
```

```python
region = 'PT'
paths = ProjectPath('../hide/historical_test')
weight_files = ['banetv0.20-val2017-fold0.pth']
times = pd.date_range('2017-06-01', '2017-10-01', freq='MS')
Region(region, [-10, 36, -6, 44], 0.001).export(paths.config/f'R_{region}.json')

manager = RunManager(paths, region, times, product='VIIRS375')
manager.download_viirs()
# Save the hotspots{region}.csv file in the hotspots folder
manager.preprocess_dataset()
manager.get_preds(weight_files, threshold=0.01, filename=f'ba100m_{region}{times[0].year}')
```


In [None]:
# hide
# Convert the project notebooks (see the "Converted ..." lines in the output).
notebook2script()

Converted 00_core.ipynb.
Converted 01_geo.ipynb.
Converted 02_data.ipynb.
Converted 03_models.ipynb.
Converted 04_predict.ipynb.
Converted 04b_nrt.ipynb.
Converted 04c_historical.ipynb.
Converted 05_train.ipynb.
Converted 06_cli.ipynb.
Converted 07_web.ipynb.
Converted index.ipynb.
Converted tutorial.australia2020.ipynb.
