# 00_Data_Download.ipynb

### How to Install

1. Install processing environment from conda YML file. More details in: https://developers.google.com/earth-engine/python_install-conda

```
    conda env create -f processing_environment.yml
```
  
2. Create a new kernel to use in JupyterLab

```
ipython kernel install --name ee --user
```

3. Update kernel.json to point to the Python 3.8 interpreter of the ee environment (the version GEE uses)
    - check if ee kernel exists
    
```
    jupyter kernelspec list
```
    - find the conda binary that the ee environment uses
    
```
    whereis conda
    ls /opt/conda/envs/
```
    - update the conda binary via vim
    
```
    vim ~/.local/share/jupyter/kernels/ee/kernel.json
    /opt/conda/envs/ee/bin/python
```

4. Install gdal

```
conda activate ee
conda install gdal
```

5. In a new notebook in JupyterLab, select the kernel 'ee'


### How to Add New Areas

In utils/gee_settings.py:
1. In the 'areas' list, add the area name with spaces removed, e.g. Villa del Rosario > villadelrosario
2. In the BBOX dict, add the bounding box as a list of 4 numbers: the upper-left corner followed by the lower-right corner
3. In the CLOUD_PARAMS dict, specify the cloud filter and whether masking will be applied or not
4. In admin2RefN, add the area's name as used in the Admin Boundary shapefile

Once the downloaded file shows up in gs://immap-gee:
1. Check if the area is split into multiple files
2. If yes, add the area to the multi-part list in the "Input params" section

## Load tools

In [2]:
import geopandas as gpd
from fiona.crs import to_string
import pathlib
from tqdm import tqdm

import sys
# Put the project's ../utils folder first on the module search path so the local `gee` module can be imported.
sys.path.insert(0, '../utils')
# NOTE(review): importing `gee` appears to trigger the Earth Engine authorization prompt shown in this cell's output — confirm.
from gee import sen2median, deflatecrop1

# Root data folder, relative to the notebook's working directory. The trailing slash matters:
# sub-folder paths are built from this value by plain string concatenation.
data_dir = "../data/"

Enter verification code:  <redacted>



Successfully saved authorization token.


In [3]:
# Sub-folders of the data directory used by this notebook (strings with a trailing slash).
adm_dir = data_dir + 'admin_bounds/'
img_dir = data_dir + 'images/'
tmp_dir = data_dir + 'tmp/'

dirs = [adm_dir, img_dir, tmp_dir]
for dir_ in dirs:
    # Path objects are not context managers: `with Path(...)` has been a no-op since
    # Python 3.9 and is removed in 3.13. exist_ok=True already makes mkdir a no-op
    # for folders that exist, so no separate exists() check is needed.
    pathlib.Path(dir_).mkdir(parents=True, exist_ok=True)

In [4]:
# gee_settings.py
# Inline stand-in for: from gee_settings import BBOX, CLOUD_PARAMS, admin2RefN

# Bounding box per area as four numbers: upper-left corner, then lower-right corner.
BBOX = {
    'testuribia': [-72.292152, 11.734492, -72.244001, 11.686520],
}

# Per area and year range: a (cloud filter value, apply mask?) pair.
CLOUD_PARAMS = {
    'testuribia': {
        year_range: (40, True)
        for year_range in ('2015-2016', '2017-2018', '2019-2020')
    },
}

## Input params

In [5]:
PRODUCT = 'COPERNICUS/S2' # L1C
years = ['2015-2016']
def get_minmaxdt(year_str):
    """Return the first and last calendar day covered by a year label.

    Parameters
    ----------
    year_str : str
        A year range such as '2015-2016', or a single year such as '2016'.

    Returns
    -------
    tuple of str
        ('<start year>-01-01', '<end year>-12-31'). A single year is treated
        as a range that starts and ends in that year.
    """
    parts = year_str.split('-')
    start_year = parts[0]
    # A label without '-' previously raised IndexError; reuse the same year as the end.
    end_year = parts[1] if len(parts) > 1 else start_year
    return start_year + '-01-01', end_year + '-12-31'

areas = ['testuribia']

## Download from GEE

In [5]:
# Call sen2median once for every (area, year range) pair.
for area in areas:
    for year in years:
        # Cloud settings and the date window are looked up per area and year range.
        cloud_pct, mask = CLOUD_PARAMS[area][year]
        min_dt, max_dt = get_minmaxdt(year)
        sen2median(
            BBOX[area],
            **dict(
                FILENAME = f'gee_{area}_{year}',
                min_dt = min_dt,
                max_dt = max_dt,
                cloud_pct = cloud_pct,
                mask = mask,
                PRODUCT = PRODUCT,
                verbose = 1,
            )
        )

Processing gee_testuribia_2015-2016
using COPERNICUS/S2
Filtering to images with cloud cover < 40
with mask
Task started


In [17]:
# Download from GDrive: https://medium.com/@acpanjan/download-google-drive-files-using-wget-3c2c025a8b99
# !sudo apt-get install unzip

# Turn the relative folders into absolute paths (trailing slash kept) before they are
# interpolated into the shell commands below.
img_dir = str(pathlib.Path(img_dir).resolve()) + '/'
adm_dir = str(pathlib.Path(adm_dir).resolve()) + '/'

# gee_testuribia_2015-2016.tif
# Two-step Google Drive download: the inner wget fetches the download page and saves the
# session cookies, sed pulls the "confirm" token out of that page, and the outer wget
# requests the file with the token and cookies. The cookie file is deleted afterwards.
# NOTE(review): --no-check-certificate disables TLS certificate verification on the inner request.
out_dir = img_dir#'/home/cholo/geoai-immap/data/images/'
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1PBVw3QJW4ZcSpcHjPBRhLMoS0xlLDFp3' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1PBVw3QJW4ZcSpcHjPBRhLMoS0xlLDFp3" -O {out_dir}gee_testuribia_2015-2016.tif && rm -rf /tmp/cookies.txt

# testuribia_shp.zip
# Same download recipe for the zipped shapefile, which is then extracted into the same folder.
out_dir = adm_dir#'/home/cholo/geoai-immap/data/admin_bounds/'
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1hBTSs6zFqoP8Qug45rp8tf55CBvVysgD' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1hBTSs6zFqoP8Qug45rp8tf55CBvVysgD" -O {out_dir}testuribia_shp.zip && rm -rf /tmp/cookies.txt
!unzip -d {out_dir} {out_dir}testuribia_shp.zip

--2020-06-02 08:13:31--  https://docs.google.com/uc?export=download&confirm=&id=1PBVw3QJW4ZcSpcHjPBRhLMoS0xlLDFp3
Resolving docs.google.com (docs.google.com)... 74.125.142.138, 74.125.142.100, 74.125.142.101, ...
Connecting to docs.google.com (docs.google.com)|74.125.142.138|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-08-5s-docs.googleusercontent.com/docs/securesc/l4i7u2rr9h06ghro9hfq6njlupgbp9l9/khoa30vgsr9j8f7earqv5ehicgglmc68/1591085550000/13853625020392332200/06423805926151996068Z/1PBVw3QJW4ZcSpcHjPBRhLMoS0xlLDFp3?e=download [following]
--2020-06-02 08:13:31--  https://doc-08-5s-docs.googleusercontent.com/docs/securesc/l4i7u2rr9h06ghro9hfq6njlupgbp9l9/khoa30vgsr9j8f7earqv5ehicgglmc68/1591085550000/13853625020392332200/06423805926151996068Z/1PBVw3QJW4ZcSpcHjPBRhLMoS0xlLDFp3?e=download
Resolving doc-08-5s-docs.googleusercontent.com (doc-08-5s-docs.googleusercontent.com)... 74.125.142.132, 2607:f8b0:400e:c08::84
Connecting to 

## Deflate and crop

In [19]:
# not yet working when run as python cell
# snippet of gee.py > deflatecrop1 function
#
# Writes a small shell script that activates the 'ee' conda environment and runs
# gdalwarp with the area's shapefile as cutline on the downloaded image, then runs
# that script and prints its output.
import shlex
import subprocess

area = 'testuribia'

# Shell script template; the {placeholders} are filled in below.
# NOTE(review): -srcnodata and -dstnodata are passed without values, so gdalwarp
# presumably reads "-dstnodata" as the -srcnodata value — confirm the intended nodata values.
text = '''
eval "$(conda shell.bash hook)"
conda activate ee
gdalwarp -cutline {cutline} -srcnodata -dstnodata {src} {dst}
'''
# Quote every path so folders containing spaces or shell metacharacters survive the shell.
replacement_txt = text.format(
    cutline=shlex.quote(f'{adm_dir}{area}.shp'),
    src=shlex.quote(f'{img_dir}gee_{area}_2015-2016.tif'),
    dst=shlex.quote(f'{img_dir}{area}_2015-2016.tif'),
)
# The context manager closes the script file even if the write fails.
with open("deflatecrop.sh", "w") as f:
    f.write(replacement_txt)
print(replacement_txt)

# The script relies on the bash-specific conda hook, so run it with bash rather than sh.
# An argument list avoids an extra shell layer; stderr is folded into stdout so that
# gdalwarp/conda errors show up in the notebook instead of being lost.
result = subprocess.run(
    ['bash', 'deflatecrop.sh'],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)
print(result.stdout.decode(errors='replace'))
# Raise CalledProcessError if the script failed, so later steps do not run on a missing file.
result.check_returncode()


eval "$(conda shell.bash hook)"
conda activate ee
gdalwarp -cutline /home/jupyter/geoai-immap/data/admin_bounds/testuribia.shp -srcnodata -dstnodata /home/jupyter/geoai-immap/data/images/gee_testuribia_2015-2016.tif /home/jupyter/geoai-immap/data/images/testuribia_2015-2016.tif

b'Creating output file that is 537P x 535L.\nProcessing /home/jupyter/geoai-immap/data/images/gee_testuribia_2015-2016.tif [1/1] : 0Copying nodata values from source /home/jupyter/geoai-immap/data/images/gee_testuribia_2015-2016.tif to destination /home/jupyter/geoai-immap/data/images/testuribia_2015-2016.tif.\n...10...20...30...40...50...60...70...80...90...100 - done.\n'


# 01_Data_Preprocessing.ipynb

In [20]:
import os
import operator
from tqdm import tqdm
import pandas as pd
import numpy as np
# Make pandas treat +/-inf as missing values.
# NOTE(review): the 'use_inf_as_na' option is deprecated in recent pandas releases — confirm against the pinned pandas version.
pd.set_option('use_inf_as_na', True)

import geopandas as gpd
import rasterio as rio

import sys
# Put the project's ../utils folder first on the module search path so the local `geoutils` module can be imported.
sys.path.insert(0, '../utils')
import geoutils

import logging
import warnings
# Quiet the notebook: root logger only emits ERROR and above, and all Python warnings are suppressed.
logging.getLogger().setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Reload imported modules automatically before each cell runs, so edits to the utils modules are picked up.
%load_ext autoreload
%autoreload 2

In [21]:
# Folder layout used by the preprocessing steps (plain strings with a trailing slash).
data_dir = "../data/"
images_dir = data_dir + 'images/'
indices_dir = data_dir + 'indices/'
pos_mask_dir = data_dir + 'pos_masks/'
neg_mask_dir = data_dir + 'neg_masks/'

# Create every folder that is missing. exist_ok=True keeps the cell safe to re-run and
# avoids the check-then-create race of a separate os.path.exists() test.
for dir_ in (data_dir, images_dir, indices_dir, pos_mask_dir, neg_mask_dir):
    os.makedirs(dir_, exist_ok=True)

## Generate TIFF Files for Indices
The following script is used to generate TIFF files for the derived indices for each of the images. There is no need to run this if the indices have already been generated. 

In [22]:
# For each area: look up its file paths, then have geoutils write the index files for it.
for area in areas:

    # File paths are collected for this single area only.
    area_dict = geoutils.get_filepaths(
        [area], images_dir, indices_dir, pos_mask_dir, neg_mask_dir
    )
    print("Image filepaths:", area_dict[area], sep="\n")

    print('Calculating indices')
    # write_indices returns the (updated) dictionary, which replaces the previous one.
    area_dict = geoutils.write_indices(area_dict, area, indices_dir)

    print('Operations completed.')

  0%|          | 0/2 [00:00<?, ?it/s]

Image filepaths:
{'pos_mask_gpkg': '../data/pos_masks/testuribia_pos.gpkg', 'neg_mask_gpkg': '../data/neg_masks/testuribia_neg.gpkg', 'images': ['../data/images/gee_testuribia_2015-2016.tif', '../data/images/testuribia_2015-2016.tif'], 'indices': []}
Calculating indices


100%|██████████| 2/2 [00:03<00:00,  1.73s/it]

Operations completed.



