# Data Download

### How to Install

1. Install GEE in GCE by following: https://developers.google.com/earth-engine/python_install-conda

  - If you encounter `jupyter: command not found`, add conda to your PATH:
  
```
    export PATH=~/anaconda3/bin:$PATH
```

2. Create a new kernel to use in JupyterLab

```
conda install ipykernel
ipython kernel install --name ee --user
```

3. Update kernel.json to point to the Python 3.8.2 interpreter (the version GEE uses)

```
jupyter kernelspec list
vim /home/cholo/.local/share/jupyter/kernels/ee/kernel.json
```

    - In vim, replace the interpreter path with this Python:
  
```
    /home/cholo/anaconda3/envs/ee/bin/python
```

4. Install gdal

```
conda activate ee
conda install gdal
```

5. In a new JupyterLab notebook, select the kernel 'ee'

To bypass `ee.Authenticate()`, you can run `earthengine authenticate` in a terminal beforehand.

## Load tools

In [1]:
import geopandas as gpd
import pathlib
from tqdm import tqdm

import sys
# Put ../utils at the front of the module search path so the `gee` import below
# resolves from there (presumably a project-local helper module — confirm).
sys.path.insert(0, '../utils')
import gee

# Root folder for all data paths in this notebook; the trailing slash matters
# because later cells build sub-paths by plain string concatenation.
data_dir = "../data/"

Enter verification code:  <redacted>



Successfully saved authorization token.


In [2]:
# Bounding box per area of interest, keyed by the short area name used in
# export file names. Each value is a list of four floats; they look like
# [min_lon, min_lat, max_lon, max_lat] — TODO confirm against gee.sen2median.
# NOTE(review): for 'test_uribia' the second value is larger than the fourth,
# unlike every other entry — verify the corner order is intended.
BBOX = dict(
    arauquita=[-71.69980876899996, 6.526376968000022, -70.86773583299998, 7.054700505000028],
    cucuta=[-72.60807671699996, 7.723856582000053, -72.34649040099998, 8.431575791000057],
    tibu=[-73.07343705199997, 8.251246337000055, -72.47112479799995, 9.142041082000048],
    soacha=[-74.30657295399999, 4.382138206000036, -74.17129610699999, 4.633509322000066],
    villadelrosario=[-72.52267855299993, 7.64295204900003, -72.44465188199996, 7.902600765000045],
    saravena=[-72.09962277499994, 6.745921904000056, -71.69442685499996, 7.0810649060000515],
    test_uribia=[-72.37971307699996, 11.747684544661437, -72.15636466747618, 11.523307245000069],
)

In [3]:
# Working folders under data_dir. They are kept as plain strings with a
# trailing slash because later cells concatenate file names onto them and
# interpolate them into shell commands.
adm_dir = data_dir + 'admin_bounds/'
sentinel_dir = data_dir + 'sentinel2/'
tmp_dir = data_dir + 'tmp/'

# Create every working folder up front. `exist_ok=True` makes the cell safe to
# re-run, so no separate exists() check is needed. Path objects are not used
# as context managers here: that protocol was a no-op, was deprecated in
# Python 3.11 and removed in 3.13.
# Note: if one of these paths already exists as a regular file, mkdir raises
# FileExistsError here instead of the problem surfacing in a later cell.
dirs = [adm_dir, sentinel_dir, tmp_dir]
for dir_ in dirs:
    pathlib.Path(dir_).mkdir(parents=True, exist_ok=True)

## Download from GEE

In [None]:
# Request one gee.sen2median export for every (area, year, cloud_pct) combination.
years = list(range(2015, 2021))
areas = list(BBOX)
# Only 100 is active; 10, 20 and 30 were left commented out in the original list.
cloud_pcts = [100]

for area in areas:
    bbox = BBOX[area]
    for year in years:
        for pct in cloud_pcts:
            # The export name encodes area, year and cloud percentage.
            filename = f'gee_{area}_{year}_pct{pct}'
            gee.sen2median(bbox, year, FILENAME=filename, cloud_pct=pct)

## Deflate and crop

In [15]:
# get area shape file
! gsutil cp gs://immap-masks/admin_boundaries/admin_bounds.gpkg {adm_dir}

areas = ['arauquita', 'tibu', 'saravena']
# Get name in shapefile
admin2RefN = {
    'arauquita': 'Arauquita', 
    'tibu': 'Tibu', 
    'saravena': 'Saravena',
}

for area in areas:   
    gdf = gpd.read_file(adm_dir + 'admin_bounds.gpkg')
    area1 = gdf.query("admin2RefN == '"+ admin2RefN[area] + "'")
    area1.to_file(adm_dir + area + '.shp')

In [4]:
# Export names to post-process, for the years 2016-2020.
# Arauquita and Saravena entries are a single name per year; each Tibu entry is
# a list of two names that differ only in their trailing tile suffix.
# NOTE(review): these names carry no `_pct…` suffix — confirm they match the
# file names actually produced by the download step.
files_ = [
    *[f'gee_arauquita_{yr}' for yr in range(2016, 2021)],
    *[
        [
            f'gee_tibu_{yr}{tile}'
            for tile in ('0000000000-0000000000', '0000009472-0000000000')
        ]
        for yr in range(2016, 2021)
    ],
    *[f'gee_saravena_{yr}' for yr in range(2016, 2021)],
]

In [None]:
# Folder arguments are the same for every call, so collect them once.
crop_dirs = dict(
    output_dir=sentinel_dir,
    adm_dir=adm_dir,
    tmp_dir=tmp_dir,
)

# Run gee.deflatecrop_all on each entry of files_ (a single name or a list of
# names), with a tqdm progress bar over the entries.
for f in tqdm(files_):
    gee.deflatecrop_all(repl=f, **crop_dirs)

  0%|          | 0/8 [00:00<?, ?it/s]

In [None]:
# Approximate processing time per image:
#   arauquita: ~5 min
#   tibu:      ~5 min
#   saravena:  ~1 min 30 s