# Get cell processing info

In [None]:
import os
import sys
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
import pyproj
import pickle
from shapely.geometry import box
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Add the sibling LUCinSA_helpers directory to the module search path so that
# file_checks can be imported from this notebook's working directory.
sys.path.append(r"../LUCinSA_helpers")
# Star import: helper functions called later in this notebook (e.g. reconstruct_db,
# get_img_list_from_db) are presumably provided by file_checks -- TODO confirm.
from file_checks import *

In [None]:
'''
PARAMETERS: modify in notebook_params notebook, then run that notebook and this cell to update here
DO NOT modify this cell
'''

# Restore the basic_config variable previously saved with the IPython %store magic.
%store -r basic_config
# Echo the restored settings so the active configuration is visible in the output.
print("basic parameters: \n brdf_dir = {} \n grid_cell = {} \n index_dir = {} \n local_dir = {}"
      .format(basic_config['brdf_dir'],basic_config['grid_cell'],basic_config['index_dir'],basic_config['local_dir']))
print("\n image_type = {}".format(basic_config['image_type']))
# Restore single_output_params the same way and echo the years it lists.
%store -r single_output_params
print("single_output_params: \n map_years = {}".format(single_output_params['map_years']))

In [None]:
# this is to fix an old problem. Can probably delete now.
#
# For every grid cell id in range(3027) that has a landsat directory:
#   * if processing.info exists and was built with fewer than 10 distinct
#     brdf ids, drop the brdf (and, when present, coreg) columns and re-save;
#   * if processing.info exists but has no 'numpix' column, delete it;
#   * in all cases call reconstruct_db afterwards.
# The ids of re-saved and deleted databases are reported at the end.

# Column groups that are stripped from a database built without unique brdf ids.
brdf_columns = ['brdf', 'bandpass', 'brdf_error', 'brdf_id']
coreg_columns = ['coreg', 'shift_x', 'shift_y', 'coreg_error']

# Accumulators live OUTSIDE the loop so the final summary covers every cell
# (and exists even when no cell has a processing.info file).
reconstructed_dbs = []
deleted_dbs = []

for cellid in range(3027):
    # All per-cell paths are built from the same basic_config['raw_dir'] root.
    cell_dir = Path('{}/{:06d}'.format(basic_config['raw_dir'], cellid))
    processing_info_path = cell_dir / 'processing.info'
    landsat_path = cell_dir / 'landsat'
    sentinel2_path = cell_dir / 'sentinel2'
    brdf_path = cell_dir / 'brdf'

    print('processing {}...'.format(cellid))
    # Cells without a landsat directory are skipped entirely.
    if not os.path.exists(landsat_path):
        continue

    if processing_info_path.is_file():
        processing_db = pd.read_pickle(processing_info_path)
        if 'shift_x' in processing_db:
            print ('already has db with shift x')
            if len(processing_db['brdf_id'].unique()) < 10:
                print('this db was created without unique brdf ids')
                processing_db.drop(brdf_columns + coreg_columns, axis=1, inplace=True)
                pd.to_pickle(processing_db, processing_info_path)
                reconstructed_dbs.append(cellid)
        # Column name is 'brdf_id' (the same column read on the next line).
        elif 'numpix' in processing_db and 'brdf_id' in processing_db:
            if len(processing_db['brdf_id'].unique()) < 10:
                print('this db was created without unique brdf ids')
                processing_db.drop(brdf_columns, axis=1, inplace=True)
                pd.to_pickle(processing_db, processing_info_path)
                reconstructed_dbs.append(cellid)
        elif 'numpix' not in processing_db:
            print('deleting existing db')
            processing_info_path.unlink()
            deleted_dbs.append(cellid)
    else:
        print('no existing database. making new database')

    reconstruct_db(processing_info_path,landsat_path,sentinel2_path,brdf_path,modified=False)

print('restructured dbs:{}'.format(reconstructed_dbs))
print('deleted dbs:{}'.format(deleted_dbs))

## Check processing db

In [None]:
# Load processing.info for the configured grid cell and display its last 10 rows.
info_file = Path(
    '{}/{:06d}/processing.info'.format(
        basic_config['raw_dir'],
        int(basic_config['grid_cell']),
    )
)
processing_db = pd.read_pickle(info_file)
processing_db.tail(10)

## To create new processing database (if processing.info is corrupted or deleted)

In [None]:
# Rebuild processing.info for the configured grid cell via reconstruct_db,
# then reload the resulting database and display its last 10 rows.
grid_cell_dir = '{}/{:06d}'.format(basic_config['raw_dir'], int(basic_config['grid_cell']))
processing_info_path = Path(grid_cell_dir + '/processing.info')
landsat_path = Path(grid_cell_dir + '/landsat')
sentinel2_path = Path(grid_cell_dir + '/sentinel2')
brdf_path = Path(basic_config['brdf_dir'])
# NOTE(review): `modified` is assigned but not passed to reconstruct_db below -- confirm intent.
modified = False
reconstruct_db(processing_info_path, landsat_path, sentinel2_path, brdf_path)
# Same file that was just (re)built above.
processing_db = pd.read_pickle(processing_info_path)
processing_db.tail(10)

In [None]:
# Load the scene.info pickle stored directly under brdf_path and display its last 10 rows.
scene_info_file = brdf_path / 'scene.info'
brdf_db = pd.read_pickle(scene_info_file)
brdf_db.tail(10)

In [None]:
##View processing errors
# A row counts as an error when it is flagged for redownload OR has a non-null brdf_error.
redownload_mask = processing_db['redownload']==True
brdf_error_mask = processing_db['brdf_error'].notnull()
processing_errors1 = processing_db[redownload_mask]
processing_errors2 = processing_db[brdf_error_mask]
# Combine the masks rather than concatenating the two subsets: a row matching both
# conditions would otherwise appear (and be counted) twice.
processing_errors = processing_db[redownload_mask | brdf_error_mask]
print('of the {} images available, {} were not processed due to errors'.format(processing_db.shape[0],processing_errors.shape[0]))
processing_errors

In [None]:
##View brdf status
# Keep only rows that are neither skipped nor flagged for redownload.
processed0 = processing_db[processing_db['skip']!=True]
processed = processed0[processed0['redownload']!=True]
# Rows whose 'brdf' value is False or missing. The parentheses are required:
# `|` binds tighter than `==`, so without them the expression compares 'brdf'
# against the isnull() mask and missing values are never selected.
no_brdf = processed[(processed['brdf']==False) | processed['brdf'].isnull()]
print('of the {} images processed, {} do not have brdf calculations'.format(processed.shape[0],no_brdf.shape[0]))

In [None]:
##View coreg status:
# Rows whose index label starts with 'S' are treated as Sentinel images.
processed_sentinel = processed[processed.index.str.startswith('S')]
creg_sentinel = processed_sentinel[processed_sentinel['coreg']==True]
print('of the {} Sentinel images, {} were coreged'.format(processed_sentinel.shape[0],creg_sentinel.shape[0]))
# Mean and median of the recorded x / y shifts over the coregistered rows.
avg_x_shift = creg_sentinel['shift_x'].mean()
avg_y_shift = creg_sentinel['shift_y'].mean()
med_x_shift = creg_sentinel['shift_x'].median()
med_y_shift = creg_sentinel['shift_y'].median()
# Arguments must follow the template order: x avg, x med, y avg, y med.
print ('shift x: avg:{}, med:{}. shift y: avg:{}, med:{}'.format(avg_x_shift, med_x_shift, avg_y_shift, med_y_shift))

In [None]:
###To get all images in brdf directory:
# Collect the '.nc' files under brdf_dir; print_list / out_dir are passed through
# to the helper, which presumably writes the list to out_dir when print_list is set -- TODO confirm.
all_images = print_files_in_directory(basic_config['brdf_dir'],'.nc',print_list=basic_config['print_list'],out_dir=basic_config['home_dir'],data_source='stac')

if basic_config['print_list'] == True:
    # The template uses a positional '{}' field, so the value must be passed
    # positionally (a keyword-only argument raises IndexError here).
    print('full dataframe is printed as FileList.txt in {}'.format(basic_config['home_dir']))
else:
    print('sample of dataframe: (Not printed to file. Can print by setting printList=True in notebook_params)')
all_images.head(n=5)

## Read processing.info file

In [None]:
import math

# Load processing.info for the configured grid cell and move the index labels
# into an 'index' column so they can be parsed.
p_df = pd.read_pickle(
    Path('{}/{:06d}/processing.info'.format(basic_config['raw_dir'], int(basic_config['grid_cell'])))
).reset_index()

# Sensor code = text before the first underscore of the index label.
p_df['sensor'] = [label.split('_')[0] for label in p_df['index']]
# Magnitude of the (shift_x, shift_y) vector for each row.
p_df['shift'] = [
    math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
    for dx, dy in zip(p_df['shift_x'], p_df['shift_y'])
]
p_df.set_index('index', inplace=True, drop=True)

# Show the first 50 rows whose sensor code is 'LE07'.
#p_df5 = p_df[p_df['sensor']=='LT05']
is_le07 = p_df['sensor'] == 'LE07'
p_df7 = p_df[is_le07]
p_df7.head(50)

# Get cell status from new db

In [None]:
# Positional arguments shared by both image-list queries below.
img_list_args = (
    basic_config['raw_dir'],
    basic_config['grid_cell'],
    basic_config['image_type'],
)

##for all years:
df_all = get_img_list_from_db(*img_list_args, yrs=None, data_source='stac')
##for selection of years:
df_slice = get_img_list_from_db(
    *img_list_args,
    yrs=single_output_params['map_years'],
    data_source='stac',
)

df_slice.head(5)

In [None]:
# Query the status of the configured grid cell (all years) and print it.
grid_dir = '/home/downspout-cel/paraguay_lc/stac/grids'
status = get_cell_status(
    basic_config['raw_dir'],
    grid_dir,
    basic_config['grid_cell'],
    yrs=None,
    data_source='stac',
)
print(status)

In [None]:
# Path to the cell_processing_dl.csv file (hard-coded absolute path).
# NOTE(review): not referenced elsewhere in the visible notebook -- confirm it is still needed.
dl_status_db_path = '/home/downspout-cel/paraguay_lc/cell_processing_dl.csv'

In [None]:
# Path to the cell_processing_post.csv file (hard-coded absolute path).
status_db_path = '/home/downspout-cel/paraguay_lc/cell_processing_post.csv'
# Uncomment to run update_cell_status_db on this file for cells 4050-4100:
#update_cell_status_db(status_db_path, range(4050,4101), basic_config['raw_dir'], '/home/downspout-cel/paraguay_lc/stac/grids', yrs=None,data_source='stac')

## To save an html copy of this notebook with all outputs:

In [None]:
### Run to print output as html
# Output file name: <country> + '0_check_and_fill_db_' + <grid_cell>.
# The name outName must stay as is: the shell command below interpolates it as $outName.
outName = str(basic_config['country']+'0_check_and_fill_db_'+str(basic_config['grid_cell']))
# Convert 0_check_and_fill_db.ipynb to HTML in ./Outputs, omitting the input cells (--no-input).
!jupyter nbconvert --output-dir='./Outputs' --to html --no-input --output=$outName 0_check_and_fill_db.ipynb