# Gets summary of images in a directory

In [None]:
import os
import sys
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
import pyproj
import pickle
from shapely.geometry import box
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Add the helper directory to the import path; the relative path is resolved
# against the directory the notebook is run from.
sys.path.append(r"../LUCinSA_helpers")
# NOTE(review): star import -- the helper functions called in later cells
# (e.g. PrintFilesInDirectory, GetImgListFromDb) presumably come from here; confirm.
from FileChecks import *

In [None]:
'''
PARAMETERS: modify in notebook_params notebook, then run that notebook and this cell to update here
DO NOT modify this cell
'''

%store -r basicConfig
print("Basic Parameters: \n brdf_dir = {} \n gridCell = {} \n index_dir = {} \n out_dir = {}"
      .format(basicConfig['brdf_dir'],basicConfig['gridCell'],basicConfig['index_dir'],basicConfig['out_dir']))
%store -r SingleOutputParams
print("SingleOutputParams: \n MapYears = {}".format(SingleOutputParams['MapYears']))
%store -r SinglePlotParams
print("SinglePlotParams: \n iImageType = {}".format(SinglePlotParams['imageType']))

In [None]:
### List every .nc file in the brdf directory as a dataframe.
Allimages = PrintFilesInDirectory(
    basicConfig['brdf_dir'],
    '.nc',
    printList=basicConfig['printList'],
    out_dir=basicConfig['out_dir'],
    dataSource='stac',
)

# Compared against True explicitly so that non-boolean settings behave as before.
if basicConfig['printList'] == True:
    # The placeholder is positional, so the value must be passed positionally;
    # passing it as out_dir=... raised IndexError.
    print('full dataframe is printed as FileList.txt in {}'.format(basicConfig['out_dir']))
else:
    print('sample of dataframe: (Not printed to file. Can print by setting printList=True in notebook_params)')
# Last expression of the cell: the notebook displays the first five rows.
Allimages.head(n=5)

In [None]:
# Stacked bar chart: one bar per year, split by sensor.
images_per_year = Allimages.groupby(['yr','sensor']).size().unstack()
per_year_title = 'Number images processed per year for {} cell {}'.format(basicConfig['country'],basicConfig['gridCell'])
# The trailing semicolon keeps the notebook from echoing the Axes object.
images_per_year.plot(kind='bar', stacked=True, figsize=(20, 5), title=per_year_title);

In [None]:
# Stacked bar chart: one bar per year-month, split by sensor.
images_per_month = Allimages.groupby(['yrmo','sensor']).size().unstack()
per_month_title = 'Number images processed per month for {} cell {}'.format(basicConfig['country'],basicConfig['gridCell'])
# The trailing semicolon keeps the notebook from echoing the Axes object.
images_per_month.plot(kind='bar', stacked=True, figsize=(20, 5), title=per_month_title);

## Focus on a smaller range of years so that the months are legible on the axis:

In [None]:
# First year (inclusive) shown in the month-level chart. The same value drives
# both the filter and the title so the two cannot drift apart; the previous
# filter (> 2019) dropped 2019 although the title said "from 2019 on".
first_year = 2019
ImgSubset = Allimages[Allimages["yr"].astype(int) >= first_year]
# Stacked bar chart: one bar per year-month, split by sensor.
ImgSubset.groupby(['yrmo','sensor']).size().unstack().plot(kind='bar', stacked=True, figsize=(20, 5),
   title=('Number images processed per month for {} cell {} from {} on'.format(basicConfig['country'],basicConfig['gridCell'],first_year)));

## Read scene.info file

In [None]:
## Query the image list for the selected map years only.
## (To cover all years instead, pass Yrs=None and keep the result separately, e.g. as dfAll.)
dfSlice = GetImgListFromDb(
    SinglePlotParams['imageType'],
    basicConfig['raw_dir'],
    basicConfig['gridCell'],
    Yrs=SingleOutputParams['MapYears'],
    dataSource='stac',
)

In [None]:
# Image list for the same image type, grid cell and years, obtained via
# GetImgListFromCat using the grid file.
CatList = GetImgListFromCat(
    SinglePlotParams['imageType'],
    basicConfig['gridCell'],
    basicConfig['gridFile'],
    Yrs=SingleOutputParams['MapYears'],
)

In [None]:
# Compare the files against the database for the selected years.
# NOTE(review): this call reads dataSource from basicConfig['data_source'],
# while the earlier cells hard-code 'stac' -- confirm which is intended.
missingFiles = CompareFilesToDb(
    SinglePlotParams['imageType'],
    'Both',
    basicConfig['raw_dir'],
    basicConfig['gridCell'],
    basicConfig['gridFile'],
    Yrs=SingleOutputParams['MapYears'],
    dataSource=basicConfig['data_source'],
)

## Check processing for cell

In [None]:
## TO ADD:
##   - check if brdfs have been generated
##       - if yes, check for...
##   - check coreg
##       - check non-coreged images
##   - check indices

# Status of the configured grid cell; the year list is fixed at [2010, 2022].
GetCellStatus(
    basicConfig['raw_dir'],
    basicConfig['gridCell'],
    basicConfig['gridFile'],
    Yrs=[2010,2022],
)
In [None]:
def _names_sorted_by_date(names):
    """Return *names* as a dataframe with an added 'date' column, sorted by it.

    The date is the third underscore-separated field of the value in column 0.
    An empty input yields an empty dataframe instead of failing on the
    missing column.
    """
    frame = pd.DataFrame(names)
    if frame.empty:
        return frame
    frame['date'] = frame[0].map(lambda name: name.split("_")[2])
    return frame.sort_values(by=['date'])


# CatDiff must be computed here: the call used to be commented out, which left
# the name undefined and made the rest of the cell fail with NameError.
CatDiff = ComparePlanetaryHub_w_Element84('Sentinel', basicConfig['gridCell'],basicConfig['gridFile'],Yrs = [2010,2022])
# CatDiff[0] / CatDiff[1] are reported below as the two one-sided differences.
Diff0 = _names_sorted_by_date(CatDiff[0])
Diff1 = _names_sorted_by_date(CatDiff[1])
print('{} Images in Element84 but not Planetary: {}'.format(len(CatDiff[0]),Diff0))
print('{} Images in Planetary but not Element84: {}'.format(len(CatDiff[1]),Diff1))

## Check original products

In [None]:
import rich.table

# Fetch the item for the first entry of CatList; the code below relies on it
# exposing an `assets` mapping whose values have a `title`.
l_orig = GetImgFromPlanetaryHub(CatList[0])

# One row per asset: its key and its title (header typo "Descripiption" fixed).
table = rich.table.Table("Asset Key", "Description")
for asset_key, asset in l_orig.assets.items():
    table.add_row(asset_key, asset.title)

# Last expression of the cell: the notebook renders the table.
table

In [None]:
# Get the url to download a band/asset. The item fetched in the previous cell
# is l_orig; `img` was never defined in this notebook and raised NameError.
url = l_orig.assets["nir08"].href
print(url)

## To save an html copy of this notebook with all outputs:

In [None]:
### Run to print output as html
outName = str(basicConfig['country']+'1a_ImagesProcessed_in_Cell_'+str(basicConfig['gridCell']))
!jupyter nbconvert --output-dir='./Outputs' --to html --no-input --output=$outName 1a_ExploreData_FileContent.ipynb