# Summary of images processed

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
def PrintFilesInDirectory(in_dir,endstring,brdfDir=True,printList=False,out_dir=None):
    '''
    Return a dataframe listing the files in {in_dir} whose names end in {endstring}.
    Files with 'angles' anywhere in their name are skipped.
    Rows are in the order returned by os.listdir (not sorted).

    With {brdfDir=True}, adds columns parsed from fixed character positions of the filename:
        sensor : first 4 characters of the filename
        date   : characters 19-26 when sensor is 'L1C_', otherwise characters 17-24
                 (expected to be a YYYYMMDD string -- depends on the naming convention of the files)
        yr     : first 4 characters of date
        yrmo   : first 6 characters of date
    With {printList=True}, writes the dataframe to 'FileList.txt' (comma-separated) in {out_dir}.

    Raises TypeError if {printList=True} and no {out_dir} is given.
    '''
    # Flags are compared with == True (rather than by truthiness) so that non-boolean
    # values passed in from a config dictionary behave exactly as they did before.
    if printList == True and out_dir is None:
        # Fail before doing any work, with a message that names the actual problem
        # (os.path.join(None, ...) would raise a less helpful TypeError later).
        raise TypeError("out_dir must be a directory path when printList=True")

    fileList = [f for f in os.listdir(in_dir) if f.endswith(endstring) and 'angles' not in f]
    filedf = pd.DataFrame(fileList, columns = ['file'])

    if brdfDir == True:
        filedf['sensor'] = filedf['file'].str[:4]
        # The date sits two characters further right in names that start with 'L1C_'.
        filedf['date'] = np.where((filedf['sensor']=='L1C_'),filedf['file'].str[19:27],filedf['file'].str[17:25])
        filedf['yr'] = filedf['date'].str[:4]
        filedf['yrmo'] = filedf['date'].str[:6]

    if printList == True:
        filedf.to_csv(os.path.join(out_dir,'FileList.txt'), sep=',', na_rep='.', index=False)

    return filedf

In [None]:
def PrintFilesInMultipleDirectories(full_dir,sub_dir,endstring,brdfDir=True,printList=False,out_dir=None):
    '''
    This will return a dataframe with all files ending in {endstring} in each directory named {sub_dir}
    found directly inside a first-level subdirectory of {full_dir} (i.e. {full_dir}/<cell>/{sub_dir}).
    {full_dir} may be a pathlib.Path or a plain string path.
    If the desired sub_dir is a brdf directory ({brdfDir=True}), adds date and year info parsed from the
    filenames to the dataframe for plotting (see PrintFilesInDirectory).
    Drops duplicated filenames (keeping the first occurrence) to reveal the number of images coming from
    unique Sentinel/Landsat scenes, and prints a one-line summary of the counts.
    Will print final dataframe to 'ALLFileList.txt' in {out_dir} with {printList=True}

    Raises TypeError if {printList=True} and no {out_dir} is given.
    Raises ValueError if no directory named {sub_dir} is found.
    '''
    # Flags are compared with == True (rather than by truthiness) so that non-boolean
    # values passed in from a config dictionary behave exactly as they did before.
    if printList == True and out_dir is None:
        # Fail before scanning the directory tree, with a message that names the problem.
        raise TypeError("out_dir must be a directory path when printList=True")

    multiFileList = []
    # Path() leaves Path inputs untouched and lets string paths from a config work as well.
    for x in Path(full_dir).iterdir():
        if not x.is_dir():
            continue
        for sx in x.iterdir():
            # Only real directories are listed; a stray file called {sub_dir} is ignored.
            if sx.name == sub_dir and sx.is_dir():
                fileSet = PrintFilesInDirectory(sx,endstring,brdfDir=brdfDir,printList=False,out_dir=None)
                multiFileList.append(fileSet)

    if not multiFileList:
        # pd.concat([]) would raise ValueError('No objects to concatenate'); say what is missing instead.
        raise ValueError("No '{}' directories found in the subdirectories of {}".format(sub_dir, full_dir))

    fullFiledf = pd.concat(multiFileList)
    numCells = len(multiFileList)   # one dataframe per matching directory
    lenOrig = len(fullFiledf)
    uniqueImgs = fullFiledf.drop_duplicates(subset=['file'],keep='first')
    print('There are {} processed images coming from {} unique Sentinel/Landsat images over {} cells.'.format(lenOrig,len(uniqueImgs),numCells))

    if printList == True:
        uniqueImgs.to_csv(os.path.join(out_dir,'ALLFileList.txt'), sep=',', na_rep='.', index=False)

    return uniqueImgs

In [None]:
'''
PARAMETERS: modify in the Notebook_settings notebook, then run that notebook and this cell to update them here.
Do not modify this cell.
'''

# Reload the `basicConfig` variable from IPython's %store storage into this notebook's namespace.
%store -r basicConfig
# Echo the directory settings currently in use so they are visible in the notebook output.
print("Basic Parameters: \n raw_dir = {} \n smooth_dir = {} \n out_dir = {}"
      .format(basicConfig['raw_dir'],basicConfig['smooth_dir'],basicConfig['out_dir']))

In [None]:
### Gather every '.nc' file from the "brdf" directory of each processed cell under raw_dir
AllFiles = PrintFilesInMultipleDirectories(
    basicConfig['raw_dir'],
    "brdf",
    '.nc',
    brdfDir=True,
    printList=basicConfig['printList'],
    out_dir=basicConfig['out_dir'],
)

In [None]:
# Stacked bar chart: number of unique images per year, split by the 'sensor' filename prefix.
# The trailing semicolon keeps the notebook from echoing the returned plot object.
(
    AllFiles
    .groupby(['yr','sensor'])
    .size()
    .unstack()
    .plot(
        kind='bar',
        stacked=True,
        figsize=(20, 5),
        title='Images processed for {}'.format(basicConfig['country']),
    )
);

In [None]:
### Run to print output as html

# Output file name: the configured country value followed by '5a_ImagesProcessed'.
outName = str(basicConfig['country']+'5a_ImagesProcessed')
# Shell escape: IPython substitutes $outName with the Python variable above before running the command.
# nbconvert exports this notebook to HTML in ./Outputs; --no-input leaves the code cells out of the export.
!jupyter nbconvert --output-dir='./Outputs' --to html --no-input --output=$outName 5a_SummarizeData_ImagesProcessed.ipynb