# Get a summary of the images in a directory

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
def PrintFilesInDirectory(in_dir, endstring, brdfDir=True, printList=False, out_dir=None):
    '''
    List the files of a folder in a dataframe for quick exploration.

    Parameters
    ----------
    in_dir : directory to scan (top level only, via os.listdir).
    endstring : only names ending with this string are kept (e.g. '.nc').
        Names containing 'angles' are always skipped.
    brdfDir : when True, add columns sliced from fixed character positions of the
        file name -- 'sensor' (first 4 characters), 'date' (characters 19:27 when
        the sensor is 'L1C_', otherwise 17:25), 'yr' (first 4 characters of date)
        and 'yrmo' (first 6 characters of date) -- and order the rows by date.
    printList : when True, also write the dataframe to 'FileList.txt' in out_dir
        (comma separated, missing values written as '.', no index column).
    out_dir : folder that receives 'FileList.txt'; required when printList is True.

    Returns
    -------
    pandas.DataFrame with a 'file' column, plus the columns described above when
    brdfDir is True.

    Raises
    ------
    ValueError : if printList is True but out_dir was not given.
    '''
    # NOTE: the flags are compared with `== True` rather than tested for truthiness
    # so that non-boolean config values (e.g. the string 'False') keep being
    # treated as "off", exactly as before.
    if printList == True and out_dir is None:
        # Fail early with a clear message instead of a TypeError from os.path.join.
        raise ValueError("out_dir must be provided when printList is True")

    fileList = [f for f in os.listdir(in_dir)
                if f.endswith(endstring) and 'angles' not in f]
    filedf = pd.DataFrame(fileList, columns=['file'])

    if brdfDir == True:
        filedf['sensor'] = filedf['file'].str[:4]
        # The date sits two characters further right in names starting with 'L1C_'.
        filedf['date'] = np.where(filedf['sensor'] == 'L1C_',
                                  filedf['file'].str[19:27],
                                  filedf['file'].str[17:25])
        filedf['yr'] = filedf['date'].str[:4]
        filedf['yrmo'] = filedf['date'].str[:6]
        # Keep the sorted result (it used to be computed and then discarded).
        # The file name breaks ties so the order never depends on os.listdir.
        filedf = filedf.sort_values(by=['date', 'file']).reset_index(drop=True)

    if printList == True:
        filedf.to_csv(os.path.join(out_dir, 'FileList.txt'), sep=',', na_rep='.', index=False)

    return filedf

In [None]:
'''
PARAMETERS: modify in notebook_params notebook, then run that notebook and this cell to update here
DO NOT modify this cell
'''

# `%store -r` is IPython's storemagic "refresh": it reloads the variable named
# `basicConfig` from IPython's persistent store into this notebook's namespace
# (presumably saved by the notebook_params notebook -- verify there).
%store -r basicConfig
# Echo the restored settings so a stale or missing configuration is noticed
# before the cells below use it.
print("Basic Parameters: \n brdf_dir = {} \n gridCell = {} \n index_dir = {} \n out_dir = {}"
      .format(basicConfig['brdf_dir'],basicConfig['gridCell'],basicConfig['index_dir'],basicConfig['out_dir']))

In [None]:
### To get all images in brdf directory:
# Summarise every '.nc' file of the brdf directory; the file list is also
# written to out_dir when printList is enabled in the stored configuration.
Allimages = PrintFilesInDirectory(basicConfig['brdf_dir'],'.nc',brdfDir=True,printList=basicConfig['printList'],out_dir=basicConfig['out_dir'])

if basicConfig['printList'] == True:
    # '{}' is a positional placeholder, so the path has to be passed positionally;
    # passing it as the keyword `out_dir=` raised IndexError here.
    print('full dataframe is printed as FileList.txt in {}'.format(basicConfig['out_dir']))
else:
    print('sample of dataframe: (Not printed to file. Can print by setting printList=True in notebook_params)')
# Last expression of the cell: the notebook renders the first five rows.
Allimages.head(n=5)

In [None]:
# Stacked bar chart: one bar per year, one coloured segment per sensor.
# The trailing ';' stops the notebook from echoing the Axes object.
(
    Allimages
    .groupby(['yr', 'sensor'])
    .size()
    .unstack()
    .plot(
        kind='bar',
        stacked=True,
        figsize=(20, 5),
        title='Number images processed per year for {} cell {}'.format(
            basicConfig['country'], basicConfig['gridCell']),
    )
);

In [None]:
# Stacked bar chart: one bar per year-month ('yrmo'), one coloured segment per sensor.
# The trailing ';' stops the notebook from echoing the Axes object.
(
    Allimages
    .groupby(['yrmo', 'sensor'])
    .size()
    .unstack()
    .plot(
        kind='bar',
        stacked=True,
        figsize=(20, 5),
        title='Number images processed per month for {} cell {}'.format(
            basicConfig['country'], basicConfig['gridCell']),
    )
);

## Focus on a smaller range of years so the months are readable on the axis:

In [None]:
# Keep only images from 2019 onwards. The comparison is inclusive so that the
# data matches the "from 2019 on" title (it used to be `> 2019`, which silently
# dropped 2019 itself).
ImgSubset = Allimages[Allimages["yr"].astype(int) >= 2019]
# Stacked bar chart per year-month for the reduced range; the trailing ';'
# stops the notebook from echoing the Axes object.
ImgSubset.groupby(['yrmo','sensor']).size().unstack().plot(kind='bar', stacked=True, figsize=(20, 5),
   title=('Number images processed per month for {} cell {} from 2019 on'.format(basicConfig['country'],basicConfig['gridCell'])));

In [None]:
### Run to print output as html

outName = str(basicConfig['country']+'1a_ImagesProcessed_in_Cell_'+str(basicConfig['gridCell']))
!jupyter nbconvert --output-dir='./Outputs' --to html --no-input --output=$outName 1a_ExploreData_FileContent.ipynb