In [None]:
#| default_exp imgs_stats

# Statistics of images

In [None]:
#|hide
from nbdev.showdoc import *

In [1]:
#|export

import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from tqdm import tqdm
from flood_exercise import utils_func
from flood_exercise import const_vals as CONST

In [4]:
#|export

class ImgsStatistics():
    """
    Collect per-band statistics (mean and standard deviation) for every
    TIFF image returned by `utils_func.load_tif_paths` for a folder.

    Attributes set by the constructor:
      list_of_files   : image paths returned by `utils_func.load_tif_paths`.
      collect_results : dict with two parallel lists (CONST.PATH_STR and
                        CONST.REGION_STR), one entry per image.
      records         : one flat dict per image holding its path, its region
                        and every "<band>_<stat>" value, ready to be passed
                        to `pd.DataFrame(...)`.
      test            : band statistics of the last processed image (kept for
                        the exploratory cells that read it); an empty list
                        when no image was found.
    """

    def __init__(self,
        path_to_imgs : str , # path to the folder that contains the images
        ):

        # get the tiles paths
        self.list_of_files = utils_func.load_tif_paths(path_to_imgs)

        self.collect_results = {CONST.PATH_STR : [] , CONST.REGION_STR : []}
        self.records = []
        self.test = []

        for path in tqdm(self.list_of_files):
            region = self._get_region_name_(path)
            # Compute the statistics before storing anything, so the lists
            # stay aligned if reading an image fails.
            bands_stats = self._img_statistics_(path)

            # Append (not assign): every image must keep its own entry.
            self.collect_results[CONST.PATH_STR].append(path)
            self.collect_results[CONST.REGION_STR].append(region)

            # Flatten the per-band dicts into a single row for this image.
            row = {CONST.PATH_STR : path , CONST.REGION_STR : region}
            for single_band_stats in bands_stats:
                row.update(single_band_stats)
            self.records.append(row)

            self.test = bands_stats


    def _get_region_name_(self,
                          tile_name : str , # file name or path of the image , assuming that the first word in the file name is the region name
                          ):
        """
        Extract the region name from a tile name of the form "region_id_label.tif".
        For example, for "Bolivia_23014_S2Hand.tif" it will return Bolivia.

        Parameters:
        tile_name (str): File name, or full path, of the tile.

        Returns:
        string: Name of the region , extracted from the tile name
        """
        # Drop any directory part first (both "/" and "\" separators).
        # Otherwise an occurrence of the split string inside a folder name
        # would cut the result short and return part of the path.
        file_name = str(tile_name).replace('\\', '/').rsplit('/', 1)[-1]
        return file_name.split(CONST.SPLIT_TILES_NAMES_STR)[0]


    def _img_statistics_(self,
                         path : str , # path to image (tif file)
                         ):
        """
        Compute the mean and the standard deviation of every band of one image.
        Pixels equal to 0 are treated as "no value" and are left out.

        Parameters:
        path (str): Path of the TIFF file to read.

        Returns:
        list: One dict per band, in band order, with the keys
              "<band name>_<CONST.STR_MEAN>" and "<band name>_<CONST.STR_STD>".
              A band without a description is named
              "<CONST.STR_BAND_NAME>_<1-based band number>".
        """
        # Read everything that is needed once, then release the file.
        with rasterio.open(path) as src:
            src_arr = src.read()
            band_names = list(src.descriptions)

        # convert 0 to nan , assuming 0 is no value and we don't want it to
        # distort the statistics (np.where also upcasts integer data to float)
        src_arr = np.where(src_arr == 0, np.nan, src_arr)

        collect_bands_stats = []

        # Iterating over the array yields one 2-D band at a time.
        for index , (band_name , arr) in enumerate(zip(band_names, src_arr)):
            if band_name is None:
                band_name = f"{CONST.STR_BAND_NAME}_{index + 1}"

            # A band made only of "no value" pixels yields nan for both
            # statistics (numpy emits a RuntimeWarning in that case).
            mean = np.nanmean(arr)
            std = np.nanstd(arr)

            collect_bands_stats.append({
                f"{band_name}_{CONST.STR_MEAN}" : mean ,
                f"{band_name}_{CONST.STR_STD}" : std ,
            })

        return collect_bands_stats






        
        

        

     

    


In [5]:
# Folder with the input TIFF tiles (local, machine-specific path).
imgs_dir = r'D:\git\flood_exercise\S2'
instance = ImgsStatistics(path_to_imgs=imgs_dir)

# Display what the constructor stored in `test`.
instance.test

 11%|█         | 44/399 [00:03<00:25, 13.68it/s]

In [None]:
# Display the image paths that the instance got from utils_func.load_tif_paths.
instance.list_of_files

In [None]:
# Keep a short handle on the statistics stored in `instance.test`.
dici =instance.test

In [None]:
# Display the statistics picked up from `instance.test`.
dici

In [None]:
# Transpose the statistics so that every original column becomes a row.
stats_transposed = pd.DataFrame(dici).T

# Promote the first row to column labels, then keep only the remaining rows.
stats_transposed.columns = stats_transposed.iloc[0]
df1 = stats_transposed.iloc[1:, :]
df1

In [None]:
# Open one sample tile a single time and reuse the handle for the band
# descriptions (the original opened the file twice and never closed the
# second handle).
sample_tile_path = r'D:\git\flood_exercise\S2\Bolivia_23014_S2Hand.tif'
src = rasterio.open(sample_tile_path)  # left open on purpose: a later cell still calls src.read()
band_names = list(src.descriptions)

src_arr = src.read()

# Show the first band and display its NaN-ignoring mean.
plt.imshow(src_arr[0,:,:])
np.nanmean(src_arr[0,:,:])

In [None]:
# Display the raw values of the first band.
src_arr[0,:,:]

In [None]:
# Plain mean of the first band (np.mean returns nan if the band contains any NaN).
np.mean(src_arr[0,:,:])

In [None]:
# Mean of the first band computed while ignoring NaN values.
np.nanmean(src_arr[0,:,:])

In [None]:
# Replace every 0 with NaN (0 is treated as "no value"), then display the array.
is_zero = src_arr == 0
src_arr = np.where(is_zero, np.nan, src_arr)
src_arr

In [None]:
# Print each band description next to its 0-based index.
# `src.count` gives the number of bands from the dataset metadata, so the
# whole raster no longer has to be read again just to count them.
for band_name, band_index in zip(band_names, range(src.count)):
    print(band_name, band_index)