In [None]:
#| default_exp imgs_stats

# Statistics of images

In [None]:
#|hide
from nbdev.showdoc import *

In [1]:
#|export

import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from tqdm import tqdm
from flood_exercise import utils_func
from flood_exercise import const_vals as CONST

In [85]:
#|export

class ImgsStatistics():

  def __init__(self,
      path_to_imgs : str , # path to the folder that contains the images
      ):
      
      # get the tiles paths
      self.list_of_files = utils_func.load_tif_paths(path_to_imgs)
      self._iterate_tiles_()


  def _get_region_name_(self,
                        tile_name : str , # path of image , assuming that the first word in the file name is the region name
                        ):
     """
     Takes a string of images from the type "region_id_label.tif and extract the region name.
     for example, for "Bolivia_23014_S2Hand.tif" it will return Bolivia
     Parameters:
     folder_path (str): The path to the folder containing TIFF files.

     Returns:
     string: Name of the region , extracted from the tile name 
     """
     region = tile_name.split(CONST.SPLIT_TILES_NAMES_STR1)[-1].split(CONST.SPLIT_TILES_NAMES_STR2)[0]
     return region
  

  
  def _img_statistics_(self,
                       path : str , # path to image (tif file)
                       ):
     
     with rasterio.open(path) as src:
      
      src_arr = src.read()

      #convert 0 to nan , assuming 0 is no value and we don't want it to interrupt the staitistics
      src_arr  = np.where(src_arr==0, np.nan, src_arr )
      band_names = list(src.descriptions)

      collect_bands_stats = {}

      for band_name , index in zip(band_names,range(0,src.read().shape[0])):
         self.bands_stats_imgs = {CONST.STR_BAND_NAME : [] , CONST.STR_MEAN : [] , CONST.STR_STD : [] }
         arr = src_arr[index,:,:]

         #calculate mean
         mean = np.nanmean(arr)
         
         #calculate std
         std = np.nanstd(arr)

         collect_bands_stats[band_name +'_' + CONST.STR_MEAN] = mean
         collect_bands_stats[band_name +'_' + CONST.STR_STD] = std
         
   
     df_img_stats = pd.DataFrame.from_dict([collect_bands_stats])

     return df_img_stats


  def _iterate_tiles_(self):
   
   self.collect_info = {CONST.PATH_STR : [] , CONST.REGION_STR : []}
   self.collect_stats = []

   for path in tqdm(self.list_of_files):
      #get the region name using the get_region_name function
      region = self._get_region_name_(path)
      
      #collect the regn name and the path for the final table
      self.collect_info[CONST.PATH_STR].append(path)
      self.collect_info[CONST.REGION_STR].append(region)
         
      #get the image statistics
      df_img_stats = self._img_statistics_(path)
      self.collect_stats.append(df_img_stats)

   #organize datasets
   #organize the region info
   df1 = pd.DataFrame(self.collect_info)
   #ordanize the stats
   df2 = pd.concat(self.collect_stats)
   df2.reset_index(inplace=True)
   #cocatenate
   self.results = pd.concat([df1,df2],axis=1)










        
        

        

     

    


In [86]:
instance = ImgsStatistics(
  path_to_imgs = r'D:\git\flood_exercise\S2')



  0%|          | 0/399 [00:00<?, ?it/s]


AttributeError: module 'flood_exercise.const_vals' has no attribute 'SPLIT_TILES_NAMES_STR1'

In [68]:
pd.concat(instance.collect_stats)

Unnamed: 0,B1_mean,B1_std,B2_mean,B2_std,B3_mean,B3_std,B4_mean,B4_std,B5_mean,B5_std,...,B8A_mean,B8A_std,B9_mean,B9_std,B10_mean,B10_std,B11_mean,B11_std,B12_mean,B12_std
0,2045.121361,366.960304,1839.157126,391.249278,1811.085734,379.863595,1753.377412,424.093716,1964.627490,414.774592,...,3138.480989,548.933831,953.190210,387.732510,159.752287,74.063205,2718.732305,552.134430,2072.346645,544.698418
0,1572.750298,238.176893,1345.034756,289.717047,1332.743843,317.775426,1168.033314,399.544498,1196.093018,292.127970,...,1769.841801,966.515705,243.451187,109.609274,37.857872,34.112022,1040.482773,586.654512,626.226967,345.350598
0,2013.734406,1151.946099,1772.196365,1256.893949,1716.215126,1234.393483,1531.988964,1410.563046,1800.176743,1237.863133,...,3929.483131,1380.832087,371.368935,187.097016,9.684616,2.510644,2307.633781,1232.076664,1301.098717,1043.084131
0,1442.364147,138.868240,1294.289673,217.404070,1417.947430,285.272892,1388.561562,437.871174,1272.019600,276.668144,...,1172.161255,885.005315,108.150059,67.019372,5.705055,1.241582,596.679100,549.332932,323.786411,281.757341
0,1489.600925,118.475460,1286.906940,166.706857,1264.285282,187.111246,1204.147636,267.579638,1174.817230,172.547158,...,1440.182800,547.865291,102.980587,43.379124,7.087765,1.656915,1049.212807,853.856596,665.331821,590.233659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1335.008938,242.002314,1156.158051,366.925566,1165.635044,378.320440,1097.614441,566.170135,1462.676022,428.368143,...,3139.715183,702.106304,963.875763,149.946729,12.606197,1.407922,2408.852219,713.517056,1494.073376,675.028190
0,1211.460930,60.073873,957.013645,101.266215,960.577333,103.557143,685.778290,225.213282,1126.971083,149.754652,...,3230.799223,879.109970,945.488744,209.771066,10.903004,1.085587,1821.383298,473.622415,931.870198,354.322564
0,1277.548512,59.184389,1032.082386,102.587972,992.515831,108.127801,855.138050,241.366602,1169.980293,166.256549,...,2538.985744,921.178796,683.315823,231.303370,13.719379,5.158683,1724.935505,637.995502,954.246548,425.050670
0,1244.628212,48.008136,994.853779,91.446819,996.123226,104.907798,765.239330,195.107320,1219.595806,154.961657,...,3035.109715,622.396975,808.566460,144.699138,10.382317,1.186718,1949.497345,432.971810,1000.108887,277.715240


In [84]:
df1 = pd.DataFrame(instance.collect_info)
df2 = pd.concat(instance.collect_stats)
df2.reset_index(inplace=True)

df3 = pd.concat([df1,df2],axis=1)
df3

Unnamed: 0,path,region,index,B1_mean,B1_std,B2_mean,B2_std,B3_mean,B3_std,B4_mean,...,B8A_mean,B8A_std,B9_mean,B9_std,B10_mean,B10_std,B11_mean,B11_std,B12_mean,B12_std
0,D:\git\flood_exercise\S2\Nigeria_417184_S2Hand...,Nigeria,0,2045.121361,366.960304,1839.157126,391.249278,1811.085734,379.863595,1753.377412,...,3138.480989,548.933831,953.190210,387.732510,159.752287,74.063205,2718.732305,552.134430,2072.346645,544.698418
1,D:\git\flood_exercise\S2\Mekong_1396181_S2Hand...,Mekong,0,1572.750298,238.176893,1345.034756,289.717047,1332.743843,317.775426,1168.033314,...,1769.841801,966.515705,243.451187,109.609274,37.857872,34.112022,1040.482773,586.654512,626.226967,345.350598
2,D:\git\flood_exercise\S2\Mekong_1191208_S2Hand...,Mekong,0,2013.734406,1151.946099,1772.196365,1256.893949,1716.215126,1234.393483,1531.988964,...,3929.483131,1380.832087,371.368935,187.097016,9.684616,2.510644,2307.633781,1232.076664,1301.098717,1043.084131
3,D:\git\flood_exercise\S2\Mekong_1248200_S2Hand...,Mekong,0,1442.364147,138.868240,1294.289673,217.404070,1417.947430,285.272892,1388.561562,...,1172.161255,885.005315,108.150059,67.019372,5.705055,1.241582,596.679100,549.332932,323.786411,281.757341
4,D:\git\flood_exercise\S2\India_900498_S2Hand.tif,India,0,1489.600925,118.475460,1286.906940,166.706857,1264.285282,187.111246,1204.147636,...,1440.182800,547.865291,102.980587,43.379124,7.087765,1.656915,1049.212807,853.856596,665.331821,590.233659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,D:\git\flood_exercise\S2\USA_741178_S2Hand.tif,USA,0,1335.008938,242.002314,1156.158051,366.925566,1165.635044,378.320440,1097.614441,...,3139.715183,702.106304,963.875763,149.946729,12.606197,1.407922,2408.852219,713.517056,1494.073376,675.028190
395,D:\git\flood_exercise\S2\USA_761032_S2Hand.tif,USA,0,1211.460930,60.073873,957.013645,101.266215,960.577333,103.557143,685.778290,...,3230.799223,879.109970,945.488744,209.771066,10.903004,1.085587,1821.383298,473.622415,931.870198,354.322564
396,D:\git\flood_exercise\S2\USA_955053_S2Hand.tif,USA,0,1277.548512,59.184389,1032.082386,102.587972,992.515831,108.127801,855.138050,...,2538.985744,921.178796,683.315823,231.303370,13.719379,5.158683,1724.935505,637.995502,954.246548,425.050670
397,D:\git\flood_exercise\S2\USA_986268_S2Hand.tif,USA,0,1244.628212,48.008136,994.853779,91.446819,996.123226,104.907798,765.239330,...,3035.109715,622.396975,808.566460,144.699138,10.382317,1.186718,1949.497345,432.971810,1000.108887,277.715240


In [None]:
df1=pd.DataFrame(dici).T
df1.columns = df1.iloc[0]
df1 = df1.iloc[1:,:]
df1

In [None]:
src=rasterio.open(r'D:\git\flood_exercise\S2\Bolivia_23014_S2Hand.tif')
band_names=list(rasterio.open(r'D:\git\flood_exercise\S2\Bolivia_23014_S2Hand.tif').descriptions)

src_arr = src.read()

plt.imshow(src_arr[0,:,:])
np.nanmean(src_arr[0,:,:])

In [None]:
src_arr[0,:,:]

In [None]:
np.mean(src_arr[0,:,:])

In [None]:
np.nanmean(src_arr[0,:,:])

In [None]:
src_arr = np.where(src_arr==0, np.nan, src_arr)
src_arr

In [None]:
for x , y in zip(band_names,range(0,src.read().shape[0])):
  print(x,y)