In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import datetime
import os
import import_ipynb
import glob
import warnings
import ee
import geopandas as gpd
ee.Initialize()
import geemap
import pickle
warnings.filterwarnings("ignore") 
# from Master_phd import *

In [None]:
# Load the combined water-surface-elevation table (one row per gauge reading).
#
# Columns of the data file:
#   "Unnamed: 0", "gauge_id" : id associated with each gauge
#   "Gauge"                  : name of the water body where the gauge is installed
#   "date"                   : date on which the observation was collected
#   "height"                 : water surface elevation
#   "Latitude", "Longitude"  : coordinates of the gauge
#   "Number of Readings"     : total number of readings from the gauge since it was installed
g = pd.read_csv(r'Path the file having all the water surface elevation data from each water body')
# The preview must be the last expression of the cell to be displayed; the column
# description used to be a trailing string literal, which was echoed instead.
g.head(3)

In [None]:
""" Storing all the names of the water body where we have the water surface elevation data """
# Unique gauge names, sorted so the processing order is the same after every kernel
# restart (iterating a set of strings is not reproducible between sessions).
names = sorted(g['Gauge'].dropna().unique())
# File-name friendly variants of the names: spaces replaced by underscores.
haor_names = [name.replace(' ', '_') for name in names]
haor_names

In [None]:
""" Removing the unnecessary columns. These columns are extra information which is not required for analysis """
# Write one "<gauge_id>_<name>_elevation.csv" per water body without the extra columns.
for name in names:
    # `names` holds the raw "Gauge" values, so compare against them unchanged: rewriting
    # underscores to spaces first would miss any gauge whose real name contains "_".
    filtered_g = g[g['Gauge'] == name]
    filtered_g = filtered_g.drop(columns=['Latitude', 'Longitude', 'Unnamed: 0']).reset_index(drop=True)
    if filtered_g.empty:
        # Nothing to export; avoids failing on filtered_g['gauge_id'][0] below.
        print('No readings found for', name)
        continue
    # str() keeps the file-name concatenation valid even if the ids were read as numbers.
    gauge_id = str(filtered_g['gauge_id'][0])
    filtered_g.to_csv(r'Path to store the elevation files' + gauge_id + "_" + name.replace(" ", "_") + '_elevation.csv', index=False)

In [None]:
""" Defining all the required functions. These functions are from my master_phd notebook, but I am directly pasting the function for user needs"""

def MAKE_LOCSS_ELEVATION(file_height,path,UID,haorname,unit='meter'):
    """Filter one gauge's elevation record and save it as a "Corrected_" CSV.

    Readings at or above the 95th percentile of the heights are dropped, the
    remaining heights are converted to meters when ``unit`` is ``'feet'``, and
    the table is written to ``path`` + backslash + ``Corrected_<UID>_<haorname>.csv``.

    Parameters
    ----------
    file_height : pandas.DataFrame
        Must provide the columns ``date``, ``height``, ``Gauge`` and ``gauge_id``.
        The frame itself is not modified.
    path : str
        Prefix of the output file (the file name is appended after a backslash).
    UID : str
        Gauge id used in the output file name.
    haorname : str
        Water-body name used in the output file name and in the progress message.
    unit : {'meter', 'feet'}, default 'meter'
        Unit of the incoming ``height`` column.

    Returns
    -------
    pandas.DataFrame
        First five rows of the written table, with columns
        ``Date``, ``Height in meters``, ``Gauge``, ``gauge_id``.

    Raises
    ------
    ValueError
        If ``unit`` is neither ``'meter'`` nor ``'feet'``.
    """
    # Validate up front and raise instead of calling exit(): exit() ends the whole
    # interpreter session, which in a notebook means losing the running kernel.
    if unit not in ('meter', 'feet'):
        raise ValueError("Wrong unit passed: %r (expected 'meter' or 'feet')" % (unit,))

    cleaned = file_height.reset_index(drop=True)
    cleaned = cleaned.rename(columns={'date': 'Date', 'height': 'Height in meters'})
    cleaned = cleaned[['Date', 'Height in meters', 'Gauge', 'gauge_id']]

    if len(cleaned) > 0:
        # The threshold is computed in the incoming unit, before any conversion.
        threshold = np.quantile(cleaned['Height in meters'], 0.95)
        # "not >=" rather than "<" so rows with a missing height are kept, exactly as
        # the comparison `height >= threshold` never selected them for removal.
        cleaned = cleaned[~(cleaned['Height in meters'] >= threshold)]
    cleaned = cleaned.reset_index(drop=True)

    if unit == 'feet':
        cleaned['Height in meters'] = cleaned['Height in meters'] * 0.3048

    cleaned.to_csv(path+'\\Corrected_'+UID+"_"+haorname+'.csv',index = False)
    print('finished',haorname)
    return cleaned.head()

def SORTING_AREA_DATA_DATEUPDATED(waterareafile,elevationfile,path ,haorname,technique):
    """Add the gauge dates to an area time series and save it sorted by date.

    Every date of ``elevationfile`` is appended to ``waterareafile`` as a row whose
    ``Area`` is NaN, the combined table is sorted by ``Date`` and written to
    ``path + haorname + '\\' + 'GEE_<technique>_<haorname>_dateupdated.csv'``.

    Parameters
    ----------
    waterareafile : pandas.DataFrame
        Needs a ``Date`` column in ``YYYY-MM-DD`` format; the appended rows carry
        only ``Date`` and ``Area``.
    elevationfile : pandas.DataFrame
        Needs a ``Date`` column in ``MM/DD/YYYY`` format.
    path : str
        Prefix of the output location (the water-body name is appended directly).
    haorname : str
        Water-body name, used as sub-folder and in the file name.
    technique : str
        Label placed between ``GEE_`` and the water-body name in the file name.

    Returns
    -------
    pandas.DataFrame
        First five rows of the sorted table.

    Raises
    ------
    ValueError
        If a date does not match the expected format.

    Notes
    -----
    Neither input frame is modified. Dates are converted column-wise instead of
    by element-wise chained assignment, so the ``Date`` column has one datetime
    type throughout and can always be sorted.
    """
    area = waterareafile.copy()
    area['Date'] = pd.to_datetime(area['Date'], format='%Y-%m-%d').dt.normalize()

    gauge_dates = pd.to_datetime(elevationfile['Date'], format='%m/%d/%Y').dt.normalize()
    # Placeholder rows: a gauge date with no area value yet.
    # to_numpy() drops the elevation frame's index so rows are taken by position.
    placeholders = pd.DataFrame({'Date': gauge_dates.to_numpy(), 'Area': np.nan})

    combined = pd.concat([area, placeholders], axis=0, ignore_index=True)
    # Stable sort: on a shared date the observed area row stays ahead of its placeholder.
    combined = combined.sort_values(by=['Date'], kind='stable')
    # Dates are written as YYYY-MM-DD (date only).
    combined.to_csv(path+haorname+"\\"+'GEE_'+technique+'_'+haorname+"_dateupdated.csv",index=False,date_format='%Y-%m-%d')
    return combined.head()

def STORAGE_CHANGE(areafile,elevfile,path,UID,haorname,technique,save_pic=True,save_csv=True):
    """Estimate the storage change between consecutive gauge dates and plot it.

    Missing ``Area`` values are filled by linear interpolation over row position,
    the rows whose date also occurs in ``elevfile`` are kept (one per date), and
    for each consecutive pair of those dates the change is estimated with the
    trapezoidal rule: ``(A_i + A_{i-1}) * 0.5 * (h_i - h_{i-1})``.

    Parameters
    ----------
    areafile : pandas.DataFrame
        Needs ``Date`` (``YYYY-MM-DD``) and ``Area`` columns. Its ``Date`` column is
        converted to datetime in place (see the note in the body).
    elevfile : pandas.DataFrame
        Needs ``Date`` and ``Height in meters`` columns. Not modified.
    path : str
        Output prefix for the CSV; the part before ``'Vol_Est'`` plus ``'graphs'``
        is used as prefix for the picture.
    UID, haorname, technique : str
        Used to build folder and file names (and the plot title).
    save_pic, save_csv : bool, default True
        Whether to write the figure / the table.

    Returns
    -------
    None

    Notes
    -----
    Heights are matched to areas by date, not by row position, so the result does
    not depend on the row order of ``elevfile``. The first row has no predecessor
    and therefore a NaN change. The unit in the column name
    ``Est_Vol_Change(km3)`` is only a label here - NOTE(review): confirm that the
    area and height units actually yield km3.
    """
    # Parsed in place on purpose: the notebook later plots the caller's frame
    # (area_file['Date']) and relies on it holding datetimes after this call.
    areafile['Date'] = pd.to_datetime(areafile['Date'], format='%Y-%m-%d')
    area = areafile.copy()
    area['Area'] = area['Area'].interpolate()

    elev = elevfile.copy()
    # Parse the gauge dates explicitly so both tables are compared as datetimes.
    elev['Date'] = pd.to_datetime(elev['Date'])
    elev = elev.drop_duplicates(subset=['Date'])
    height_by_date = elev.set_index('Date')['Height in meters']

    matched = area[area['Date'].isin(height_by_date.index)]
    matched = matched.drop_duplicates(subset=['Date'])
    # Chronological order is what makes "previous row" mean "previous gauge date".
    matched = matched.sort_values(by='Date').reset_index(drop=True)

    heights = matched['Date'].map(height_by_date)
    # Trapezoid between each row and its predecessor; NaN for the first row.
    est_vol = (matched['Area'] + matched['Area'].shift(1)) * 0.5 * heights.diff()
    matched.insert(2,'Height(m)',heights)
    matched.insert(3,'Est_Vol_Change(km3)',est_vol)

    fig,ax = plt.subplots(figsize = (14,6))
    ax.plot(matched['Date'],matched['Est_Vol_Change(km3)'],label = 'Storage Change')
    ax.set_xlabel('Date',fontsize = 14)
    ax.set_ylabel('Storage Change',fontsize = 14)
    ax.set_title('Storage Change of '+ haorname+ 'Haor '+ technique,fontsize = 16)
    fig.autofmt_xdate()
    ax.legend()
    fig.tight_layout()

    os.makedirs(path+"\\"+haorname+"\\",exist_ok=True)
    if save_csv:
        matched.to_csv(path+"\\"+haorname+"\\"+UID+'_'+haorname+'_'+technique+'.csv')
    # Pictures go next to the volume folder: everything before 'Vol_Est' plus 'graphs'.
    path_pic =  path.split('Vol_Est')[0]+'graphs'
    os.makedirs(path_pic+"\\"+haorname+"\\",exist_ok=True)
    if save_pic:
        fig.savefig(path_pic+"\\"+haorname+"\\"+UID+'_'+haorname+"_Storage_Change_"+technique)
    

#### Converting the elevation data to usable format

In [None]:
""" Quantile filtering the data and converting it in meters if it is in feet """
# Make sure the output folder exists before the first file is written into it.
os.makedirs(r'Path to the elevation files\Corrected\\', exist_ok=True)
for name in names:
    safe_name = name.replace(" ", "_")
    csv_files = glob.glob(r'Path to the elevation files\\*'+"_"+safe_name+'_elevation.csv')
    if not csv_files:
        # Report the missing export instead of failing with IndexError on csv_files[0].
        print('No elevation file found for', safe_name)
        continue
    f = pd.read_csv(csv_files[0]).reset_index(drop=True)
    # str() keeps the file-name concatenation inside the function valid for numeric ids.
    MAKE_LOCSS_ELEVATION(f, path=r'Path to the elevation files\Corrected\\', UID=str(f['gauge_id'][0]), haorname=safe_name, unit='meter')

#### Sorting the Area Data

In [None]:
# Water bodies processed by the cells below. Each entry is inserted verbatim into
# folder and file names, so the spelling (including case and underscores) must
# match the files on disk.
names_list=['Alta_Dighi',
 'Arali_Bil',
 'Asmatpur_Beel',
 'Balait_Haor',
 'Bayhe_Beel',
 'Bhatipara_Haor',
 'Bongo_Sonahat',
 'Boro_Haor',
 'Bukvora_Baor',
 'Caran_Beel',
 'Chatal_Beel',
 'Ciklir_Beel',
 'Court_Dighi',
 'Cuagachi_Doa',
 'Damus',
 'Dhala_China_Beel',
 'Golar_Beel',
 'Hatir_Jheel',
 'Jaliyar_Haor',
 'Joal_Bhanga_Haor',
 'Joysagor',
 'kalai_Beel',
 'Khoasagar_Dighi',
 'Khordo_Baor',
 'Kuralgachi_Beel',
 'Lusni_Beel_Haor',
 'Markas_Beel',
 'Mohishuara_Haor',
 'Mora_Ganga',
 'Mosha_Gatir_Haor',
 'Pabiadher_Haor',
 'Patharchuli_Haor',
 'Ramrai_Dighi',
 'Rarikhal',
 'Shiruali_Baor',
 'Srihar_Beel',
 'Subolpur_Beel',
 'Sub_Beel_Haor',
 'Sunair_Haor', 
 'Atranga_Dighi',
 'Aura_Bora_Beel',
 'Baisha_Beel',
 'Balashur',
 'Bamui_Beel',
 'Bijoy_Singha_Dighi',
 'Boluhar_Baor',
 'Buripota_Beel',
 'Calan_Beel',
 'Dekhar',
 'Dharmo_Sagar',
 'Ghora_dighi',
 'Hakaluki',
 'Kaptai_lake',
 'Kataiya_Band',
 'Korchar',
 'Mohamaya_Lake',
 'Moharaja_Dighi',
 'Nilsagor',
 'Ramsagor',
 'Sukan_Dighi',
 'Udgal_Beel',
 'Vullar_Haor']
# Display how many water bodies are listed.
len(names_list)

In [None]:
""" Sorting the water surface and water surface elevation data as per time and inputting the common dates between the two datasets."""
# For every water body and every area file (one per technique), add the gauge
# dates to the water-surface-area series and save the date-sorted result.
for haor in names_list:
    print('Working on ', haor)
    elev_files = glob.glob(r'Path to the elevation files\Corrected\\'+'Corrected_*'+haor+'.csv')
    if not elev_files:
        # Without a corrected elevation file there are no gauge dates to add.
        print('Did not finish with', haor, '- no corrected elevation file found')
        continue
    data_files = glob.glob(r'Path to Water Surface Area files'+haor+"\\"+'GEE_*'+'.csv')
    for area_files in data_files:
        if area_files.endswith('_dateupdated.csv'):
            # Output of an earlier run of this cell; feeding it back in would produce
            # a second, nested "_dateupdated" file.
            continue
        # Technique = text between "GEE_" and THIS water body's name in the file name
        # (previously the first list entry was used for every water body).
        technique = os.path.basename(area_files).split('GEE_', 1)[1].split(haor)[0]
        try:
            area_file = pd.read_csv(area_files)
            # Re-read per file so every call starts from the untouched elevation table.
            elev_file = pd.read_csv(elev_files[0])
            SORTING_AREA_DATA_DATEUPDATED(area_file, elev_file, path=r'Path to Water Surface Area files', haorname=haor, technique=technique)
        except Exception as err:
            # Keep going with the remaining files, but say what actually went wrong.
            print('Did not finish with', haor, 'technique', technique, '-', repr(err))


#### Storing the names of the wetlands

In [None]:


# Persist the list of wetland names with pickle. The file is binary even though
# its name ends in .txt.
with open(r"Path to store wetlands name\Wetlands_Names.txt", "wb") as names_out:
    pickle.dump(names_list, names_out)

#### Estimating the Volume Change

In [None]:
# Restore the list of wetland names written by the pickling cell.
# pickle.load executes whatever the file encodes, so only load a file you wrote yourself.
with open(r"Path to store wetlands name\Wetlands_Names.txt", "rb") as names_in:
    names_list = pickle.load(names_in)

In [None]:
# Example volume-estimate folder (machine-specific absolute path).
path = r'C:\Users\skhan7\Desktop\Research\PhD\Chapter1\Vol_Est\\'
# Preview of the folder STORAGE_CHANGE derives for pictures:
# everything before 'Vol_Est' followed by 'graphs'.
path.partition('Vol_Est')[0] + 'graphs'

In [None]:
""" Interpolating the data where there is no data between common dates and estimating the volume change using trapezoidal method. """
for haorname in names_list:
    print('Working on ', haorname)
    data_files = glob.glob(r'Path to Water Surface Area files'+haorname+"\\"+'GEE_*'+'dateupdated.csv')
    if not data_files:
        continue
    elev_files = glob.glob(r'Path to the elevation files\Corrected\\'+'Corrected_*'+haorname+'.csv')
    if not elev_files:
        # Report the gap instead of failing with IndexError on elev_files[0].
        print('Skipping', haorname, '- no corrected elevation file found')
        continue
    # The elevation table is the same for every technique of this water body: read it once.
    elev_file = pd.read_csv(elev_files[0])
    UID = elev_file['gauge_id'][0]
    for area_files in data_files:
        area_file = pd.read_csv(area_files)
        # Technique = text between "GEE_" and the water-body name in the file name;
        # basename() keeps a "GEE_" somewhere in the folder path from interfering.
        technique = os.path.basename(area_files).split('GEE_')[1].split(haorname)[0]
        STORAGE_CHANGE(area_file,elev_file,path=r'Path to store volume change files\\',UID=UID,haorname=haorname,technique=technique,save_pic=True,save_csv=True)
        # The picture is already on disk; closing here keeps the loop from holding one
        # open figure per file (closed figures are no longer rendered under the cell).
        plt.close('all')
        
    

In [None]:
# Number of missing Area values in the last area file read by the loop above.
area_file['Area'].isna().sum()

In [None]:
# Area series of the last file read by the loop above, gaps not yet filled.
# Dates are parsed here so the x-axis is a time axis whether or not the column
# was already converted elsewhere.
fig, ax = plt.subplots()
ax.plot(pd.to_datetime(area_file['Date']), area_file['Area'])
ax.set_xlabel('Date')
ax.set_ylabel('Area')
ax.set_title('Water surface area (before interpolation)')
fig.autofmt_xdate()

In [None]:
# Fill the gaps in Area by linear interpolation (the default method, spelled out),
# overwriting the column so the following plot shows the filled series.
area_file['Area'] = area_file['Area'].interpolate(method='linear')
area_file

In [None]:
# Same series after the interpolation cell has filled the gaps in Area.
# Dates are parsed here so the x-axis is a time axis whether or not the column
# was already converted elsewhere.
fig, ax = plt.subplots()
ax.plot(pd.to_datetime(area_file['Date']), area_file['Area'])
ax.set_xlabel('Date')
ax.set_ylabel('Area')
ax.set_title('Water surface area (after interpolation)')
fig.autofmt_xdate()