In [4]:
!pip install eemont
!pip install geemap



In [5]:
import numpy as np
import pandas as pd
import ee, eemont, geemap

In [6]:
ee.Authenticate()
ee.Initialize(project = 'mekong-river-prediction')

# Read the data

In [7]:
salinity = pd.read_csv('/content/vietnam-mekong-data - salinity.csv')
station = pd.read_csv('/content/vietnam-mekong-data - station.csv')

In [8]:
# Drop the null value
salinity.dropna(inplace=True)

# Merge the station to main data

In [9]:
# Read station
station.head()

Unnamed: 0,Station,Latitude,Longitude
0,An_Thuan_Ben_Tre,9.980203,106.601953
1,Ben_Luc_Long_An,10.638333,106.475456
2,Ben_Trai_Ben_Tre,9.888364,106.526731
3,Ca_Mau_Ca_Mau,9.172244,105.148711
4,Dai_Ngai_Soc_Trang,9.734864,106.074472


In [10]:
# prompt: can you add the id for the station list from 1, integer please

import numpy as np
station['id'] = np.arange(1, len(station) + 1, dtype=int)
station.head()


Unnamed: 0,Station,Latitude,Longitude,id
0,An_Thuan_Ben_Tre,9.980203,106.601953,1
1,Ben_Luc_Long_An,10.638333,106.475456,2
2,Ben_Trai_Ben_Tre,9.888364,106.526731,3
3,Ca_Mau_Ca_Mau,9.172244,105.148711,4
4,Dai_Ngai_Soc_Trang,9.734864,106.074472,5


In [11]:
station_list = station.values.tolist()
station_list

[['An_Thuan_Ben_Tre', 9.980203, 106.601953, 1],
 ['Ben_Luc_Long_An', 10.638333, 106.475456, 2],
 ['Ben_Trai_Ben_Tre', 9.888364, 106.526731, 3],
 ['Ca_Mau_Ca_Mau', 9.172244, 105.148711, 4],
 ['Dai_Ngai_Soc_Trang', 9.734864, 106.074472, 5],
 ['Hoa_Binh_Tien_Giang', 10.290344, 106.592403, 6],
 ['Hung_My_Tra_Vinh', 9.883058, 106.447458, 7],
 ['Loc_Thuan_Ben_Tre', 10.242186, 106.602697, 8],
 ['Rach_Gia_Kien_Giang', 10.012292, 105.084019, 9],
 ['Soc_Trang_Soc_Trang', 9.597708, 106.017856, 10],
 ['Soc_Doc_Ben_Tre', 10.033333, 106.05, 11],
 ['Vam_Kenh_Tien_Giang', 10.274361, 106.73715, 12]]

# Extract the data from satellite

In [12]:
salinity.tail()

Unnamed: 0,Station,Day,1,3,5,7,9,11,13,15,17,19,21,23
7541,Vam_Kenh_Tien_Giang,22-06-2018,4.0,1.9,0.6,3.1,6.8,8.8,8.2,7.1,3.0,1.4,4.8,6.0
7542,Vam_Kenh_Tien_Giang,23-06-2018,5.4,3.1,1.5,4.1,7.5,6.7,6.0,6.9,2.4,0.4,3.2,6.1
7543,Vam_Kenh_Tien_Giang,24-06-2018,6.6,4.7,2.0,4.1,6.2,8.0,6.5,8.1,6.4,3.1,0.2,4.1
7548,Vam_Kenh_Tien_Giang,29-06-2018,4.1,6.1,8.0,6.6,4.9,3.8,6.5,7.5,5.8,4.4,2.8,0.0
7549,Vam_Kenh_Tien_Giang,30-06-2018,3.2,6.7,7.8,7.0,6.5,3.9,6.4,7.0,6.7,5.0,3.0,0.0


In [13]:
#Check the type of columns
salinity.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6711 entries, 0 to 7549
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Station  6711 non-null   object 
 1   Day      6711 non-null   object 
 2   1        6711 non-null   float64
 3   3        6711 non-null   float64
 4   5        6711 non-null   object 
 5   7        6711 non-null   float64
 6   9        6711 non-null   object 
 7   11       6711 non-null   float64
 8   13       6711 non-null   float64
 9   15       6711 non-null   float64
 10  17       6711 non-null   object 
 11  19       6711 non-null   object 
 12  21       6711 non-null   float64
 13  23       6711 non-null   float64
dtypes: float64(8), object(6)
memory usage: 786.4+ KB


In [14]:
salinity['Day'] = pd.to_datetime(salinity['Day'], dayfirst=True, errors='coerce').dt.date

  salinity['Day'] = pd.to_datetime(salinity['Day'], dayfirst=True, errors='coerce').dt.date


In [15]:
date_min = ee.Date(salinity['Day'].min().isoformat()) # Change date_min to 2013-01-01 #salinity['Day'].min()
date_max = ee.Date(salinity['Day'].max().isoformat())

In [16]:
date_min, date_max

(<ee.ee_date.Date at 0x7f6869f38c50>, <ee.ee_date.Date at 0x7f6869f38150>)

In [17]:
#Create spatial pivot
collections = []
for station in station_list:
  collections.append(ee.Feature(ee.Geometry.Point([station[2], station[1]]) ,{'pivot': station[3]}))
pivots = ee.FeatureCollection(collections)
pivots.getInfo()

{'type': 'FeatureCollection',
 'columns': {'pivot': 'Integer', 'system:index': 'String'},
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [106.601953, 9.980203]},
   'id': '0',
   'properties': {'pivot': 1}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [106.475456, 10.638333]},
   'id': '1',
   'properties': {'pivot': 2}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [106.526731, 9.888364]},
   'id': '2',
   'properties': {'pivot': 3}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [105.148711, 9.172244]},
   'id': '3',
   'properties': {'pivot': 4}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [106.074472, 9.734864]},
   'id': '4',
   'properties': {'pivot': 5}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [106.592403, 10.290344]},
   'id': '5',
   'properties': {'pivot': 6}},
  {'type': 'Feature',
   'geometry': {'type': 'Poi

In [18]:
def add_water_indices(img):
    # Try to map standard band names
    # Create a dictionary to map them
    band_map = {
        'BLUE': 'SR_B1',
        'GREEN': 'SR_B2',
        'RED': 'SR_B3',
        'NIR': 'SR_B4',
        'SWIR1': 'SR_B5',
        'SWIR2': 'SR_B7'
    }

    # Now select correct bands
    blue = img.select(band_map['BLUE'])
    green = img.select(band_map['GREEN'])
    red = img.select(band_map['RED'])
    nir = img.select(band_map['NIR'])
    swir1 = img.select(band_map['SWIR1'])
    swir2 = img.select(band_map['SWIR2'])

    ndwi = green.subtract(nir).divide(green.add(nir)).rename("NDWI")
    mndwi = green.subtract(swir1).divide(green.add(swir1)).rename("MNDWI")
    wi1 = green.add(nir).divide(green.subtract(nir)).rename("WI1")
    wi2 = green.add(swir1).divide(green.subtract(swir1)).rename("WI2")

    # If you want AWEI manually:
    awei_sh = green.subtract(swir1).multiply(4).subtract(
              nir.multiply(0.25)).subtract(
              swir2.multiply(2.75)).rename('AWEIsh')

    awei_nsh = blue.add(green.multiply(2.5)).subtract(
               nir.multiply(1.5)).subtract(
               swir1.multiply(1.5)).subtract(
               swir2.multiply(0.25)).rename('AWEInsh')

    return img.addBands([ndwi, mndwi, wi1, wi2, awei_sh, awei_nsh])

In [19]:
def params_retrieval(dataset, res):
    Map = geemap.Map()
    satellite = (ee.ImageCollection(dataset)
                 .filterBounds(pivots)
                 .filterDate(date_min, date_max)
                 .preprocess())

    # Check if satellite collection is empty
    if satellite.size().getInfo() == 0:
        raise ValueError("No images found for the given dataset and date range.")

    # Count original bands (before any processing)
    original_band_names = satellite.first().bandNames().getInfo()
    print(f"Original bands: {original_band_names}")

    # Process: either spectralIndices or manual map
    if dataset == 'LANDSAT/LC08/C02/T1_L2':
        try:
            indices = ['NDWI', 'MNDWI', 'WI1', 'WI2', 'AWEIsh', 'AWEInsh']
            satellite = satellite.spectralIndices(indices)
        except Exception as e:
            raise RuntimeError(f"Error computing spectral indices: {e}")
    else:
        try:
            satellite = satellite.map(add_water_indices)
        except Exception as e:
            raise RuntimeError(f"Error applying add_water_indices: {e}")

    # Get band names after processing
    final_band_names = satellite.first().bandNames().getInfo()
    print(f"Bands after processing: {final_band_names}")

    # Sanity check: at least we should have more bands after adding indices
    if len(final_band_names) <= len(original_band_names):
        print("⚠️ Warning: Number of bands did not increase after processing. Are you sure add_water_indices() worked?")

    # Time series extraction
    ts = satellite.getTimeSeriesByRegions(
        collection=pivots,
        bands=final_band_names,
        reducer=[ee.Reducer.mean(), ee.Reducer.median()],
        scale= res
    )

    tsPandas = geemap.ee_to_df(ts)
    return tsPandas



In [20]:
Landsat = params_retrieval('LANDSAT/LC08/C02/T1_L2', 30)

Original bands: ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'SR_QA_AEROSOL', 'ST_B10', 'ST_ATRAN', 'ST_CDIST', 'ST_DRAD', 'ST_EMIS', 'ST_EMSD', 'ST_QA', 'ST_TRAD', 'ST_URAD', 'QA_PIXEL', 'QA_RADSAT']
Bands after processing: ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'SR_QA_AEROSOL', 'ST_B10', 'ST_ATRAN', 'ST_CDIST', 'ST_DRAD', 'ST_EMIS', 'ST_EMSD', 'ST_QA', 'ST_TRAD', 'ST_URAD', 'QA_PIXEL', 'QA_RADSAT', 'NDWI', 'MNDWI', 'WI1', 'WI2', 'AWEIsh', 'AWEInsh']




In [21]:
Landsat_7 = params_retrieval('LANDSAT/LE07/C02/T1_L2', 30)

Original bands: ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'SR_ATMOS_OPACITY', 'SR_CLOUD_QA', 'ST_B6', 'ST_ATRAN', 'ST_CDIST', 'ST_DRAD', 'ST_EMIS', 'ST_EMSD', 'ST_QA', 'ST_TRAD', 'ST_URAD', 'QA_PIXEL', 'QA_RADSAT']
Bands after processing: ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'SR_ATMOS_OPACITY', 'SR_CLOUD_QA', 'ST_B6', 'ST_ATRAN', 'ST_CDIST', 'ST_DRAD', 'ST_EMIS', 'ST_EMSD', 'ST_QA', 'ST_TRAD', 'ST_URAD', 'QA_PIXEL', 'QA_RADSAT', 'NDWI', 'MNDWI', 'WI1', 'WI2', 'AWEIsh', 'AWEInsh']


In [22]:
Landsat_5 = params_retrieval('LANDSAT/LT05/C02/T1_L2',30)

Original bands: ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'SR_ATMOS_OPACITY', 'SR_CLOUD_QA', 'ST_B6', 'ST_ATRAN', 'ST_CDIST', 'ST_DRAD', 'ST_EMIS', 'ST_EMSD', 'ST_QA', 'ST_TRAD', 'ST_URAD', 'QA_PIXEL', 'QA_RADSAT']
Bands after processing: ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'SR_ATMOS_OPACITY', 'SR_CLOUD_QA', 'ST_B6', 'ST_ATRAN', 'ST_CDIST', 'ST_DRAD', 'ST_EMIS', 'ST_EMSD', 'ST_QA', 'ST_TRAD', 'ST_URAD', 'QA_PIXEL', 'QA_RADSAT', 'NDWI', 'MNDWI', 'WI1', 'WI2', 'AWEIsh', 'AWEInsh']


In [24]:
# prompt: drop the -9999 from Landsat df

import numpy as np
Landsat = Landsat.replace(-9999, np.nan)


In [25]:
Landsat

Unnamed: 0,AWEInsh,AWEIsh,MNDWI,NDWI,QA_PIXEL,QA_RADSAT,SR_B1,SR_B2,SR_B3,SR_B4,...,ST_EMIS,ST_EMSD,ST_QA,ST_TRAD,ST_URAD,WI1,WI2,date,pivot,reducer
0,,,,,,,,,,,...,,,,,,,,2013-05-19T03:10:04,1,mean
1,,,,,,,,,,,...,,,,,,,,2013-05-19T03:10:04,2,mean
2,,,,,,,,,,,...,,,,,,,,2013-05-19T03:10:04,3,mean
3,,,,,,,,,,,...,,,,,,,,2013-05-19T03:10:04,4,mean
4,,,,,,,,,,,...,,,,,,,,2013-05-19T03:10:04,5,mean
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12547,,,,,,,,,,,...,,,,,,,,2018-05-31T03:19:13,8,median
12548,,,,,,,,,,,...,,,,,,,,2018-05-31T03:19:13,9,median
12549,,,,,,,,,,,...,,,,,,,,2018-05-31T03:19:13,10,median
12550,,,,,,,,,,,...,,,,,,,,2018-05-31T03:19:13,11,median


In [26]:
# prompt: Landsat dropna

Landsat.dropna(inplace=True)
Landsat

Unnamed: 0,AWEInsh,AWEIsh,MNDWI,NDWI,QA_PIXEL,QA_RADSAT,SR_B1,SR_B2,SR_B3,SR_B4,...,ST_EMIS,ST_EMSD,ST_QA,ST_TRAD,ST_URAD,WI1,WI2,date,pivot,reducer
35,0.017887,-0.234051,-0.067352,-0.525301,21824.0,0.0,0.019918,0.034630,0.064165,0.052148,...,0.9562,0.0128,6.19,8.296,5.392,0.247224,-0.055857,2013-06-20T03:09:59,12,mean
59,0.018437,-0.367955,-0.049796,-0.552540,21824.0,0.0,0.031082,0.042660,0.091307,0.084680,...,0.9562,0.0128,6.46,8.593,4.996,0.297604,-0.073465,2013-08-07T03:10:04,12,mean
71,0.024205,-0.368120,-0.117039,-0.470369,21824.0,0.0,0.044310,0.058720,0.114105,0.116333,...,0.9562,0.0128,5.20,8.479,4.904,0.166092,-0.163056,2013-08-23T03:10:06,12,mean
119,0.024466,-0.256401,0.017666,-0.568793,21824.0,0.0,0.021595,0.029405,0.063368,0.051350,...,0.9562,0.0128,6.68,8.517,4.898,0.407815,0.049055,2013-11-11T03:09:47,12,mean
179,-0.020214,-0.273568,-0.180213,-0.492155,21824.0,0.0,0.033667,0.042825,0.074120,0.072745,...,0.9562,0.0128,5.86,9.152,2.685,0.106537,-0.165794,2014-01-30T03:09:08,12,mean
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12464,0.228310,-0.037274,0.074182,-0.150507,21824.0,0.0,0.048407,0.069060,0.106322,0.106488,...,0.9709,0.0105,3.80,9.346,2.714,0.174348,-0.039583,2018-02-08T03:20:16,9,median
12476,0.107090,-0.074647,-0.111274,-0.140268,21824.0,0.0,0.037958,0.051955,0.078630,0.079813,...,0.9709,0.0105,3.46,9.657,3.137,0.010068,-0.194605,2018-02-24T03:20:10,9,median
12488,0.134734,-0.084444,-0.088407,-0.133493,21824.0,0.0,0.049177,0.068647,0.104067,0.106102,...,0.9709,0.0105,5.67,9.677,3.784,0.068469,-0.138555,2018-03-12T03:20:01,9,median
12512,0.166366,-0.102291,-0.099175,-0.100925,21824.0,0.0,0.047060,0.066970,0.120897,0.127195,...,0.9709,0.0105,4.51,9.921,4.512,0.035203,-0.254418,2018-04-13T03:19:45,9,median


In [27]:
# prompt: Landsat dropna
Landsat_7 = Landsat_7.replace(-9999, np.nan)
Landsat_7.dropna(inplace=True)
Landsat_7

Unnamed: 0,AWEInsh,AWEIsh,MNDWI,NDWI,QA_PIXEL,QA_RADSAT,SR_ATMOS_OPACITY,SR_B1,SR_B2,SR_B3,...,ST_EMSD,ST_QA,ST_TRAD,ST_URAD,WI1,WI2,date,pivot,reducer,SR_CLOUD_QA
59,-0.066974,0.063826,0.279551,-0.331170,5440.0,0.0,0.394,0.071508,0.099942,0.106625,...,0.0128,6.81,8.892,3.448,-3.019601,3.577170,2009-12-10T02:58:57,12,mean,1.0
72,0.009998,-0.190907,0.030183,-0.055095,5506.0,0.0,0.346,0.082618,0.094800,0.118312,...,0.0189,7.19,8.892,3.618,-18.150611,33.131413,2010-01-27T02:59:44,1,mean,32.0
83,-0.148395,-0.102116,0.023347,-0.464239,5440.0,0.0,0.395,0.048022,0.062680,0.064852,...,0.0128,4.10,8.855,3.376,-2.154062,42.832168,2010-01-27T02:59:44,12,mean,1.0
108,0.105128,0.204832,0.688309,-0.100552,5440.0,0.0,0.260,0.075192,0.095075,0.122273,...,0.0189,6.61,9.190,3.428,-9.945078,1.452836,2010-03-16T03:00:12,1,mean,32.0
227,-0.321088,-0.296445,-0.242789,-0.624710,5440.0,0.0,0.117,0.043072,0.055063,0.051982,...,0.0128,4.47,8.632,5.347,-1.600742,-4.118805,2011-05-06T03:01:40,12,mean,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11547,-0.054840,-0.482098,-0.071724,-0.104272,5698.0,0.0,0.060,0.122740,0.134180,0.140890,...,0.0000,7.23,8.334,4.804,-9.590269,-13.942368,2017-10-04T03:17:14,4,median,8.0
11571,-0.025298,-0.282750,-0.160215,-0.013827,5504.0,0.0,0.420,0.057372,0.059820,0.056905,...,0.0000,6.53,8.372,4.275,-72.320417,-6.241621,2017-11-21T03:17:12,4,median,8.0
11679,-0.161272,-1.018423,-0.156230,-0.113730,5440.0,0.0,0.060,0.205707,0.214838,0.225288,...,0.0000,5.54,8.818,4.587,-8.792791,-6.400811,2018-04-14T03:15:24,4,median,8.0
12044,0.003054,-0.395473,-0.007088,-0.049184,5442.0,0.0,0.347,0.129890,0.140615,0.148700,...,0.0105,7.24,8.595,4.675,-20.331844,-141.089664,2011-11-12T03:13:57,9,median,8.0


In [28]:
# prompt: Landsat dropna
Landsat_5 = Landsat_5.replace(-9999, np.nan)
Landsat_5.dropna(inplace=True)
Landsat_5

Unnamed: 0,AWEInsh,AWEIsh,MNDWI,NDWI,QA_PIXEL,QA_RADSAT,SR_ATMOS_OPACITY,SR_B1,SR_B2,SR_B3,...,ST_EMIS,ST_EMSD,ST_QA,ST_TRAD,ST_URAD,WI1,WI2,date,pivot,reducer
11,-0.088122,0.111531,0.447957,-0.411879,5440.0,0.0,0.262,0.057070,0.084267,0.082315,...,0.9856,0.0128,4.50,8.713,4.382,-2.427900,2.232355,2009-02-17T02:53:59,12,mean
107,-0.033514,0.037522,0.264682,-0.279482,5440.0,0.0,0.354,0.057702,0.082452,0.077338,...,0.9856,0.0128,5.59,8.326,5.354,-3.578050,3.778124,2009-06-25T02:56:33,12,mean
156,0.159997,0.230269,0.579147,0.084272,5506.0,0.0,0.328,0.072003,0.114463,0.136628,...,0.9856,0.0189,7.37,8.713,5.316,11.866376,1.726677,2009-09-13T02:57:47,1,mean
167,-0.280780,-0.161269,-0.005096,-0.536135,5440.0,0.0,0.336,0.048242,0.080527,0.081408,...,0.9478,0.0128,6.04,8.437,5.272,-1.865201,-196.218182,2009-09-13T02:57:47,12,mean
179,-0.168876,0.020857,0.248268,-0.442067,5440.0,0.0,0.330,0.061030,0.097963,0.101840,...,0.9856,0.0128,6.32,8.215,5.145,-2.262098,4.027908,2009-10-15T02:58:09,12,mean
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2283,-0.015783,-0.375982,-0.060343,0.012736,5504.0,0.0,0.324,0.078630,0.102775,0.104040,...,0.9597,0.0000,3.85,9.101,3.570,78.516441,-16.571970,2009-12-09T03:05:12,4,median
2295,-0.006000,-0.284483,-0.026095,-0.003267,5504.0,0.0,0.408,0.072717,0.096477,0.096093,...,0.9597,0.0000,4.75,8.713,4.763,-306.067194,-38.322050,2010-01-26T03:05:28,4,median
2403,0.064510,-0.141971,0.150130,-0.002946,5440.0,0.0,0.336,0.102087,0.130302,0.134813,...,0.9597,0.0000,4.95,8.880,4.968,-339.448052,6.660910,2011-10-28T03:02:32,4,median
2456,-0.071766,-0.535269,-0.157586,-0.018158,5504.0,0.0,0.090,0.082205,0.110035,0.116800,...,0.9623,0.0105,5.10,9.046,5.439,-55.071253,-6.345722,2009-04-20T03:07:42,9,median


In [30]:
# prompt: save landsat_5, landsat_7, landsat_8

Landsat.to_csv('landsat_8.csv', index=False)
Landsat_7.to_csv('landsat_7.csv', index=False)
Landsat_5.to_csv('landsat_5.csv', index=False)
