In [21]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import pyeto
import spei

In [22]:
# Make the project root importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
# NOTE(review): this is an absolute, machine-specific path - consider deriving
# it from an environment variable or from the notebook location.
PROJECT_ROOT = '/home/pooya/w/DroughtMonitoringIran/'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Directory holding the CSV exports read by the NDVI/EVI and LST cells
# (relative to the current working directory).
DATA_PATH = "./assets/data/GEE_CSV_Exports/"
# SQLite database file the notebook reads from and writes to
# (relative to the current working directory).
DATABASE_PATH = "./database/database.db"

In [23]:
# Load the three input tables from the SQLite database:
#   ground_data_monthly -> monthly_data
#   pet_monthly         -> monthly_PET
#   ground_data_geoinfo -> geoinfo
conn = sqlite3.connect(DATABASE_PATH)
try:
    monthly_data = pd.read_sql(sql='SELECT * FROM ground_data_monthly', con=conn)
    monthly_PET = pd.read_sql(sql='SELECT * FROM pet_monthly', con=conn)
    geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)
finally:
    # Release the database handle even when one of the queries fails.
    conn.close()

# Convert the 'Date' columns to pandas datetimes after loading.
monthly_data['Date'] = pd.to_datetime(monthly_data['Date'])
monthly_PET['Date'] = pd.to_datetime(monthly_PET['Date'])

In [24]:
# Display the monthly station table read from `ground_data_monthly`.
monthly_data

Unnamed: 0,Station_ID,Date,Temp_Max,Temp_Min,Temp_Mean,Wind_Speed,Pressure,Humidity_Max,Humidity_Min,Humidity,Sunshine,Precip
0,40709,2006-01-31,6.8,0.9,3.8,0.9,1026.7,96.6,70.3,85.1,2.3,134.46
1,40709,2006-02-28,9.2,3.2,6.2,1.1,1020.6,96.6,78.6,88.8,2.9,37.58
2,40709,2006-03-31,13.2,6.8,10.0,1.7,1019.1,95.6,70.6,84.4,3.2,85.78
3,40709,2006-04-30,16.5,10.9,13.7,0.8,1017.7,96.4,73.8,86.9,3.1,99.07
4,40709,2006-05-31,21.0,14.7,17.9,1.9,1018.8,93.1,67.7,81.3,6.0,39.90
...,...,...,...,...,...,...,...,...,...,...,...,...
5570,99361,2024-03-31,12.1,1.8,6.9,2.0,819.8,80.0,41.8,59.0,6.4,13.60
5571,99361,2024-04-30,15.7,6.2,10.9,2.1,822.6,83.8,48.4,63.0,7.7,48.61
5572,99361,2024-05-31,18.0,9.3,13.6,1.9,821.0,87.9,61.4,75.2,5.3,49.11
5573,99361,2024-06-30,23.2,13.8,18.5,2.0,822.2,86.9,55.9,69.9,7.4,158.21


In [25]:
# Display the monthly PET table read from `pet_monthly`.
monthly_PET

Unnamed: 0,Station_ID,Date,PET_Hargreaves
0,40709,2006-01-31,23.76
1,40709,2006-02-28,30.78
2,40709,2006-03-31,55.38
3,40709,2006-04-30,70.81
4,40709,2006-05-31,103.12
...,...,...,...
5570,99361,2024-03-31,67.21
5571,99361,2024-04-30,88.16
5572,99361,2024-05-31,107.19
5573,99361,2024-06-30,128.74


In [26]:
# Display the station metadata table read from `ground_data_geoinfo`.
geoinfo

Unnamed: 0,Station_ID,Station_Name,Province,Station_Latitude,Station_Longitude,Station_Elevation
0,40759,Sari,Mazandaran,36.536,52.998,23.0
1,99306,Bandar-e-amirabad,Mazandaran,36.856,53.386,-20.0
2,99357,Baladeh,Mazandaran,36.198,51.801,2120.0
3,99299,Galugah,Mazandaran,36.738,53.837,-10.0
4,40737,Gharakhil,Mazandaran,36.487,52.108,14.7
5,40760,Kiyasar,Mazandaran,36.248,53.546,1294.3
6,99361,Alasht,Mazandaran,36.071,52.843,1805.0
7,99309,Amol,Mazandaran,36.479,52.468,23.7
8,99348,Kojur,Mazandaran,36.39,51.729,1550.0
9,99360,Polsefid,Mazandaran,36.104,53.062,610.0


### NDVI & EVI

In [27]:
# Collect the NDVI / EVI CSV exports. The listing is sorted because
# os.listdir returns entries in arbitrary order and the merge order below
# decides the column order (and which file receives pandas' "_x"/"_y" suffix
# when two files produce the same column name).
list_fles = sorted(
    x for x in os.listdir(DATA_PATH)
    if x.endswith(".csv") and ("NDVI" in x or "EVI" in x)
)
if not list_fles:
    # Fail here with a clear message instead of a KeyError in a later merge.
    raise FileNotFoundError(f"No NDVI/EVI CSV files found in {DATA_PATH!r}")

# `None` (rather than an empty frame + `.empty`) marks "nothing merged yet",
# so a header-only first file is not silently discarded.
vi_dataset = None
for lf in list_fles:
    # The value column is named "<3rd filename token>_<1st filename token>"
    # (tokens separated by "_"), e.g. "NDVI_MOD13A3".
    name_parts = lf.split('_')
    data = (
        pd.read_csv(filepath_or_buffer=os.path.join(DATA_PATH, lf), na_values=-999)
        .loc[:, ["Province", "St_Name", "date", "mean"]]
        .rename(
            columns={
                "St_Name": "Station_Name",
                "date": "Date",
                "mean": f"{name_parts[2]}_{name_parts[0]}",
            }
        )
    )
    if vi_dataset is None:
        vi_dataset = data
    else:
        # Outer join keeps station/date pairs present in only one of the files.
        vi_dataset = vi_dataset.merge(data, on=["Province", "Station_Name", "Date"], how="outer")


def merge_columns(df, col_prefix):
    """Collapse all columns whose name starts with ``col_prefix`` into one.

    The frame is modified in place and nothing is returned.

    * More than one matching column: they are replaced by a single column
      named ``col_prefix`` holding their row-wise mean (NaN values skipped).
    * Exactly one matching column: it is renamed to ``col_prefix``.
    * No matching column: the frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; its column labels must be strings.
    col_prefix : str
        Prefix to match and name of the resulting column.
    """
    cols = [col for col in df.columns if col.startswith(col_prefix)]
    if len(cols) > 1:
        # Compute the mean first, drop the sources, then assign. Assigning
        # before dropping would delete the result whenever a column named
        # exactly `col_prefix` is itself one of the matched columns.
        merged = df[cols].mean(axis=1, skipna=True)
        df.drop(columns=cols, inplace=True)
        df[col_prefix] = merged
    elif len(cols) == 1:
        df.rename(columns={cols[0]: col_prefix}, inplace=True)


# Collapse value columns that share the same "<token0>_<token1>" prefix
# (e.g. suffixed duplicates created by the merges) into one column each.
unique_prefixes = {
    '_'.join(col.split('_')[:2]) for col in vi_dataset.columns if '_' in col
}
for prefix in sorted(unique_prefixes):
    merge_columns(vi_dataset, prefix)

# Station metadata used to attach Station_ID to every satellite record.
gi = geoinfo[["Station_ID", "Station_Name", "Province"]]

vi_dataset = (
    gi.merge(vi_dataset, on=["Province", "Station_Name"], how="left")
    .sort_values(by=["Province", "Station_Name", "Date"])
    .reset_index(drop=True)
    .drop_duplicates()
    # Stations without any satellite row leave the left join with a missing
    # Date; they carry no data and would break the month extraction below.
    .dropna(subset=["Date"])
    .loc[:, ["Station_ID", "Date", "NDVI_MOD13A3", "NDVI_MYD13A3", "EVI_MOD13A3", "EVI_MYD13A3"]]
    # Explicit copy so the column assignments below act on an independent frame.
    .copy()
)

# Row-wise mean of the two products (NaN skipped, so a single available
# product is used as-is).
vi_dataset['NDVI'] = vi_dataset[['NDVI_MOD13A3', 'NDVI_MYD13A3']].mean(axis=1)
vi_dataset['EVI'] = vi_dataset[['EVI_MOD13A3', 'EVI_MYD13A3']].mean(axis=1)

# Second "-"-separated token of Date (the month in "YYYY-MM" strings).
vi_dataset['Month'] = vi_dataset['Date'].str.split('-').str[1]

# Per station and calendar month: NDVI extremes over all available years.
monthly_stats = (
    vi_dataset.groupby(['Station_ID', 'Month'])
    .agg(
        NDVImin=('NDVI', 'min'),
        NDVImax=('NDVI', 'max'),
    )
    .reset_index()
)

vi_dataset = vi_dataset.merge(monthly_stats, on=['Station_ID', 'Month'])

# VCI = (NDVI - NDVImin) / (NDVImax - NDVImin), on a 0-1 scale.
# A station-month with NDVImax == NDVImin gives 0/0 and therefore NaN.
ndvi_range = vi_dataset['NDVImax'] - vi_dataset['NDVImin']
vi_dataset['VCI'] = (vi_dataset['NDVI'] - vi_dataset['NDVImin']) / ndvi_range

# Drop the helper columns and round every numeric column to 2 decimals.
vi_dataset = vi_dataset.drop(columns=['Month', 'NDVImin', 'NDVImax']).round(2)

vi_dataset

Unnamed: 0,Station_ID,Date,NDVI_MOD13A3,NDVI_MYD13A3,EVI_MOD13A3,EVI_MYD13A3,NDVI,EVI,VCI
0,40709,2000-02,0.44,,0.20,,0.44,0.20,0.75
1,40709,2000-03,0.38,,0.17,,0.38,0.17,0.23
2,40709,2000-04,0.44,,0.21,,0.44,0.21,0.41
3,40709,2000-05,0.44,,0.20,,0.44,0.20,0.08
4,40709,2000-06,0.67,,0.38,,0.67,0.38,0.98
...,...,...,...,...,...,...,...,...,...
7445,40735,2024-07,0.54,0.63,0.31,0.42,0.58,0.37,1.00
7446,40735,2024-08,0.51,0.57,0.27,0.31,0.54,0.29,1.00
7447,40735,2024-09,0.44,0.53,0.21,0.31,0.49,0.26,1.00
7448,40735,2024-10,0.53,0.48,0.24,0.29,0.51,0.26,1.00


### LST

In [28]:
# Collect the LSTDay / LSTNight CSV exports. The listing is sorted because
# os.listdir returns entries in arbitrary order and the merge order below
# decides the column order (and which file receives pandas' "_x"/"_y" suffix
# when two files produce the same column name).
list_fles = sorted(
    x for x in os.listdir(DATA_PATH)
    if x.endswith(".csv") and ("LSTNight" in x or "LSTDay" in x)
)
if not list_fles:
    # Fail here with a clear message instead of a KeyError in a later merge.
    raise FileNotFoundError(f"No LSTDay/LSTNight CSV files found in {DATA_PATH!r}")

# `None` (rather than an empty frame + `.empty`) marks "nothing merged yet",
# so a header-only first file is not silently discarded.
lst_dataset = None
for lf in list_fles:
    # The value column is named "<3rd filename token>_<1st filename token>"
    # (tokens separated by "_"), e.g. "LSTDay_MOD21C3".
    name_parts = lf.split('_')
    data = (
        pd.read_csv(filepath_or_buffer=os.path.join(DATA_PATH, lf), na_values=-999)
        .loc[:, ["Province", "St_Name", "date", "mean"]]
        .rename(
            columns={
                "St_Name": "Station_Name",
                "date": "Date",
                "mean": f"{name_parts[2]}_{name_parts[0]}",
            }
        )
    )
    if lst_dataset is None:
        lst_dataset = data
    else:
        # Outer join keeps station/date pairs present in only one of the files.
        lst_dataset = lst_dataset.merge(data, on=["Province", "Station_Name", "Date"], how="outer")


# NOTE(review): this re-defines (and shadows) the `merge_columns` already
# defined in the NDVI & EVI cell; consider keeping a single definition.
def merge_columns(df, col_prefix):
    """Collapse all columns whose name starts with ``col_prefix`` into one.

    The frame is modified in place and nothing is returned.

    * More than one matching column: they are replaced by a single column
      named ``col_prefix`` holding their row-wise mean (NaN values skipped).
    * Exactly one matching column: it is renamed to ``col_prefix``.
    * No matching column: the frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; its column labels must be strings.
    col_prefix : str
        Prefix to match and name of the resulting column.
    """
    cols = [col for col in df.columns if col.startswith(col_prefix)]
    if len(cols) > 1:
        # Compute the mean first, drop the sources, then assign. Assigning
        # before dropping would delete the result whenever a column named
        # exactly `col_prefix` is itself one of the matched columns.
        merged = df[cols].mean(axis=1, skipna=True)
        df.drop(columns=cols, inplace=True)
        df[col_prefix] = merged
    elif len(cols) == 1:
        df.rename(columns={cols[0]: col_prefix}, inplace=True)


# Collapse value columns that share the same "<token0>_<token1>" prefix
# (e.g. suffixed duplicates created by the merges) into one column each.
unique_prefixes = {
    '_'.join(col.split('_')[:2]) for col in lst_dataset.columns if '_' in col
}
for prefix in sorted(unique_prefixes):
    merge_columns(lst_dataset, prefix)

# Station metadata used to attach Station_ID to every satellite record.
gi = geoinfo[["Station_ID", "Station_Name", "Province"]]

lst_dataset = (
    gi.merge(lst_dataset, on=["Province", "Station_Name"], how="left")
    .sort_values(by=["Province", "Station_Name", "Date"])
    .reset_index(drop=True)
    .drop_duplicates()
    # Stations without any satellite row leave the left join with a missing
    # Date; they carry no data and would break the month extraction below.
    .dropna(subset=["Date"])
    .loc[:, ["Station_ID", "Date", "LSTDay_MOD21C3", "LSTDay_MYD21C3", "LSTNight_MOD21C3", "LSTNight_MYD21C3"]]
    # Explicit copy so the column assignments below act on an independent frame.
    .copy()
)

# Row-wise means (NaN skipped, so a single available product is used as-is).
lst_dataset['LSTDay'] = lst_dataset[['LSTDay_MOD21C3', 'LSTDay_MYD21C3']].mean(axis=1)
lst_dataset['LSTNight'] = lst_dataset[['LSTNight_MOD21C3', 'LSTNight_MYD21C3']].mean(axis=1)
# NOTE(review): because NaN is skipped, a month with only a day (or only a
# night) value gets LST equal to that single value - confirm this is intended.
lst_dataset['LST'] = lst_dataset[['LSTDay', 'LSTNight']].mean(axis=1)

# Second "-"-separated token of Date (the month in "YYYY-MM" strings).
lst_dataset['Month'] = lst_dataset['Date'].str.split('-').str[1]

# Per station and calendar month: extremes over all available years.
monthly_stats = (
    lst_dataset.groupby(['Station_ID', 'Month'])
    .agg(
        LSTDaymin=('LSTDay', 'min'),
        LSTDaymax=('LSTDay', 'max'),
        LSTNightmin=('LSTNight', 'min'),
        LSTNightmax=('LSTNight', 'max'),
        LSTmin=('LST', 'min'),
        LSTmax=('LST', 'max'),
    )
    .reset_index()
)

lst_dataset = lst_dataset.merge(monthly_stats, on=['Station_ID', 'Month'])

# TCI = (LSTmax - LST) / (LSTmax - LSTmin), on a 0-1 scale, computed
# separately for day, night and the combined LST.
# A station-month with max == min gives 0/0 and therefore NaN.
for tci_col, lst_col in (('TCIDay', 'LSTDay'), ('TCINight', 'LSTNight'), ('TCI', 'LST')):
    lst_max = lst_dataset[f'{lst_col}max']
    lst_min = lst_dataset[f'{lst_col}min']
    lst_dataset[tci_col] = (lst_max - lst_dataset[lst_col]) / (lst_max - lst_min)

# Drop the helper columns and round every numeric column to 2 decimals.
lst_dataset = lst_dataset.drop(
    columns=['Month', 'LSTDaymin', 'LSTDaymax', 'LSTNightmin', 'LSTNightmax', 'LSTmin', 'LSTmax']
).round(2)

lst_dataset

Unnamed: 0,Station_ID,Date,LSTDay_MOD21C3,LSTDay_MYD21C3,LSTNight_MOD21C3,LSTNight_MYD21C3,LSTDay,LSTNight,LST,TCIDay,TCINight,TCI
0,40709,2000-02,14.81,,,,14.81,,14.81,0.00,,0.00
1,40709,2000-03,16.83,,8.27,,16.83,8.27,12.55,0.38,0.47,0.36
2,40709,2000-04,24.07,,12.57,,24.07,12.57,18.32,0.00,0.18,0.04
3,40709,2000-05,28.87,,18.57,,28.87,18.57,23.72,0.00,0.33,0.00
4,40709,2000-06,31.21,,20.45,,31.21,20.45,25.83,0.09,0.76,0.45
...,...,...,...,...,...,...,...,...,...,...,...,...
7445,40735,2024-07,23.57,35.83,12.31,11.89,29.70,12.10,20.90,1.00,0.70,0.92
7446,40735,2024-08,26.19,38.95,12.79,13.17,32.57,12.98,22.78,0.92,0.10,0.68
7447,40735,2024-09,27.23,36.13,10.55,12.29,31.68,11.42,21.55,0.56,0.00,0.38
7448,40735,2024-10,17.93,21.03,4.43,6.65,19.48,5.54,12.51,1.00,0.33,0.78


### VHI

In [29]:
# Weight of VCI in each VHI variant; the matching TCI term gets (1 - alpha).
alpha = 0.5

vhi_keys = ["Station_ID", "Date"]
vhi_cols = ["VHIDay", "VHINight", "VHI"]

# VHI* = alpha * VCI + (1 - alpha) * TCI*, rounded to 2 decimals.
# A missing VCI or TCI value propagates as NaN into the matching VHI column.
vhi_dataset = (
    vi_dataset[vhi_keys + ["VCI"]]
    .merge(lst_dataset[vhi_keys + ["TCIDay", "TCINight", "TCI"]], on=vhi_keys, how="outer")
    .assign(
        VHIDay=lambda x: alpha * x['VCI'] + (1 - alpha) * x['TCIDay'],
        VHINight=lambda x: alpha * x['VCI'] + (1 - alpha) * x['TCINight'],
        VHI=lambda x: alpha * x['VCI'] + (1 - alpha) * x['TCI'],
    )
    .round(2)
    .filter(items=vhi_keys + vhi_cols)
)

# Drop VHI columns left by a previous execution of this cell first; otherwise
# re-running it would produce "VHIDay_x"/"VHIDay_y"-style duplicates.
vi_dataset = (
    vi_dataset
    .drop(columns=vhi_cols, errors="ignore")
    .merge(right=vhi_dataset, on=vhi_keys)
)

vi_dataset

Unnamed: 0,Station_ID,Date,NDVI_MOD13A3,NDVI_MYD13A3,EVI_MOD13A3,EVI_MYD13A3,NDVI,EVI,VCI,VHIDay,VHINight,VHI
0,40709,2000-02,0.44,,0.20,,0.44,0.20,0.75,0.38,,0.38
1,40709,2000-03,0.38,,0.17,,0.38,0.17,0.23,0.30,0.35,0.30
2,40709,2000-04,0.44,,0.21,,0.44,0.21,0.41,0.20,0.30,0.22
3,40709,2000-05,0.44,,0.20,,0.44,0.20,0.08,0.04,0.20,0.04
4,40709,2000-06,0.67,,0.38,,0.67,0.38,0.98,0.54,0.87,0.72
...,...,...,...,...,...,...,...,...,...,...,...,...
7445,40735,2024-07,0.54,0.63,0.31,0.42,0.58,0.37,1.00,1.00,0.85,0.96
7446,40735,2024-08,0.51,0.57,0.27,0.31,0.54,0.29,1.00,0.96,0.55,0.84
7447,40735,2024-09,0.44,0.53,0.21,0.31,0.49,0.26,1.00,0.78,0.50,0.69
7448,40735,2024-10,0.53,0.48,0.24,0.29,0.51,0.26,1.00,1.00,0.66,0.89


In [30]:
# Persist the results, replacing any existing tables of the same name:
#   vi_dataset  -> di_monthly
#   lst_dataset -> lst_monthly
conn = sqlite3.connect(DATABASE_PATH)
try:
    vi_dataset.to_sql('di_monthly', conn, if_exists='replace', index=False)
    conn.commit()

    lst_dataset.to_sql('lst_monthly', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Release the database handle even when one of the writes fails.
    conn.close()