In [1]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import pyeto
import spei

In [2]:
# Add the project checkout to the module search path.
# NOTE(review): this is an absolute, user-specific path, and no project-local
# module is imported in the cells visible here — confirm it is still needed.
sys.path.append('/home/pooya/w/DroughtMonitoringIran/')

# SQLite database file opened by the load and persist cells below.
# The path is relative, so it is resolved against the kernel's working directory.
DATABASE_PATH = "./database/database.db"

In [3]:
# Load the monthly precipitation and remote-sensing index tables from SQLite.
# The connection is closed in a `finally` clause so it is released even when
# one of the queries fails (e.g. a missing table).
conn = sqlite3.connect(DATABASE_PATH)
try:
    precip_monthly = pd.read_sql(sql='SELECT * FROM gee_precip_monthly', con=conn)
    indices_monthly = pd.read_sql(sql='SELECT * FROM gee_indices_monthly', con=conn)
finally:
    conn.close()

# The Date columns are converted explicitly to datetime64 because the later
# cells rely on the `.dt` accessor.
precip_monthly['Date'] = pd.to_datetime(precip_monthly['Date'])
indices_monthly['Date'] = pd.to_datetime(indices_monthly['Date'])

In [4]:
# Preview the monthly precipitation table (one row per station and month).
precip_monthly

Unnamed: 0,Station_ID,Date,ERA5_Precipitation,GPM_Precipitation,TRMM_Precipitation,TERRACLIMATE_Precipitation,PERSIANNCDR_Precipitation
0,40709,2006-09-30,130.801336,115.920010,54.357873,117.0,39.081124
1,40709,2006-10-31,280.789058,164.424012,120.467601,212.0,84.028451
2,40709,2006-11-30,245.921390,101.520002,67.297440,122.0,78.084248
3,40709,2006-12-31,183.809788,77.376002,74.464657,110.0,76.726466
4,40709,2007-01-31,64.469668,26.040000,5.355088,26.0,21.264424
...,...,...,...,...,...,...,...
5131,99361,2024-02-29,100.628399,82.824002,,,49.011868
5132,99361,2024-03-31,80.836829,16.368001,,,24.582743
5133,99361,2024-04-30,112.907503,36.000001,,,
5134,99361,2024-05-31,191.929751,52.080000,,,


In [5]:
# Preview the monthly NDVI / EVI / LST table (one row per station and month).
indices_monthly

Unnamed: 0,Station_ID,Date,NDVI_MOD13A3,LSTDay_MOD21C3,LSTNight_MYD21C3,EVI_MYD13A3,LSTNight_MOD21C3,NDVI_MYD13A3,LSTDay_MYD21C3,EVI_MOD13A3,NDVI,EVI,LSTDay,LSTNight,LST
0,40709,2006-09-30,0.5450,29.37,18.67,0.2874,24.09,0.4742,25.87,0.3289,0.50960,0.30815,27.62,21.38,24.500
1,40709,2006-10-31,0.6019,25.09,16.37,0.3065,16.09,0.5955,22.93,0.3294,0.59870,0.31795,24.01,16.23,20.120
2,40709,2006-11-30,0.5557,18.23,9.15,0.2798,15.95,0.5524,16.37,0.2924,0.55405,0.28610,17.30,12.55,14.925
3,40709,2006-12-31,0.4903,10.49,2.97,0.2105,9.99,0.4332,9.81,0.2610,0.46175,0.23575,10.15,6.48,8.315
4,40709,2007-01-31,0.4191,11.37,2.73,0.2021,8.27,0.4171,8.31,0.1875,0.41810,0.19480,9.84,5.50,7.670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5131,99361,2024-02-29,0.3920,0.11,-1.47,0.1344,2.21,0.2661,14.45,0.1730,0.32905,0.15370,7.28,0.37,3.825
5132,99361,2024-03-31,,5.63,-0.07,0.1551,3.73,0.3203,22.47,,0.32030,0.15510,14.05,1.83,7.940
5133,99361,2024-04-30,0.3901,21.71,2.63,0.5179,7.97,0.7398,26.89,0.2489,0.56495,0.38340,24.30,5.30,14.800
5134,99361,2024-05-31,0.5332,22.71,6.59,0.4374,13.01,0.6308,30.37,0.3797,0.58200,0.40855,26.54,9.80,18.170


### PCI (Precipitation Condition Index)

In [6]:
# Precipitation Condition Index per precipitation product:
#
#     PCI = (P - Pmin) / (Pmax - Pmin)
#
# where Pmin / Pmax are the minimum / maximum of the product for the same
# station and the same calendar month over the whole record.

# Maps the PCI column suffix to the precipitation column it is derived from.
pci_sources = {
    'ERA5': 'ERA5_Precipitation',
    'GPM': 'GPM_Precipitation',
    'TRMM': 'TRMM_Precipitation',
    'TerraClimate': 'TERRACLIMATE_Precipitation',
    'PERSIANNCDR': 'PERSIANNCDR_Precipitation',
}

# Group once by station and calendar month; `transform` broadcasts each group
# statistic back onto the original rows, so no helper columns, merge or
# in-place drop are needed and the row order/index of the input is kept.
pci_groups = precip_monthly.groupby(['Station_ID', precip_monthly['Date'].dt.month])

pci_columns = {}
for product, source_column in pci_sources.items():
    group_min = pci_groups[source_column].transform('min')
    group_max = pci_groups[source_column].transform('max')
    # When a group has max == min the ratio is 0/0 and the result is NaN;
    # missing precipitation values propagate as NaN as well.
    pci_columns[f'PCI_{product}'] = (precip_monthly[source_column] - group_min) / (group_max - group_min)

# Keep every non-precipitation column (Station_ID, Date) and append the PCI columns.
tmp_pci = precip_monthly.drop(columns=list(pci_sources.values())).assign(**pci_columns)

tmp_pci

Unnamed: 0,Station_ID,Date,PCI_ERA5,PCI_GPM,PCI_TRMM,PCI_TerraClimate,PCI_PERSIANNCDR
0,40709,2006-09-30,0.349535,0.455752,0.235196,0.411504,0.197079
1,40709,2006-10-31,0.626091,0.492447,0.339456,0.483483,0.438384
2,40709,2006-11-30,0.484601,0.411067,0.323744,0.454128,0.611135
3,40709,2006-12-31,0.689053,0.522013,0.561862,0.761194,0.841176
4,40709,2007-01-31,0.104413,0.000000,0.000000,0.034483,0.000000
...,...,...,...,...,...,...,...
5131,99361,2024-02-29,0.534895,0.743910,,,0.165655
5132,99361,2024-03-31,0.268925,0.000000,,,0.214003
5133,99361,2024-04-30,0.292934,0.350000,,,
5134,99361,2024-05-31,0.979409,0.847458,,,


### VCI (Vegetation Condition Index)

In [7]:
# Vegetation Condition Index:
#
#     VCI = (NDVI - NDVImin) / (NDVImax - NDVImin)
#
# where NDVImin / NDVImax are taken over all years for the same station and
# the same calendar month.
ndvi = indices_monthly['NDVI']

# `transform` returns the group statistic aligned with the original rows, which
# avoids copying the full frame, merging the statistics back and dropping the
# helper columns in place.
ndvi_groups = ndvi.groupby([indices_monthly['Station_ID'], indices_monthly['Date'].dt.month])
ndvi_min = ndvi_groups.transform('min')
ndvi_max = ndvi_groups.transform('max')

# A group with NDVImax == NDVImin yields 0/0 = NaN.
tmp_vci = indices_monthly[['Station_ID', 'Date']].assign(
    VCI=(ndvi - ndvi_min) / (ndvi_max - ndvi_min)
)

tmp_vci

Unnamed: 0,Station_ID,Date,VCI
0,40709,2006-09-30,0.271298
1,40709,2006-10-31,0.832176
2,40709,2006-11-30,0.650847
3,40709,2006-12-31,0.460428
4,40709,2007-01-31,0.486276
...,...,...,...
5131,99361,2024-02-29,0.912153
5132,99361,2024-03-31,0.274732
5133,99361,2024-04-30,0.838552
5134,99361,2024-05-31,0.648361


### TCI (Temperature Condition Index)

In [8]:
# Temperature Condition Index:
#
#     TCI = (LSTmax - LST) / (LSTmax - LSTmin)
#
# where LSTmin / LSTmax are taken over all years for the same station and the
# same calendar month. The numerator is inverted relative to VCI, so higher
# land-surface temperature gives a lower TCI.
lst = indices_monthly['LST']

# `transform` returns the group statistic aligned with the original rows, which
# avoids copying the full frame, merging the statistics back and dropping the
# helper columns in place.
lst_groups = lst.groupby([indices_monthly['Station_ID'], indices_monthly['Date'].dt.month])
lst_min = lst_groups.transform('min')
lst_max = lst_groups.transform('max')

# A group with LSTmax == LSTmin yields 0/0 = NaN.
tmp_tci = indices_monthly[['Station_ID', 'Date']].assign(
    TCI=(lst_max - lst) / (lst_max - lst_min)
)

tmp_tci


Unnamed: 0,Station_ID,Date,TCI
0,40709,2006-09-30,0.561668
1,40709,2006-10-31,0.392901
2,40709,2006-11-30,0.377212
3,40709,2006-12-31,0.915847
4,40709,2007-01-31,0.213087
...,...,...,...
5131,99361,2024-02-29,0.637190
5132,99361,2024-03-31,0.599759
5133,99361,2024-04-30,0.322581
5134,99361,2024-05-31,0.771205


### VHI (Vegetation Health Index)

In [9]:
# Weight of VCI in the index; TCI receives the complement (1 - alpha).
alpha = 0.5

# VHI = alpha * VCI + (1 - alpha) * TCI, computed on rows present in both
# inputs (inner join on station and date).
vci_tci = pd.merge(tmp_vci, tmp_tci, on=["Station_ID", "Date"])
vci_tci["VHI"] = alpha * vci_tci["VCI"] + (1 - alpha) * vci_tci["TCI"]

tmp_vhi = vci_tci[["Station_ID", "Date", "VHI"]]

tmp_vhi

Unnamed: 0,Station_ID,Date,VHI
0,40709,2006-09-30,0.416483
1,40709,2006-10-31,0.612538
2,40709,2006-11-30,0.514030
3,40709,2006-12-31,0.688138
4,40709,2007-01-31,0.349681
...,...,...,...
5131,99361,2024-02-29,0.774672
5132,99361,2024-03-31,0.437246
5133,99361,2024-04-30,0.580566
5134,99361,2024-05-31,0.709783


### CI (Composite Index)

In [10]:
# Component weights of the composite index; they sum to 1.
a = 1/3 # TCI
b = 1/3 # VCI
c = 1 - a - b # PCI

# Precipitation products, in the column order of the resulting table.
ci_products = ["GPM", "ERA5", "TRMM", "TerraClimate", "PERSIANNCDR"]

# Inner-join the three condition indices on station and date.
ci_inputs = pd.merge(
    tmp_vci, tmp_tci, on=["Station_ID", "Date"]
).merge(
    tmp_pci, on=["Station_ID", "Date"]
)

# The temperature + vegetation part is shared by every product, so it is
# computed once; the product-specific precipitation term is added per column.
ci_shared_part = a * ci_inputs['TCI'] + b * ci_inputs['VCI']

# CI_<product> = a * TCI + b * VCI + c * PCI_<product>
tmp_ci = ci_inputs[["Station_ID", "Date"]].assign(
    **{
        f"CI_{product}": ci_shared_part + c * ci_inputs[f"PCI_{product}"]
        for product in ci_products
    }
)

tmp_ci

Unnamed: 0,Station_ID,Date,CI_GPM,CI_ERA5,CI_TRMM,CI_TerraClimate,CI_PERSIANNCDR
0,40709,2006-09-30,0.429573,0.394167,0.356054,0.414823,0.343348
1,40709,2006-10-31,0.572508,0.617056,0.521511,0.569520,0.554487
2,40709,2006-11-30,0.479709,0.504220,0.450601,0.494063,0.546398
3,40709,2006-12-31,0.632763,0.688443,0.646046,0.712490,0.739150
4,40709,2007-01-31,0.233121,0.267925,0.233121,0.244615,0.233121
...,...,...,...,...,...,...,...
5131,99361,2024-02-29,0.764418,0.694746,,,0.571666
5132,99361,2024-03-31,0.291497,0.381139,,,0.362832
5133,99361,2024-04-30,0.503711,0.484689,,,
5134,99361,2024-05-31,0.755675,0.799658,,,


### Combine

In [11]:
# Attach every derived index (PCI, VCI, TCI, VHI, CI) to the base indices table.
merge_keys = ["Station_ID", "Date"]
derived_frames = [tmp_pci, tmp_vci, tmp_tci, tmp_vhi, tmp_ci]

# The result of this notebook is written back to `gee_indices_monthly`, the
# table `indices_monthly` was loaded from. On a second run the loaded table
# therefore already contains the derived columns, and merging them again would
# create `_x` / `_y` duplicates. Dropping any previously derived column first
# keeps the notebook re-runnable; on a first run nothing is dropped.
derived_columns = [
    column
    for frame in derived_frames
    for column in frame.columns
    if column not in merge_keys
]

dataset = indices_monthly.drop(columns=derived_columns, errors="ignore")

# Inner joins on station and date, applied in the same order as the frames
# are listed so the output column order is PCI, VCI, TCI, VHI, CI.
for frame in derived_frames:
    dataset = dataset.merge(right=frame, on=merge_keys)


dataset

Unnamed: 0,Station_ID,Date,NDVI_MOD13A3,LSTDay_MOD21C3,LSTNight_MYD21C3,EVI_MYD13A3,LSTNight_MOD21C3,NDVI_MYD13A3,LSTDay_MYD21C3,EVI_MOD13A3,...,PCI_TerraClimate,PCI_PERSIANNCDR,VCI,TCI,VHI,CI_GPM,CI_ERA5,CI_TRMM,CI_TerraClimate,CI_PERSIANNCDR
0,40709,2006-09-30,0.5450,29.37,18.67,0.2874,24.09,0.4742,25.87,0.3289,...,0.411504,0.197079,0.271298,0.561668,0.416483,0.429573,0.394167,0.356054,0.414823,0.343348
1,40709,2006-10-31,0.6019,25.09,16.37,0.3065,16.09,0.5955,22.93,0.3294,...,0.483483,0.438384,0.832176,0.392901,0.612538,0.572508,0.617056,0.521511,0.569520,0.554487
2,40709,2006-11-30,0.5557,18.23,9.15,0.2798,15.95,0.5524,16.37,0.2924,...,0.454128,0.611135,0.650847,0.377212,0.514030,0.479709,0.504220,0.450601,0.494063,0.546398
3,40709,2006-12-31,0.4903,10.49,2.97,0.2105,9.99,0.4332,9.81,0.2610,...,0.761194,0.841176,0.460428,0.915847,0.688138,0.632763,0.688443,0.646046,0.712490,0.739150
4,40709,2007-01-31,0.4191,11.37,2.73,0.2021,8.27,0.4171,8.31,0.1875,...,0.034483,0.000000,0.486276,0.213087,0.349681,0.233121,0.267925,0.233121,0.244615,0.233121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5131,99361,2024-02-29,0.3920,0.11,-1.47,0.1344,2.21,0.2661,14.45,0.1730,...,,0.165655,0.912153,0.637190,0.774672,0.764418,0.694746,,,0.571666
5132,99361,2024-03-31,,5.63,-0.07,0.1551,3.73,0.3203,22.47,,...,,0.214003,0.274732,0.599759,0.437246,0.291497,0.381139,,,0.362832
5133,99361,2024-04-30,0.3901,21.71,2.63,0.5179,7.97,0.7398,26.89,0.2489,...,,,0.838552,0.322581,0.580566,0.503711,0.484689,,,
5134,99361,2024-05-31,0.5332,22.71,6.59,0.4374,13.01,0.6308,30.37,0.3797,...,,,0.648361,0.771205,0.709783,0.755675,0.799658,,,


In [12]:
# Summarise column dtypes and non-null counts of the combined table.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5136 entries, 0 to 5135
Data columns (total 28 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Station_ID        5136 non-null   int64         
 1   Date              5136 non-null   datetime64[ns]
 2   NDVI_MOD13A3      5112 non-null   float64       
 3   LSTDay_MOD21C3    5135 non-null   float64       
 4   LSTNight_MYD21C3  5132 non-null   float64       
 5   EVI_MYD13A3       5136 non-null   float64       
 6   LSTNight_MOD21C3  5136 non-null   float64       
 7   NDVI_MYD13A3      5136 non-null   float64       
 8   LSTDay_MYD21C3    5134 non-null   float64       
 9   EVI_MOD13A3       5112 non-null   float64       
 10  NDVI              5136 non-null   float64       
 11  EVI               5136 non-null   float64       
 12  LSTDay            5136 non-null   float64       
 13  LSTNight          5136 non-null   float64       
 14  LST               5136 n

In [13]:
# Persist the combined table.
# NOTE: `if_exists='replace'` overwrites `gee_indices_monthly`, which is the
# same table this notebook reads `indices_monthly` from, so later runs load
# the enriched table rather than the raw one.
conn = sqlite3.connect(DATABASE_PATH)
try:
    dataset.to_sql('gee_indices_monthly', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Release the connection even if the write fails.
    conn.close()