In [1]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import pyeto
import spei

In [2]:
# Extra import search path for project-local modules.
# NOTE(review): this is an absolute, machine-specific path — consider deriving it
# from the notebook location or an environment variable.
PROJECT_ROOT = '/home/pooya/w/DroughtMonitoringIran/'
# Guarded so that re-running this cell does not append the same entry again.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Directory holding the CSV exports read by the precipitation cells.
DATA_PATH = "./assets/data/GEE_CSV_Exports/"
# SQLite database file this notebook reads from and writes to.
DATABASE_PATH = "./database/database.db"

In [3]:
# Load the monthly station observations, the monthly PET table and the station
# metadata from the SQLite database at DATABASE_PATH.
conn = sqlite3.connect(DATABASE_PATH)
try:
    monthly_data = pd.read_sql(sql='SELECT * FROM ground_data_monthly', con=conn)
    monthly_PET = pd.read_sql(sql='SELECT * FROM pet_monthly', con=conn)
    geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)
finally:
    # Release the database handle even if one of the queries fails.
    conn.close()

# Parse the Date columns into pandas datetimes.
monthly_data['Date'] = pd.to_datetime(monthly_data['Date'])
monthly_PET['Date'] = pd.to_datetime(monthly_PET['Date'])

In [4]:
# Display the station observations read from the `ground_data_monthly` table.
monthly_data

Unnamed: 0,Station_ID,Date,Temp_Max,Temp_Min,Temp_Mean,Wind_Speed,Pressure,Humidity_Max,Humidity_Min,Humidity,Sunshine,Precip
0,40709,2006-01-31,6.8,0.9,3.8,0.9,1026.7,96.6,70.3,85.1,2.3,134.46
1,40709,2006-02-28,9.2,3.2,6.2,1.1,1020.6,96.6,78.6,88.8,2.9,37.58
2,40709,2006-03-31,13.2,6.8,10.0,1.7,1019.1,95.6,70.6,84.4,3.2,85.78
3,40709,2006-04-30,16.5,10.9,13.7,0.8,1017.7,96.4,73.8,86.9,3.1,99.07
4,40709,2006-05-31,21.0,14.7,17.9,1.9,1018.8,93.1,67.7,81.3,6.0,39.90
...,...,...,...,...,...,...,...,...,...,...,...,...
5570,99361,2024-03-31,12.1,1.8,6.9,2.0,819.8,80.0,41.8,59.0,6.4,13.60
5571,99361,2024-04-30,15.7,6.2,10.9,2.1,822.6,83.8,48.4,63.0,7.7,48.61
5572,99361,2024-05-31,18.0,9.3,13.6,1.9,821.0,87.9,61.4,75.2,5.3,49.11
5573,99361,2024-06-30,23.2,13.8,18.5,2.0,822.2,86.9,55.9,69.9,7.4,158.21


In [5]:
# Display the PET records read from the `pet_monthly` table.
monthly_PET

Unnamed: 0,Station_ID,Date,PET_Hargreaves
0,40709,2006-01-31,23.76
1,40709,2006-02-28,30.78
2,40709,2006-03-31,55.38
3,40709,2006-04-30,70.81
4,40709,2006-05-31,103.12
...,...,...,...
5570,99361,2024-03-31,67.21
5571,99361,2024-04-30,88.16
5572,99361,2024-05-31,107.19
5573,99361,2024-06-30,128.74


In [6]:
# Display the station metadata read from the `ground_data_geoinfo` table.
geoinfo

Unnamed: 0,Station_ID,Station_Name,Province,Station_Latitude,Station_Longitude,Station_Elevation
0,40759,Sari,Mazandaran,36.536,52.998,23.0
1,99306,Bandar-e-amirabad,Mazandaran,36.856,53.386,-20.0
2,99357,Baladeh,Mazandaran,36.198,51.801,2120.0
3,99299,Galugah,Mazandaran,36.738,53.837,-10.0
4,40737,Gharakhil,Mazandaran,36.487,52.108,14.7
5,40760,Kiyasar,Mazandaran,36.248,53.546,1294.3
6,99361,Alasht,Mazandaran,36.071,52.843,1805.0
7,99309,Amol,Mazandaran,36.479,52.468,23.7
8,99348,Kojur,Mazandaran,36.39,51.729,1550.0
9,99360,Polsefid,Mazandaran,36.104,53.062,610.0


In [7]:
# NOTE(review): this cell repeats the `geoinfo` display of the previous cell and
# looks redundant — consider removing it.
geoinfo

Unnamed: 0,Station_ID,Station_Name,Province,Station_Latitude,Station_Longitude,Station_Elevation
0,40759,Sari,Mazandaran,36.536,52.998,23.0
1,99306,Bandar-e-amirabad,Mazandaran,36.856,53.386,-20.0
2,99357,Baladeh,Mazandaran,36.198,51.801,2120.0
3,99299,Galugah,Mazandaran,36.738,53.837,-10.0
4,40737,Gharakhil,Mazandaran,36.487,52.108,14.7
5,40760,Kiyasar,Mazandaran,36.248,53.546,1294.3
6,99361,Alasht,Mazandaran,36.071,52.843,1805.0
7,99309,Amol,Mazandaran,36.479,52.468,23.7
8,99348,Kojur,Mazandaran,36.39,51.729,1550.0
9,99360,Polsefid,Mazandaran,36.104,53.062,610.0


### Precipitation Data

In [8]:
# Empty placeholder for the merged precipitation table; the ERA5 cell rebinds
# this name to the result of its geoinfo/ERA5 merge.
precip_dataset = pd.DataFrame()

### ERA5

In [9]:
# Load the ERA5 monthly precipitation export.
file_name = "ERA5_Monthly_Precipitation.csv"
era5_data = pd.read_csv(DATA_PATH + file_name)

# Keep the join keys plus the precipitation value, renamed to the notebook's
# column naming scheme ("Province" already has the target name).
era5_data = era5_data.loc[:, ["Province", "St_Name", "date", "precipitation_mm"]].rename(
    columns={
        "St_Name": "Station_Name",
        "date": "Date",
        "precipitation_mm": "ERA5_Precip",
    }
)

# Station identifiers to attach to the ERA5 records.
gi = geoinfo[["Station_ID", "Station_Name", "Province"]]

# Left join keeps every station listed in geoinfo; rows are then ordered by
# province, station and date.
precip_dataset = (
    gi
    .merge(era5_data, how="left", on=["Province", "Station_Name"])
    .sort_values(by=["Province", "Station_Name", "Date"])
)

precip_dataset

Unnamed: 0,Station_ID,Station_Name,Province,Date,ERA5_Precip
6578,40709,Astara,Gilan,2000-01,211.450629
6579,40709,Astara,Gilan,2000-02,108.227941
6580,40709,Astara,Gilan,2000-03,89.712488
6581,40709,Astara,Gilan,2000-04,66.366945
6582,40709,Astara,Gilan,2000-05,72.468541
...,...,...,...,...,...
3284,40735,Siahbisheh,Mazandaran,2024-07,131.660313
3285,40735,Siahbisheh,Mazandaran,2024-08,75.881136
3286,40735,Siahbisheh,Mazandaran,2024-09,114.718222
3287,40735,Siahbisheh,Mazandaran,2024-10,79.880182


### GPM

In [10]:
# Load the GPM monthly precipitation export.
file_name = "GPM_Monthly_Precipitation.csv"
gpm_data = pd.read_csv(DATA_PATH + file_name)

# Keep the join keys plus the value column, renamed to the notebook's naming
# scheme ("Province" already has the target name).
gpm_data = gpm_data.loc[:, ["Province", "St_Name", "date", "mean"]].rename(
    columns={
        "St_Name": "Station_Name",
        "date": "Date",
        "mean": "GPM_Precip",
    }
)

# Outer join so station/month rows present in only one of the two tables are
# kept, then restore the province/station/date ordering.
precip_dataset = (
    precip_dataset
    .merge(gpm_data, how="outer", on=["Province", "Station_Name", "Date"])
    .sort_values(by=["Province", "Station_Name", "Date"])
)

precip_dataset

Unnamed: 0,Station_ID,Station_Name,Province,Date,ERA5_Precip,GPM_Precip
0,40709,Astara,Gilan,2000-01,211.450629,131.688001
1,40709,Astara,Gilan,2000-02,108.227941,77.256006
2,40709,Astara,Gilan,2000-03,89.712488,150.288005
3,40709,Astara,Gilan,2000-04,66.366945,20.880001
4,40709,Astara,Gilan,2000-05,72.468541,56.544004
...,...,...,...,...,...,...
7470,40735,Siahbisheh,Mazandaran,2024-07,131.660313,
7471,40735,Siahbisheh,Mazandaran,2024-08,75.881136,
7472,40735,Siahbisheh,Mazandaran,2024-09,114.718222,
7473,40735,Siahbisheh,Mazandaran,2024-10,79.880182,


### TRMM

In [11]:
# Load the TRMM monthly precipitation export.
file_name = "TRMM_Monthly_Precipitation.csv"
trmm_data = pd.read_csv(DATA_PATH + file_name)

# Keep the join keys plus the value column, renamed to the notebook's naming
# scheme ("Province" already has the target name).
trmm_data = trmm_data.loc[:, ["Province", "St_Name", "date", "mean"]].rename(
    columns={
        "St_Name": "Station_Name",
        "date": "Date",
        "mean": "TRMM_Precip",
    }
)

# Outer join TRMM onto the running precipitation table so rows present in only
# one of the two are kept, then restore the province/station/date ordering.
precip_dataset = (
    precip_dataset
    .merge(trmm_data, how="outer", on=["Province", "Station_Name", "Date"])
    .sort_values(by=["Province", "Station_Name", "Date"])
)

precip_dataset

Unnamed: 0,Station_ID,Station_Name,Province,Date,ERA5_Precip,GPM_Precip,TRMM_Precip
0,40709,Astara,Gilan,2000-01,211.450629,131.688001,128.091774
1,40709,Astara,Gilan,2000-02,108.227941,77.256006,62.963703
2,40709,Astara,Gilan,2000-03,89.712488,150.288005,141.844743
3,40709,Astara,Gilan,2000-04,66.366945,20.880001,18.764659
4,40709,Astara,Gilan,2000-05,72.468541,56.544004,51.030001
...,...,...,...,...,...,...,...
7470,40735,Siahbisheh,Mazandaran,2024-07,131.660313,,
7471,40735,Siahbisheh,Mazandaran,2024-08,75.881136,,
7472,40735,Siahbisheh,Mazandaran,2024-09,114.718222,,
7473,40735,Siahbisheh,Mazandaran,2024-10,79.880182,,


### TerraClimate

In [12]:
# Load the TerraClimate monthly precipitation export.
file_name = "TerraClimate_Monthly_Precipitation.csv"
tc_data = pd.read_csv(DATA_PATH + file_name)

# Keep the join keys plus the value column, renamed to the notebook's naming
# scheme ("Province" already has the target name).
tc_data = tc_data.loc[:, ["Province", "St_Name", "date", "mean"]].rename(
    columns={
        "St_Name": "Station_Name",
        "date": "Date",
        "mean": "TerraClimate_Precip",
    }
)

# Outer join TerraClimate onto the running precipitation table so rows present
# in only one of the two are kept, then restore the province/station/date ordering.
precip_dataset = (
    precip_dataset
    .merge(tc_data, how="outer", on=["Province", "Station_Name", "Date"])
    .sort_values(by=["Province", "Station_Name", "Date"])
)

precip_dataset

Unnamed: 0,Station_ID,Station_Name,Province,Date,ERA5_Precip,GPM_Precip,TRMM_Precip,TerraClimate_Precip
0,40709,Astara,Gilan,2000-01,211.450629,131.688001,128.091774,77.0
1,40709,Astara,Gilan,2000-02,108.227941,77.256006,62.963703,71.0
2,40709,Astara,Gilan,2000-03,89.712488,150.288005,141.844743,117.0
3,40709,Astara,Gilan,2000-04,66.366945,20.880001,18.764659,18.0
4,40709,Astara,Gilan,2000-05,72.468541,56.544004,51.030001,18.0
...,...,...,...,...,...,...,...,...
7470,40735,Siahbisheh,Mazandaran,2024-07,131.660313,,,
7471,40735,Siahbisheh,Mazandaran,2024-08,75.881136,,,
7472,40735,Siahbisheh,Mazandaran,2024-09,114.718222,,,
7473,40735,Siahbisheh,Mazandaran,2024-10,79.880182,,,


### PERSIANNCDR

In [13]:
# Load the PERSIANNCDR daily precipitation export.
file_name = "PERSIANNCDR_Daily_Precipitation.csv"
pcdr_data = pd.read_csv(filepath_or_buffer=DATA_PATH + file_name)

# Keep the join keys plus the value column, renamed to the notebook's naming
# scheme ("Province" already has the target name).
pcdr_data = pcdr_data[["Province", "St_Name", "date", "mean"]].rename(
    columns={
        "St_Name": "Station_Name",
        "date": "Date",
        "mean": "PERSIANNCDR_Precip",
    }
)

# Daily to monthly: collapse each date to its "YYYY-MM" period string (the key
# format used by the monthly tables) and sum per station and month.
# min_count=15 yields NaN for months with fewer than 15 non-missing daily values.
# The Year/Month helper columns of the earlier version were never used after
# the groupby and have been removed.
pcdr_data = (
    pcdr_data
    .assign(Date=pd.to_datetime(pcdr_data["Date"]).dt.to_period("M").astype(str))
    .groupby(["Province", "Station_Name", "Date"])["PERSIANNCDR_Precip"]
    .sum(min_count=15)
    .reset_index()
)

# Outer join PERSIANNCDR onto the running precipitation table, then restore the
# province/station/date ordering.
precip_dataset = (
    precip_dataset
    .merge(pcdr_data, on=["Province", "Station_Name", "Date"], how="outer")
    .sort_values(by=["Province", "Station_Name", "Date"])
)

precip_dataset

Unnamed: 0,Station_ID,Station_Name,Province,Date,ERA5_Precip,GPM_Precip,TRMM_Precip,TerraClimate_Precip,PERSIANNCDR_Precip
0,40709,Astara,Gilan,2000-01,211.450629,131.688001,128.091774,77.0,96.443996
1,40709,Astara,Gilan,2000-02,108.227941,77.256006,62.963703,71.0,46.755023
2,40709,Astara,Gilan,2000-03,89.712488,150.288005,141.844743,117.0,99.802746
3,40709,Astara,Gilan,2000-04,66.366945,20.880001,18.764659,18.0,24.317473
4,40709,Astara,Gilan,2000-05,72.468541,56.544004,51.030001,18.0,40.819520
...,...,...,...,...,...,...,...,...,...
7470,40735,Siahbisheh,Mazandaran,2024-07,131.660313,,,,
7471,40735,Siahbisheh,Mazandaran,2024-08,75.881136,,,,
7472,40735,Siahbisheh,Mazandaran,2024-09,114.718222,,,,
7473,40735,Siahbisheh,Mazandaran,2024-10,79.880182,,,,


### CHIRPS

In [14]:
# Load the CHIRPS daily precipitation export.
file_name = "CHIRPS_Daily_Precipitation.csv"
chirps_data = pd.read_csv(filepath_or_buffer=DATA_PATH + file_name)

# Keep the join keys plus the value column, renamed to the notebook's naming
# scheme ("Province" already has the target name).
chirps_data = chirps_data[["Province", "St_Name", "date", "mean"]].rename(
    columns={
        "St_Name": "Station_Name",
        "date": "Date",
        "mean": "CHIRPS_Precip",
    }
)

# Daily to monthly: collapse each date to its "YYYY-MM" period string (the key
# format used by the monthly tables) and sum per station and month.
# min_count=15 yields NaN for months with fewer than 15 non-missing daily values.
# The Year/Month helper columns of the earlier version were never used after
# the groupby and have been removed.
chirps_data = (
    chirps_data
    .assign(Date=pd.to_datetime(chirps_data["Date"]).dt.to_period("M").astype(str))
    .groupby(["Province", "Station_Name", "Date"])["CHIRPS_Precip"]
    .sum(min_count=15)
    .reset_index()
)

# Outer join CHIRPS onto the running precipitation table, then restore the
# province/station/date ordering.
precip_dataset = (
    precip_dataset
    .merge(chirps_data, on=["Province", "Station_Name", "Date"], how="outer")
    .sort_values(by=["Province", "Station_Name", "Date"])
)

precip_dataset

Unnamed: 0,Station_ID,Station_Name,Province,Date,ERA5_Precip,GPM_Precip,TRMM_Precip,TerraClimate_Precip,PERSIANNCDR_Precip,CHIRPS_Precip
0,40709,Astara,Gilan,2000-01,211.450629,131.688001,128.091774,77.0,96.443996,
1,40709,Astara,Gilan,2000-02,108.227941,77.256006,62.963703,71.0,46.755023,
2,40709,Astara,Gilan,2000-03,89.712488,150.288005,141.844743,117.0,99.802746,
3,40709,Astara,Gilan,2000-04,66.366945,20.880001,18.764659,18.0,24.317473,
4,40709,Astara,Gilan,2000-05,72.468541,56.544004,51.030001,18.0,40.819520,
...,...,...,...,...,...,...,...,...,...,...
7470,40735,Siahbisheh,Mazandaran,2024-07,131.660313,,,,,32.320484
7471,40735,Siahbisheh,Mazandaran,2024-08,75.881136,,,,,56.618558
7472,40735,Siahbisheh,Mazandaran,2024-09,114.718222,,,,,174.688978
7473,40735,Siahbisheh,Mazandaran,2024-10,79.880182,,,,,76.052103


In [15]:
# Keep Station_ID as the station key and drop the descriptive name/province columns.
# Rebinding with errors="ignore" (instead of inplace=True) keeps this cell
# re-runnable: a second execution no longer raises KeyError for the
# already-removed columns.
precip_dataset = precip_dataset.drop(columns=["Station_Name", "Province"], errors="ignore")
precip_dataset

Unnamed: 0,Station_ID,Date,ERA5_Precip,GPM_Precip,TRMM_Precip,TerraClimate_Precip,PERSIANNCDR_Precip,CHIRPS_Precip
0,40709,2000-01,211.450629,131.688001,128.091774,77.0,96.443996,
1,40709,2000-02,108.227941,77.256006,62.963703,71.0,46.755023,
2,40709,2000-03,89.712488,150.288005,141.844743,117.0,99.802746,
3,40709,2000-04,66.366945,20.880001,18.764659,18.0,24.317473,
4,40709,2000-05,72.468541,56.544004,51.030001,18.0,40.819520,
...,...,...,...,...,...,...,...,...
7470,40735,2024-07,131.660313,,,,,32.320484
7471,40735,2024-08,75.881136,,,,,56.618558
7472,40735,2024-09,114.718222,,,,,174.688978
7473,40735,2024-10,79.880182,,,,,76.052103


In [16]:
# Month part of the "YYYY-MM" Date string. The vectorised .str accessor
# propagates missing dates as NaN, whereas calling str.split on each element
# raises AttributeError for a NaN Date (e.g. a station without any ERA5 row
# after the left merge).
precip_dataset['Month'] = precip_dataset['Date'].str.split('-').str[1]

# Named aggregations "<Product>min" / "<Product>max" over "<Product>_Precip",
# generated in the same column order as the hand-written list they replace.
products = ['ERA5', 'GPM', 'TRMM', 'TerraClimate', 'PERSIANNCDR', 'CHIRPS']
stat_specs = {
    f'{product}{stat}': (f'{product}_Precip', stat)
    for product in products
    for stat in ('min', 'max')
}

# Remove min/max columns left over from a previous run of this cell so that the
# merge below does not produce "_x"/"_y" suffixed duplicates.
precip_dataset = precip_dataset.drop(columns=list(stat_specs), errors='ignore')

# Per-station, per-calendar-month extremes of every precipitation product.
monthly_stats = (
    precip_dataset
    .groupby(['Station_ID', 'Month'])
    .agg(**stat_specs)
    .reset_index()
)

# Attach the extremes to every row. The default inner join drops rows whose
# Station_ID or Month is missing, because groupby excludes NaN keys.
precip_dataset = precip_dataset.merge(monthly_stats, on=['Station_ID', 'Month'])

In [17]:
# For every product, rescale precipitation by the station's min/max for the same
# calendar month: PCI = (P - Pmin) / (Pmax - Pmin).
# NOTE(review): "PCI" presumably stands for Precipitation Condition Index — confirm.
# When Pmax equals Pmin the result is NaN (0 / 0).
pci_products = ('ERA5', 'GPM', 'TRMM', 'TerraClimate', 'PERSIANNCDR', 'CHIRPS')

for product in pci_products:
    observed = precip_dataset[f'{product}_Precip']
    lowest = precip_dataset[f'{product}min']
    highest = precip_dataset[f'{product}max']
    precip_dataset[f'PCI_{product}'] = (observed - lowest) / (highest - lowest)

# The month key and the min/max helper columns are only needed for the scaling above.
helper_columns = ['Month'] + [
    f'{product}{stat}' for product in pci_products for stat in ('min', 'max')
]
precip_dataset = precip_dataset.drop(columns=helper_columns)

precip_dataset

Unnamed: 0,Station_ID,Date,ERA5_Precip,GPM_Precip,TRMM_Precip,TerraClimate_Precip,PERSIANNCDR_Precip,CHIRPS_Precip,PCI_ERA5,PCI_GPM,PCI_TRMM,PCI_TerraClimate,PCI_PERSIANNCDR,PCI_CHIRPS
0,40709,2000-01,211.450629,131.688001,128.091774,77.0,96.443996,,0.832878,0.633929,0.845284,0.620690,0.708913,
1,40709,2000-02,108.227941,77.256006,62.963703,71.0,46.755023,,0.429242,0.366071,0.358505,0.484848,0.214789,
2,40709,2000-03,89.712488,150.288005,141.844743,117.0,99.802746,,0.303417,0.813084,0.957617,0.803279,0.848183,
3,40709,2000-04,66.366945,20.880001,18.764659,18.0,24.317473,,0.256723,0.037433,0.107766,0.068376,0.111865,
4,40709,2000-05,72.468541,56.544004,51.030001,18.0,40.819520,,0.188416,0.163265,0.355328,0.000000,0.113478,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7470,40735,2024-07,131.660313,,,,,32.320484,0.745702,,,,,0.381045
7471,40735,2024-08,75.881136,,,,,56.618558,0.302063,,,,,0.572843
7472,40735,2024-09,114.718222,,,,,174.688978,0.732031,,,,,1.000000
7473,40735,2024-10,79.880182,,,,,76.052103,0.244395,,,,,0.056061


In [18]:
# Persist the merged precipitation table (raw products + PCI columns) to the
# `precip_monthly` table, replacing any existing table of that name.
conn = sqlite3.connect(DATABASE_PATH)
try:
    precip_dataset.to_sql('precip_monthly', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Release the database handle even if the write fails.
    conn.close()