In [11]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import pyeto
import spei

In [12]:
# Project root added to the module search path (presumably so project-local
# modules can be imported -- none are imported in the visible cells).
# NOTE(review): this is a machine-specific absolute path; consider deriving it
# from the notebook location or an environment variable.
PROJECT_ROOT = '/home/pooya/w/DroughtMonitoringIran/'

# Guard so that re-running this cell does not keep appending duplicates.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Folder holding the CSV exports read by the loading cells (relative to the CWD).
DATA_PATH = "./assets/data/GEE_CSV_Exports/"
# SQLite database file that receives the merged precipitation table.
DATABASE_PATH = "./database/database.db"

### Precipitation Data

In [13]:
# Empty placeholder for the combined precipitation table; the ERA5 cell
# rebinds this name to a copy of the ERA5 frame before anything reads it.
precip_dataset = pd.DataFrame()

### ERA5

In [14]:
# Load the ERA5-Land export, keep the three columns used downstream, and
# give them the dataset-wide names. Built as a single chain so that no
# intermediate slice is mutated (avoids SettingWithCopyWarning).
file_name = "ERA5_LAND_Monthly_total_precipitation_sum_2000_2024.csv"
era5_data = (
    pd.read_csv(filepath_or_buffer=DATA_PATH + file_name, na_values=-999)  # -999 is read as NaN
    .loc[:, ["St_ID", "date", "mean"]]
    .rename(
        columns={
            "St_ID": "Station_ID",
            "date": "Date",
            "mean": "ERA5_Precipitation",
        }
    )
    # Parse the date strings so later merges join on real timestamps.
    .assign(Date=lambda df: pd.to_datetime(df["Date"]))
)

# ERA5 seeds the combined table; the other products are merged onto it in
# the following cells. sort_values returns a new frame, so era5_data itself
# is left in file order.
precip_dataset = era5_data.sort_values(by=["Station_ID", "Date"])

precip_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7200 entries, 21 to 7182
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Station_ID          7200 non-null   int64         
 1   Date                7200 non-null   datetime64[ns]
 2   ERA5_Precipitation  7200 non-null   float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 225.0 KB


### GPM

In [15]:
# Load the GPM export, keep the three columns used downstream, and rename
# them. Built as a single chain so that no intermediate slice is mutated
# (avoids SettingWithCopyWarning).
file_name = "GPM_L3_Monthly_precipitation_2000_2024.csv"
gpm_data = (
    pd.read_csv(filepath_or_buffer=DATA_PATH + file_name, na_values=-999)  # -999 is read as NaN
    .loc[:, ["St_ID", "date", "mean"]]
    .rename(
        columns={
            "St_ID": "Station_ID",
            "date": "Date",
            "mean": "GPM_Precipitation",
        }
    )
    # Parse the date strings so the merge joins on real timestamps.
    .assign(Date=lambda df: pd.to_datetime(df["Date"]))
)

# Outer-join onto the combined table so station/dates missing from either
# side are kept as NaN. validate="one_to_one" makes a duplicated
# (Station_ID, Date) key raise instead of silently multiplying rows.
precip_dataset = (
    precip_dataset
    .merge(gpm_data, on=["Station_ID", "Date"], how="outer", validate="one_to_one")
    .sort_values(by=["Station_ID", "Date"])
)

precip_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7200 entries, 0 to 7199
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Station_ID          7200 non-null   int64         
 1   Date                7200 non-null   datetime64[ns]
 2   ERA5_Precipitation  7200 non-null   float64       
 3   GPM_Precipitation   7056 non-null   float64       
dtypes: datetime64[ns](1), float64(2), int64(1)
memory usage: 225.1 KB


### TRMM

In [16]:
# Load the TRMM export, keep the three columns used downstream, and rename
# them. Built as a single chain so that no intermediate slice is mutated
# (avoids SettingWithCopyWarning).
file_name = "TRMM_Monthly_precipitation_2000_2024.csv"
trmm_data = (
    pd.read_csv(filepath_or_buffer=DATA_PATH + file_name, na_values=-999)  # -999 is read as NaN
    .loc[:, ["St_ID", "date", "mean"]]
    .rename(
        columns={
            "St_ID": "Station_ID",
            "date": "Date",
            "mean": "TRMM_Precipitation",
        }
    )
    # Parse the date strings so the merge joins on real timestamps.
    .assign(Date=lambda df: pd.to_datetime(df["Date"]))
)

# Outer-join onto the combined table so station/dates missing from either
# side are kept as NaN. validate="one_to_one" makes a duplicated
# (Station_ID, Date) key raise instead of silently multiplying rows.
precip_dataset = (
    precip_dataset
    .merge(trmm_data, on=["Station_ID", "Date"], how="outer", validate="one_to_one")
    .sort_values(by=["Station_ID", "Date"])
)

precip_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7200 entries, 0 to 7199
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Station_ID          7200 non-null   int64         
 1   Date                7200 non-null   datetime64[ns]
 2   ERA5_Precipitation  7200 non-null   float64       
 3   GPM_Precipitation   7056 non-null   float64       
 4   TRMM_Precipitation  5760 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int64(1)
memory usage: 281.4 KB


### TerraClimate

In [17]:
# Load the TerraClimate export, keep the three columns used downstream, and
# rename them. Built as a single chain so that no intermediate slice is
# mutated (avoids SettingWithCopyWarning).
file_name = "TERRACLIMATE_Monthly_pr_2000_2024.csv"
tc_data = (
    pd.read_csv(filepath_or_buffer=DATA_PATH + file_name, na_values=-999)  # -999 is read as NaN
    .loc[:, ["St_ID", "date", "mean"]]
    .rename(
        columns={
            "St_ID": "Station_ID",
            "date": "Date",
            "mean": "TERRACLIMATE_Precipitation",
        }
    )
    # Parse the date strings so the merge joins on real timestamps.
    .assign(Date=lambda df: pd.to_datetime(df["Date"]))
)

# Outer-join onto the combined table so station/dates missing from either
# side are kept as NaN. validate="one_to_one" makes a duplicated
# (Station_ID, Date) key raise instead of silently multiplying rows.
precip_dataset = (
    precip_dataset
    .merge(tc_data, on=["Station_ID", "Date"], how="outer", validate="one_to_one")
    .sort_values(by=["Station_ID", "Date"])
)

precip_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7200 entries, 0 to 7199
Data columns (total 6 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Station_ID                  7200 non-null   int64         
 1   Date                        7200 non-null   datetime64[ns]
 2   ERA5_Precipitation          7200 non-null   float64       
 3   GPM_Precipitation           7056 non-null   float64       
 4   TRMM_Precipitation          5760 non-null   float64       
 5   TERRACLIMATE_Precipitation  6912 non-null   float64       
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 337.6 KB


### PERSIANN-CDR

In [18]:
# Load the PERSIANN-CDR export, keep the three columns used downstream, and
# rename them. Built as a single chain so that no intermediate slice is
# mutated (avoids SettingWithCopyWarning).
file_name = "PERSIANN-CDR_Monthly_precipitation_2000_2024.csv"
pcdr_records = (
    pd.read_csv(filepath_or_buffer=DATA_PATH + file_name, na_values=-999)  # -999 is read as NaN
    .loc[:, ["St_ID", "date", "mean"]]
    .rename(
        columns={
            "St_ID": "Station_ID",
            "date": "Date",
            "mean": "PERSIANNCDR_Precipitation",
        }
    )
    # Collapse every timestamp to the first day of its month so rows of the
    # same month share one group key (no Period -> str -> datetime round trip).
    .assign(Date=lambda df: pd.to_datetime(df["Date"]).dt.to_period("M").dt.to_timestamp())
)

# Daily to Monthly (per the original cell's label; the file name says
# "Monthly" -- TODO confirm the export really holds daily records).
# min_count=15: a station-month with fewer than 15 non-missing values
# becomes NaN instead of a partial sum.
# NOTE(review): months with 15 or more but not all values present are still
# summed as-is, which would understate the monthly total -- confirm intended.
pcdr_data = (
    pcdr_records
    .groupby(["Station_ID", "Date"])["PERSIANNCDR_Precipitation"]
    .sum(min_count=15)
    .reset_index()
)

# Outer-join onto the combined table so station/dates missing from either
# side are kept as NaN. validate="one_to_one" makes a duplicated
# (Station_ID, Date) key raise instead of silently multiplying rows.
precip_dataset = precip_dataset.merge(
    pcdr_data, on=["Station_ID", "Date"], how="outer", validate="one_to_one"
)

# Move every date of the combined table to its month end. This affects all
# products, not only PERSIANN-CDR, and must happen after the merge because
# the join above matches on the dates as they were before the shift.
precip_dataset = (
    precip_dataset
    .assign(Date=lambda df: pd.to_datetime(df["Date"]) + pd.offsets.MonthEnd(0))
    .sort_values(by=["Station_ID", "Date"])
)

precip_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7200 entries, 0 to 7199
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Station_ID                  7200 non-null   int64         
 1   Date                        7200 non-null   datetime64[ns]
 2   ERA5_Precipitation          7200 non-null   float64       
 3   GPM_Precipitation           7056 non-null   float64       
 4   TRMM_Precipitation          5760 non-null   float64       
 5   TERRACLIMATE_Precipitation  6912 non-null   float64       
 6   PERSIANNCDR_Precipitation   6984 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 393.9 KB


In [19]:
# Water-Year: 1385-86 to 1402-03
# Study window bounds (both ends inclusive).
# NOTE(review): max_date is 2024-06-30 although the note beside it gives the
# end as 2024-09-21 -- confirm which cut-off is intended.
min_date = '2006-09-01' # 18 Years - 24 Stations - Start: 2006-09-23
max_date = '2024-06-30' # End: 2024-09-21

# Keep only the rows whose Date lies inside [min_date, max_date].
in_window = precip_dataset["Date"].between(min_date, max_date)
precip_dataset = precip_dataset.loc[in_window]

precip_dataset

Unnamed: 0,Station_ID,Date,ERA5_Precipitation,GPM_Precipitation,TRMM_Precipitation,TERRACLIMATE_Precipitation,PERSIANNCDR_Precipitation
80,40709,2006-09-30,130.801336,115.920010,54.357873,117.0,39.081124
81,40709,2006-10-31,280.789058,164.424012,120.467601,212.0,84.028451
82,40709,2006-11-30,245.921390,101.520002,67.297440,122.0,78.084248
83,40709,2006-12-31,183.809788,77.376002,74.464657,110.0,76.726466
84,40709,2007-01-31,64.469668,26.040000,5.355088,26.0,21.264424
...,...,...,...,...,...,...,...
7189,99361,2024-02-29,100.628399,82.824002,,,49.011868
7190,99361,2024-03-31,80.836829,16.368001,,,24.582743
7191,99361,2024-04-30,112.907503,36.000001,,,
7192,99361,2024-05-31,191.929751,52.080000,,,


In [20]:
# Persist the merged, date-filtered table to SQLite, replacing any existing
# 'gee_precip_monthly' table.
conn = sqlite3.connect(DATABASE_PATH)
try:
    # The connection context manager commits on success and rolls back if
    # to_sql raises; it does not close the connection, hence the finally.
    with conn:
        precip_dataset.to_sql('gee_precip_monthly', conn, if_exists='replace', index=False)
finally:
    # Always release the database file handle, even when the write fails.
    conn.close()