In [1]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import pyeto
import spei

In [2]:
sys.path.append('/home/pooya/w/DroughtMonitoringIran/')

DATA_PATH = "./assets/data/GEE_CSV_Exports/"
DATABASE_PATH = "./database/database.db"

In [3]:
conn = sqlite3.connect(DATABASE_PATH)

monthly_data = pd.read_sql(sql='SELECT * FROM monthly', con=conn)
monthly_data['Date'] = pd.to_datetime(monthly_data['Date'])

monthly_PET = pd.read_sql(sql='SELECT * FROM pet_monthly', con=conn)
monthly_PET['Date'] = pd.to_datetime(monthly_PET['Date'])

geoinfo = pd.read_sql(sql='SELECT * FROM geoinfo', con=conn)

conn.close()

In [4]:
monthly_data

Unnamed: 0,Station_ID,Date,Temp_Max,Temp_Min,Temp_Mean,Precip
0,40709,2006-01-31,6.8,0.9,3.8,134.46
1,40709,2006-02-28,9.2,3.2,6.2,37.58
2,40709,2006-03-31,13.2,6.8,10.0,85.78
3,40709,2006-04-30,16.5,10.9,13.7,99.07
4,40709,2006-05-31,21.0,14.7,17.9,39.90
...,...,...,...,...,...,...
5570,99361,2024-03-31,12.1,1.8,6.9,13.60
5571,99361,2024-04-30,15.7,6.2,10.9,48.61
5572,99361,2024-05-31,18.0,9.3,13.6,49.11
5573,99361,2024-06-30,23.2,13.8,18.5,158.21


In [5]:
monthly_PET

Unnamed: 0,Station_ID,Date,PET_Hargreaves
0,40709,2006-01-31,23.76
1,40709,2006-02-28,30.78
2,40709,2006-03-31,55.38
3,40709,2006-04-30,70.81
4,40709,2006-05-31,103.12
...,...,...,...
5570,99361,2024-03-31,67.21
5571,99361,2024-04-30,88.16
5572,99361,2024-05-31,107.19
5573,99361,2024-06-30,128.74


In [6]:
geoinfo

Unnamed: 0,Station_ID,Station_Name,Province,Station_Latitude,Station_Longitude,Station_Elevation
0,40759,Sari,Mazandaran,36.536,52.998,23.0
1,99306,Bandar-e-amirabad,Mazandaran,36.856,53.386,-20.0
2,99357,Baladeh,Mazandaran,36.198,51.801,2120.0
3,99299,Galugah,Mazandaran,36.738,53.837,-10.0
4,40737,Gharakhil,Mazandaran,36.487,52.108,14.7
5,40760,Kiyasar,Mazandaran,36.248,53.546,1294.3
6,99361,Alasht,Mazandaran,36.071,52.843,1805.0
7,99309,Amol,Mazandaran,36.479,52.468,23.7
8,99348,Kojur,Mazandaran,36.39,51.729,1550.0
9,99360,Polsefid,Mazandaran,36.104,53.062,610.0


In [7]:
geoinfo

Unnamed: 0,Station_ID,Station_Name,Province,Station_Latitude,Station_Longitude,Station_Elevation
0,40759,Sari,Mazandaran,36.536,52.998,23.0
1,99306,Bandar-e-amirabad,Mazandaran,36.856,53.386,-20.0
2,99357,Baladeh,Mazandaran,36.198,51.801,2120.0
3,99299,Galugah,Mazandaran,36.738,53.837,-10.0
4,40737,Gharakhil,Mazandaran,36.487,52.108,14.7
5,40760,Kiyasar,Mazandaran,36.248,53.546,1294.3
6,99361,Alasht,Mazandaran,36.071,52.843,1805.0
7,99309,Amol,Mazandaran,36.479,52.468,23.7
8,99348,Kojur,Mazandaran,36.39,51.729,1550.0
9,99360,Polsefid,Mazandaran,36.104,53.062,610.0


### Precipitation Data

In [8]:
vi_dataset = pd.DataFrame()

### NDVI & EVI

In [9]:
# Find *.csv files in the directory with NDVI
list_fles = [x for x in os.listdir(DATA_PATH) if x.endswith(".csv") and ("NDVI" in x or "EVI" in x)]
vi_dataset = pd.DataFrame()
for lf in list_fles:
    data = pd.read_csv(filepath_or_buffer=DATA_PATH + lf)
    data = data[["Province", "St_Name", "date", "mean"]]
    data.rename(
        columns={
            "Province": "Province",
            "St_Name": "Station_Name",
            "date": "Date",
            "mean": f"{lf.split('_')[2]}_{lf.split('_')[0]}"
        },
        inplace=True
    )
    if vi_dataset.empty:
        vi_dataset = data
    else:
        vi_dataset = vi_dataset.merge(data, on=["Province", "Station_Name", "Date"], how="outer")


def merge_columns(df, col_prefix):
    cols = [col for col in df.columns if col.startswith(col_prefix)]
    if len(cols) > 1:
        df[col_prefix] = df[cols].mean(axis=1, skipna=True)
        df.drop(columns=cols, inplace=True)
    elif len(cols) == 1:
        df.rename(columns={cols[0]: col_prefix}, inplace=True)


unique_prefixes = set(col.split('_')[0] + '_' + col.split('_')[1] for col in vi_dataset.columns if '_' in col)
for prefix in unique_prefixes:
    merge_columns(vi_dataset, prefix)

# Select the columns of interest
gi = geoinfo[["Station_ID", "Station_Name", "Province"]]

vi_dataset = gi.merge(vi_dataset, on=["Province", "Station_Name"], how="left")

vi_dataset.sort_values(by=["Province", "Station_Name", "Date"], inplace=True)
vi_dataset.reset_index(inplace=True, drop=True)
vi_dataset.drop_duplicates(inplace=True)

vi_dataset = vi_dataset[["Station_ID", "Date", "NDVI_MOD13A3", "NDVI_MYD13A3", "EVI_MOD13A3", "EVI_MYD13A3"]]

vi_dataset

Unnamed: 0,Station_ID,Date,NDVI_MOD13A3,NDVI_MYD13A3,EVI_MOD13A3,EVI_MYD13A3
0,40709,2000-02,0.4377,,0.1973,
1,40709,2000-03,0.3788,,0.1674,
2,40709,2000-04,0.4353,,0.2141,
3,40709,2000-05,0.4418,,0.2004,
4,40709,2000-06,0.6711,,0.3808,
...,...,...,...,...,...,...
7445,40735,2024-07,0.5367,0.6263,0.3090,0.4214
7446,40735,2024-08,0.5125,0.5674,0.2687,0.3086
7447,40735,2024-09,0.4449,0.5261,0.2082,0.3110
7448,40735,2024-10,0.5335,0.4783,0.2351,0.2891


### LST

In [10]:
# Find *.csv files in the directory with NDVI
list_fles = [x for x in os.listdir(DATA_PATH) if x.endswith(".csv") and ("LSTNight" in x or "LSTDay" in x)]
lst_dataset = pd.DataFrame()
for lf in list_fles:
    data = pd.read_csv(filepath_or_buffer=DATA_PATH + lf, na_values=-999)
    data = data[["Province", "St_Name", "date", "mean"]]
    data.rename(
        columns={
            "Province": "Province",
            "St_Name": "Station_Name",
            "date": "Date",
            "mean": f"{lf.split('_')[2]}_{lf.split('_')[0]}"
        },
        inplace=True
    )
    if lst_dataset.empty:
        lst_dataset = data
    else:
        lst_dataset = lst_dataset.merge(data, on=["Province", "Station_Name", "Date"], how="outer")


def merge_columns(df, col_prefix):
    cols = [col for col in df.columns if col.startswith(col_prefix)]
    if len(cols) > 1:
        df[col_prefix] = df[cols].mean(axis=1, skipna=True)
        df.drop(columns=cols, inplace=True)
    elif len(cols) == 1:
        df.rename(columns={cols[0]: col_prefix}, inplace=True)


unique_prefixes = set(col.split('_')[0] + '_' + col.split('_')[1] for col in lst_dataset.columns if '_' in col)
for prefix in unique_prefixes:
    merge_columns(lst_dataset, prefix)

# Select the columns of interest
gi = geoinfo[["Station_ID", "Station_Name", "Province"]]

lst_dataset = gi.merge(lst_dataset, on=["Province", "Station_Name"], how="left")

lst_dataset.sort_values(by=["Province", "Station_Name", "Date"], inplace=True)
lst_dataset.reset_index(inplace=True, drop=True)
lst_dataset.drop_duplicates(inplace=True)

lst_dataset = lst_dataset[["Station_ID", "Date", "LSTDay_MOD21C3", "LSTDay_MYD21C3", "LSTNight_MOD21C3", "LSTNight_MYD21C3"]]

lst_dataset

Unnamed: 0,Station_ID,Date,LSTDay_MOD21C3,LSTDay_MYD21C3,LSTNight_MOD21C3,LSTNight_MYD21C3
0,40709,2000-02,14.81,,,
4,40709,2000-03,16.83,,8.27,
5,40709,2000-04,24.07,,12.57,
6,40709,2000-05,28.87,,18.57,
7,40709,2000-06,31.21,,20.45,
...,...,...,...,...,...,...
7595,40735,2024-07,23.57,35.83,12.31,11.89
7596,40735,2024-08,26.19,38.95,12.79,13.17
7597,40735,2024-09,27.23,36.13,10.55,12.29
7598,40735,2024-10,17.93,21.03,4.43,6.65


In [11]:
conn = sqlite3.connect(DATABASE_PATH)

vi_dataset.to_sql('vi_monthly', conn, if_exists='replace', index=False)
conn.commit()

lst_dataset.to_sql('lst_monthly', conn, if_exists='replace', index=False)
conn.commit()

conn.close()