In [50]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import pyeto
import spei

In [51]:
# Notebook configuration: project import path plus input/output locations.
# NOTE(review): the project root is an absolute, machine-specific path and the
# two data paths are resolved against the kernel's working directory.
PROJECT_ROOT = '/home/pooya/w/DroughtMonitoringIran/'
sys.path.append(PROJECT_ROOT)

# Directory scanned for exported CSV files.
DATA_PATH = "./assets/data/GEE_CSV_Exports/"
# SQLite database that is read from and written to by the cells below.
DATABASE_PATH = "./database/database.db"

In [52]:
# Load the station metadata table from the project database into `geoinfo`.
conn = sqlite3.connect(DATABASE_PATH)
try:
    geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)
finally:
    # Close the connection even if the query fails. A plain
    # `with sqlite3.connect(...)` block would only commit/rollback, not close.
    conn.close()

In [53]:
# Display the station metadata read from the `ground_data_geoinfo` table.
geoinfo

Unnamed: 0,Station_ID,Station_Name,Province,Station_Latitude,Station_Longitude,Station_Elevation
0,40759,Sari,Mazandaran,36.536,52.998,23.0
1,99306,Bandar-e-amirabad,Mazandaran,36.856,53.386,-20.0
2,99357,Baladeh,Mazandaran,36.198,51.801,2120.0
3,99299,Galugah,Mazandaran,36.738,53.837,-10.0
4,40737,Gharakhil,Mazandaran,36.487,52.108,14.7
5,40760,Kiyasar,Mazandaran,36.248,53.546,1294.3
6,99361,Alasht,Mazandaran,36.071,52.843,1805.0
7,99309,Amol,Mazandaran,36.479,52.468,23.7
8,99348,Kojur,Mazandaran,36.39,51.729,1550.0
9,99360,Polsefid,Mazandaran,36.104,53.062,610.0


### PET from MODIS (MOD16A2GF): 8-day composites to daily values

In [54]:
# Build a daily PET table from the MOD16A2GF CSV exports found in DATA_PATH.
#
# Two families of export files are used: files whose name contains "Scale"
# and files whose name does not. Three stations take their values from the
# "Scale" exports; every other station uses the remaining exports.

MODIS_PRODUCT = "MOD16A2GF"
PET_COLUMN = "PET_MOD16A2GF"
COMPOSITE_DAYS = 8  # number of daily rows each exported row is spread over
SCALE_EXPORT_STATIONS = ['Babolsar', 'Nowshahr', 'Bandar-e-anzali']
DAILY_SORT_KEYS = ['Province', 'Station_Name', 'Date']


def load_modis_exports(data_path: str, use_scale_exports: bool) -> pd.DataFrame:
    """Read and stack the MOD16A2GF CSV exports of one family.

    Parameters
    ----------
    data_path : str
        Directory that is scanned for ``*.csv`` files.
    use_scale_exports : bool
        ``True`` selects files whose name contains "Scale", ``False`` selects
        the files whose name does not.

    Returns
    -------
    pandas.DataFrame
        Columns ``Province``, ``Station_Name``, ``Date`` (datetime) and one
        value column per file named ``<third>_<first>`` after the third and
        first underscore-separated parts of the file name, sorted by
        province, station and date with a fresh integer index.

    Raises
    ------
    FileNotFoundError
        If no file in ``data_path`` matches the requested family.
    """
    # Sorted so the result does not depend on the directory listing order.
    file_names = sorted(
        name
        for name in os.listdir(data_path)
        if name.endswith(".csv")
        and (MODIS_PRODUCT in name)
        and (("Scale" in name) == use_scale_exports)
    )
    if not file_names:
        family = "with" if use_scale_exports else "without"
        raise FileNotFoundError(
            f"No {MODIS_PRODUCT} CSV export {family} 'Scale' in its name found in {data_path!r}"
        )

    frames = []
    for file_name in file_names:
        name_parts = file_name.split('_')
        frame = pd.read_csv(
            filepath_or_buffer=os.path.join(data_path, file_name),
            na_values=-999,  # -999 marks missing values in the exports
        )
        frame = frame[["Province", "St_Name", "date", "mean"]].rename(
            columns={
                "St_Name": "Station_Name",
                "date": "Date",
                "mean": f"{name_parts[2]}_{name_parts[0]}",
            }
        )
        frames.append(frame)

    # Concatenate once instead of growing a frame inside the loop.
    exports = pd.concat(frames)
    exports["Date"] = pd.to_datetime(exports["Date"])
    return exports.sort_values(by=["Province", "Station_Name", "Date"]).reset_index(drop=True)


def expand_composites_to_daily(
    composites: pd.DataFrame,
    value_column: str = PET_COLUMN,
    n_days: int = COMPOSITE_DAYS,
) -> pd.DataFrame:
    """Spread every row over ``n_days`` daily rows, splitting its value evenly.

    Each input row yields ``n_days`` consecutive output rows dated ``Date``,
    ``Date - 1 day``, ..., ``Date - (n_days - 1) days``, each carrying
    ``value / n_days``. An empty input yields an empty frame with the same
    four columns.

    NOTE(review): the window runs *backwards* from the row's date. If the
    export's ``date`` column is the composite start date, the window should
    presumably run forward instead, and the last composite of a year may span
    fewer than 8 days -- confirm against the MOD16A2GF documentation before
    changing, because it shifts every daily and monthly value.
    """
    n_rows = len(composites)
    row_positions = np.repeat(np.arange(n_rows), n_days)  # 0,0,..,0,1,1,..
    day_offsets = np.tile(np.arange(n_days), n_rows)      # 0,1,..,7,0,1,..

    daily = (
        composites[["Province", "Station_Name", "Date", value_column]]
        .iloc[row_positions]
        .reset_index(drop=True)
    )
    daily["Date"] = daily["Date"] - pd.to_timedelta(day_offsets, unit="D")
    daily[value_column] = daily[value_column] / n_days
    return daily


# Exports without "Scale" in the file name: used for every station except the
# three listed in SCALE_EXPORT_STATIONS.
pet_dataset = load_modis_exports(DATA_PATH, use_scale_exports=False)
pet_daily_default = expand_composites_to_daily(
    pet_dataset[~pet_dataset['Station_Name'].isin(SCALE_EXPORT_STATIONS)]
)

# Exports with "Scale" in the file name: used only for those three stations.
pet_dataset_scale = load_modis_exports(DATA_PATH, use_scale_exports=True)
pet_daily_dataset_scale = (
    expand_composites_to_daily(
        pet_dataset_scale[pet_dataset_scale['Station_Name'].isin(SCALE_EXPORT_STATIONS)]
    )
    .sort_values(DAILY_SORT_KEYS)
    .reset_index(drop=True)
)

# Combine both sources and attach Station_ID from the station metadata.
# The left merge keeps stations that are absent from `geoinfo`; their
# Station_ID is NaN.
pet_daily_dataset = (
    pd.concat([pet_daily_default, pet_daily_dataset_scale])
    .sort_values(DAILY_SORT_KEYS)
    .reset_index(drop=True)
    .merge(
        geoinfo[["Station_ID", "Station_Name", "Province"]],
        on=["Province", "Station_Name"],
        how="left",
    )
)

pet_daily_dataset

Unnamed: 0,Province,Station_Name,Date,PET_MOD16A2GF,Station_ID
0,Gilan,Astara,1999-12-25,1.6000,40709
1,Gilan,Astara,1999-12-26,1.6000,40709
2,Gilan,Astara,1999-12-27,1.6000,40709
3,Gilan,Astara,1999-12-28,1.6000,40709
4,Gilan,Astara,1999-12-29,1.6000,40709
...,...,...,...,...,...
220795,Mazandaran,Siahbisheh,2023-12-23,1.4875,40735
220796,Mazandaran,Siahbisheh,2023-12-24,1.4875,40735
220797,Mazandaran,Siahbisheh,2023-12-25,1.4875,40735
220798,Mazandaran,Siahbisheh,2023-12-26,1.4875,40735


### Aggregate daily PET to monthly totals

In [55]:
# Aggregate the daily PET values into one total per station and calendar month.
pet_daily_dataset["Date"] = pd.to_datetime(pet_daily_dataset["Date"])

# Work on a copy whose Date column holds the month label ("YYYY-MM") as text,
# leaving the daily table itself untouched.
tmp = pet_daily_dataset.assign(
    Date=lambda frame: frame["Date"].dt.to_period("M").astype(str)
)

# A month only gets a total when it has at least 15 non-missing daily values;
# otherwise the sum is NaN.
monthly_totals = tmp.groupby(
    ["Station_ID", "Province", "Station_Name", "Date"]
)["PET_MOD16A2GF"].sum(min_count=15)

pet_monthly_dataset = monthly_totals.reset_index().loc[
    :, ['Station_ID', 'Date', 'PET_MOD16A2GF']
]
pet_monthly_dataset

Unnamed: 0,Station_ID,Date,PET_MOD16A2GF
0,40709,1999-12,
1,40709,2000-01,44.9250
2,40709,2000-02,72.2375
3,40709,2000-03,127.6375
4,40709,2000-04,165.7000
...,...,...,...
7220,99361,2023-08,239.2750
7221,99361,2023-09,176.6500
7222,99361,2023-10,123.0125
7223,99361,2023-11,98.8000


In [56]:
# Persist the monthly PET table to the project database, replacing any
# existing `pet_monthly_modis` table.
conn = sqlite3.connect(DATABASE_PATH)
try:
    pet_monthly_dataset[['Station_ID', 'Date', 'PET_MOD16A2GF']].to_sql(
        'pet_monthly_modis', conn, if_exists='replace', index=False
    )
    conn.commit()
finally:
    # Always release the database handle, even when the write fails.
    conn.close()