In [1]:
import os
import sys
import sqlite3
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as scs

import pyet
import spei

In [2]:
# NOTE(review): disabled, developer-specific import-path hack (absolute local
# path); delete it if nothing in this notebook needs the project package.
# sys.path.append('/home/pooya/w/DroughtMonitoringIran/')

# SQLite database file opened by the cells below. The path is relative, so it
# is resolved against the working directory the notebook kernel runs in.
DATABASE_PATH = "../database/database.db"

In [3]:
# Load the monthly table and the station geo-information table from SQLite.
# The connection is closed in a `finally` clause so that a failing query does
# not leave an open handle on the database file inside the running kernel.
conn = sqlite3.connect(DATABASE_PATH)
try:
    data = pd.read_sql(sql='SELECT * FROM ground_pet_monthly', con=conn)
    geoinfo = pd.read_sql(sql='SELECT * FROM ground_data_geoinfo', con=conn)
finally:
    conn.close()

# Convert the stored `date` column to pandas datetimes; the later cells use
# it as the time index of every per-station series.
data['date'] = pd.to_datetime(data['date'])

In [4]:
# Print the column names, dtypes and non-null counts of the loaded monthly table.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3915 entries, 0 to 3914
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   year               3915 non-null   int64         
 1   month              3915 non-null   int64         
 2   date               3915 non-null   datetime64[ns]
 3   region_id          3915 non-null   object        
 4   region_name        3915 non-null   object        
 5   station_id         3915 non-null   object        
 6   station_name       3915 non-null   object        
 7   lat                3915 non-null   float64       
 8   lon                3915 non-null   float64       
 9   station_elevation  3915 non-null   float64       
 10  tmax               3815 non-null   float64       
 11  tmax_count         3915 non-null   int64         
 12  tmin               3815 non-null   float64       
 13  tmin_count         3915 non-null   int64         
 14  tm      

In [5]:
def _index_by_station(frame, column, index_func, dist, ts, name):
    """Compute one standardized index per station and return it in long form.

    Parameters
    ----------
    frame : pandas.DataFrame
        Date-indexed frame containing a `station_id` column and `column`.
    column : str
        Name of the column passed to `index_func` as the input series.
    index_func : callable
        Index function called as
        `index_func(series=..., dist=..., prob_zero=True, timescale=ts)`,
        i.e. `spei.spi` or `spei.spei`.
        Assumed to return a pandas Series indexed by date -- TODO confirm
        against the installed `spei` version.
    dist : scipy.stats continuous distribution
        Distribution forwarded to `index_func`.
    ts : int
        Timescale forwarded to `index_func`.
    name : str
        Name of the value column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Columns `station_id`, `date`, `name`; one row per value returned by
        `index_func`, stations in sorted `station_id` order.
    """
    pieces = []
    # Iterate the groups explicitly instead of `groupby.apply`: `apply`
    # returns a wide DataFrame (dates as columns) when every station yields
    # the same index, and an unnamed long Series otherwise, so its output
    # shape and its value-column label depend on the data.
    for station_id, group in frame.groupby(by='station_id'):
        values = index_func(
            series=group[column],
            dist=dist,
            prob_zero=True,
            timescale=ts,
        )
        piece = values.rename(name).rename_axis('date').reset_index()
        piece.insert(0, 'station_id', station_id)
        pieces.append(piece)

    if not pieces:
        # No stations at all: keep the expected schema so merges still work.
        return pd.DataFrame(columns=['station_id', 'date', name])

    return pd.concat(pieces, ignore_index=True)


# Date-indexed working frame with only the columns the indices need.
# PE_Hargreaves is the difference `rrr24 - Hargreaves`
# (presumably precipitation minus Hargreaves PET -- TODO confirm units match).
data_si = (
    data
    .set_index(['date'])[['station_id', 'rrr24', 'Hargreaves']]
    .assign(PE_Hargreaves=lambda df: df['rrr24'] - df['Hargreaves'])
)

# Timescales passed to spei (in time steps of the series; the source table
# is named "monthly", so presumably months -- TODO confirm).
timescale = [1, 3, 6, 9, 12, 15, 18, 21, 24]

# Accumulate one column per index and timescale, in the order
# SPI_1, SPEI_1, SPI_3, SPEI_3, ...; outer merges keep station/date pairs
# that exist for only some of the indices.
results = None

for ts in timescale:
    df_spi = _index_by_station(
        data_si, 'rrr24', spei.spi, scs.gamma, ts, f'SPI_{ts}'
    )
    # NOTE(review): prob_zero=True is kept from the original call for SPEI as
    # well; confirm this is intended for the fisk distribution.
    df_spei = _index_by_station(
        data_si, 'PE_Hargreaves', spei.spei, scs.fisk, ts, f'SPEI_{ts}'
    )

    for df_index in (df_spi, df_spei):
        if results is None:
            results = df_index
        else:
            results = results.merge(
                df_index, on=['station_id', 'date'], how='outer'
            )


In [6]:
# Persist the computed indices, replacing any existing `ground_di_monthly`
# table. The connection is closed in a `finally` clause so that a failing
# write or commit does not leave an open handle on the database file.
conn = sqlite3.connect(DATABASE_PATH)
try:
    results.to_sql('ground_di_monthly', conn, if_exists='replace', index=False)
    conn.commit()
finally:
    conn.close()