هدف من این است که نتایج جدول زیر 15 درصد بهتر شوند.
خروجی مد نظر:
1- جدول پیش‌بینی‌شده از Report جدید
2- Confusion Matrix بهبود‌یافته رو به شکل Heatmap تصویری هم بکش

Overall Accuracy: 0.6979
Macro-F1 Score: 0.3946
Cohen's Kappa: 0.3476

Classification Report:
              precision    recall  f1-score   support

          ED       1.00      0.50      0.67        10
          EW       0.00      0.00      0.00         3
          MD       0.21      0.08      0.12        49
          MW       0.49      0.31      0.38        54
          NN       0.75      0.94      0.84       309
          SD       0.52      0.34      0.42        32
          VW       0.50      0.26      0.34        23

    accuracy                           0.70       480
   macro avg       0.50      0.35      0.39       480
weighted avg       0.64      0.70      0.65       480




# Load Libraries

In [42]:
import os
import sys
import sqlite3
import warnings
import numpy as np
import pandas as pd
import geopandas as gp
import plotly.express as px
import skill_metrics as sm
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import scipy.stats as scs
import seaborn as sns

import spei

# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides diagnostics raised by the
# libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Load Data

In [43]:
# Directory that receives the figures and the CSV written by the cells below.
RESULTS_DIR = 'results/Stations'
os.makedirs(RESULTS_DIR, exist_ok=True)

# Make the project root importable (a later cell imports from `utils`).
sys.path.append('/home/pooya/w/DroughtMonitoringIran/')
DATABASE_PATH = "./database/database.db"

# Read the whole `data` table. The try/finally guarantees the connection is
# closed even when the query fails (e.g. missing table), instead of leaking it.
conn = sqlite3.connect(DATABASE_PATH)
try:
    data_raw = pd.read_sql(sql='SELECT * FROM data', con=conn)
finally:
    conn.close()

# Dates are parsed with the "%Y-%m" format into pandas timestamps.
data_raw['Date'] = pd.to_datetime(data_raw["Date"], format="%Y-%m")

# Filter Data
# One station per cluster:
# - Cluster 1 - Sari
# - Cluster 2 - Alasht
# - Cluster 3 - Ramsar
selected_stations = ["Sari", "Alasht", "Ramsar"]
# Study window; the query below keeps start_date <= Date < end_date.
start_date = "2006-09"
end_date = "2023-10"

selected_features = [
    'Station_ID',
    'Station_Name',
    'Date',
    'NDVI',
    'EVI',
    'VCI',
    'TCI',
    'VHI',
    'PCI_GPM',
    'CI_GPM',
]

# Add the station-based and GPM-based column of every index/scale pair.
for di in ['SPEI', 'SPI']:
    for scale in [1, 3, 6, 9, 12, 15, 18, 21, 24]:
        selected_features.append(f'{di}_{scale}')
        selected_features.append(f'GPM_{di}_{scale}')

# `.copy()` gives `data` its own storage, so the class columns added below are
# written to an independent frame rather than to a filtered slice of `data_raw`.
data = data_raw[selected_features]\
    .query("Station_Name in @selected_stations and Date >= @start_date and Date < @end_date")\
    .copy()

# Bin edges and the class label assigned to each interval between them.
# pd.cut uses right-closed intervals by default, so e.g. (-10, -2] -> 'ED'.
class_bins = [-10, -2, -1.5, -1, 1, 1.5, 2, 10]
class_labels = ['ED', 'SD', 'MD', 'NN', 'MW', 'VW', 'EW']

for di in ['SPEI', 'SPI']:
    for scale in [1, 3, 6, 9, 12, 15, 18, 21, 24]:
        # Station column first, then its GPM counterpart; pd.cut already
        # returns a categorical column, so no extra dtype conversion is needed.
        for col in (f'{di}_{scale}', f'GPM_{di}_{scale}'):
            data[f'{col}_Class'] = pd.cut(data[col], bins=class_bins, labels=class_labels)
        

# Evaluator

In [45]:
# Drought classes in the fixed order used for the report and the matrix axes.
class_labels = ['ED', 'SD', 'MD', 'NN', 'MW', 'VW', 'EW']

# Rows are collected in a plain list and converted to a DataFrame once at the
# end, instead of growing a DataFrame one `.loc` assignment at a time.
result_rows = []

# NOTE: `st`, `di`, `scale` and `df` intentionally stay module-level names;
# cells further down read the values left over from the final iteration.
for st in data['Station_Name'].unique():
    df_st = data.query("Station_Name == @st")
    for di in ['SPEI', 'SPI']:
        for scale in [1, 3, 6, 9, 12, 15, 18, 21, 24]:
            obs_col = f'{di}_{scale}'
            gpm_col = f'GPM_{di}_{scale}'

            # Keep only rows where the station and GPM values and classes are
            # all present, indexed by date for the time-series plot.
            df = df_st[[
                'Station_ID',
                'Station_Name',
                'Date',
                obs_col,
                gpm_col,
                f'{obs_col}_Class',
                f'{gpm_col}_Class',
            ]].dropna().set_index('Date')

            print(f"Station: {st}, Index: {di}, Scale: {scale}")
            if df.empty:
                # No overlapping observations: nothing to score or plot.
                continue

            y_true = df[f'{obs_col}_Class']
            y_pred = df[f'{gpm_col}_Class']

            report_dict = classification_report(
                y_true=y_true,
                y_pred=y_pred,
                labels=class_labels,
                output_dict=True
            )
            cm = confusion_matrix(
                y_true=y_true,
                y_pred=y_pred,
                labels=class_labels
            )

            # Accuracy is taken from the confusion matrix (correct / total).
            # classification_report only emits an 'accuracy' entry when it
            # treats the micro average as accuracy; with an explicit `labels`
            # list and a class missing from the data, some scikit-learn
            # releases emit 'micro avg' instead, which would raise KeyError.
            accuracy = np.trace(cm) / cm.sum()

            result_rows.append([
                st, di, scale,
                accuracy,
                report_dict['weighted avg']['precision'],
                report_dict['weighted avg']['recall'],
                report_dict['weighted avg']['f1-score']
            ])

            # Confusion-matrix heatmap (rows: actual, columns: predicted).
            fig_cm, ax_cm = plt.subplots(figsize=(10, 8))
            sns.heatmap(
                cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels, yticklabels=class_labels,
                ax=ax_cm
            )
            ax_cm.set_title(f'{st} - {di} {scale}-month')
            ax_cm.set_ylabel('Actual')
            ax_cm.set_xlabel('Predicted')
            fig_path = os.path.join(RESULTS_DIR, f'{st}-{di}-{scale} - Confusion Matrix.png')
            fig_cm.savefig(fig_path, bbox_inches='tight')
            plt.close(fig_cm)

            # Plot time series for station and GPM index
            f, ax = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
            spei.plot.si(df[obs_col], ax=ax[0], cmap="vik_r")
            spei.plot.si(df[gpm_col], ax=ax[1], cmap="vik_r")
            ax[0].set_xlim(pd.to_datetime([start_date, end_date]))
            ax[0].set_title(f'{st} - {di} {scale}-month', fontsize=16)
            for axis, ylabel in zip(ax, [f'{di}{scale} (Station)', f'{di}{scale} (GPM)']):
                axis.grid()
                axis.set_ylabel(ylabel, fontsize=14)
            plt.tight_layout()
            fig_path = os.path.join(RESULTS_DIR, f'{st}-{di}-{scale} - Timeseries.png')
            f.savefig(fig_path, bbox_inches='tight')
            plt.close(f)

# One row per scored station / index / scale combination.
results_df = pd.DataFrame(result_rows, columns=[
    'Station', 'Index', 'Scale', 'Accuracy',
    'Precision', 'Recall', 'F1-score'
])
results_df.to_csv(os.path.join(RESULTS_DIR, 'classification_results.csv'), index=False)


Station: Ramsar, Index: SPEI, Scale: 1



Station: Ramsar, Index: SPEI, Scale: 3



Station: Ramsar, Index: SPEI, Scale: 6



Station: Ramsar, Index: SPEI, Scale: 9



Station: Ramsar, Index: SPEI, Scale: 12



Station: Ramsar, Index: SPEI, Scale: 15



Station: Ramsar, Index: SPEI, Scale: 18



Station: Ramsar, Index: SPEI, Scale: 21



Station: Ramsar, Index: SPEI, Scale: 24



Station: Ramsar, Index: SPI, Scale: 1



Station: Ramsar, Index: SPI, Scale: 3



Station: Ramsar, Index: SPI, Scale: 6



Station: Ramsar, Index: SPI, Scale: 9



Station: Ramsar, Index: SPI, Scale: 12



Station: Ramsar, Index: SPI, Scale: 15



Station: Ramsar, Index: SPI, Scale: 18



Station: Ramsar, Index: SPI, Scale: 21



Station: Ramsar, Index: SPI, Scale: 24



Station: Sari, Index: SPEI, Scale: 1



Station: Sari, Index: SPEI, Scale: 3



Station: Sari, Index: SPEI, Scale: 6



Station: Sari, Index: SPEI, Scale: 9



Station: Sari, Index: SPEI, Scale: 12



Station: Sari, Index: SPEI, Scale: 15



S

In [None]:
from utils.model_evaluator import ModelEvaluator

# `di` and `scale` hold whatever the last executed loop left in them.
obs_col = f'{di}_{scale}'
gpm_col = f'GPM_{di}_{scale}'

for st in selected_stations:
    print(f"\nMetrics for Station ID: {st}")
    # Query the multi-station frame `data`: the loop-local `df` only contains
    # the last station processed by the evaluator, so filtering it by any
    # other station name yields an empty frame.
    station_data = (
        data.query('Station_Name == @st')
        .dropna(subset=[obs_col, gpm_col])
        .set_index('Date')
    )
    # NOTE(review): assumes ModelEvaluator takes (frame, observed column,
    # predicted column) as in the original call - confirm against utils.
    ModelEvaluator(station_data, obs_col, gpm_col).display_metrics()

In [None]:
import plotly.express as px

# `di` and `scale` hold whatever the last executed loop left in them.
obs_col = f'{di}_{scale}'
gpm_col = f'GPM_{di}_{scale}'

for st in selected_stations:
    # Use the multi-station frame `data`; the loop-local `df` only holds the
    # last station processed by the evaluator.
    station_data = data.query('Station_Name == @st')[['Date', obs_col, gpm_col]].dropna()
    fig = px.scatter(station_data, x=obs_col, y=gpm_col, title=st)
    # Shared square axis range covering both series. The lower bound must be
    # the smaller of the two minima (it was previously computed with max()).
    max_limit = max(station_data[obs_col].max(), station_data[gpm_col].max())
    min_limit = min(station_data[obs_col].min(), station_data[gpm_col].min())
    fig.update_layout(xaxis=dict(range=[min_limit, max_limit]), yaxis=dict(range=[min_limit, max_limit]))
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    fig.update_xaxes(constrain='domain')
    fig.show()

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np


# Column names of the station-based and GPM-based series being compared;
# `di` and `scale` hold whatever the last executed loop left in them.
x = f'{di}_{scale}'
y = f'GPM_{di}_{scale}'

for st in selected_stations:
    # Rows of this station where both series are available.
    station_rows = data.query("Station_Name == @st")
    station_data = station_rows[['Date', x, y]].dropna()

    axis_labels = {
        x: f"{st}<br>{di}{scale} (Station)",
        y: f'{di}{scale} (GPM)'
    }
    fig = px.scatter(
        station_data,
        x=x,
        y=y,
        labels=axis_labels,
        opacity=0.7,
        color_discrete_sequence=["black"],  # make points black
    )

    # Larger black markers.
    fig.update_traces(marker=dict(size=8, color="black"))

    # Common axis limits: the overall extremes of both series padded by 0.5.
    max_limit = max(station_data[x].max(), station_data[y].max()) + 0.5
    min_limit = min(station_data[x].min(), station_data[y].min()) - 0.5

    # Tick positions at unit spacing.
    # NOTE(review): computed but not applied to either axis below.
    tick_vals = np.arange(min_limit, max_limit, 1)

    # Square figure with identical ranges on both axes.
    fig.update_layout(
        xaxis=dict(range=[min_limit, max_limit]),
        yaxis=dict(range=[min_limit, max_limit]),
        width=600,
        height=600,
        title=None,
        margin=dict(l=20, r=20, t=20, b=20),
    )
    fig.update_yaxes(scaleanchor="x", scaleratio=1)

    # Dashed red 1:1 reference line, kept out of the legend.
    one_to_one = go.Scatter(
        x=[min_limit, max_limit],
        y=[min_limit, max_limit],
        mode="lines",
        line=dict(color="red", dash="dash"),
        showlegend=False
    )
    fig.add_trace(one_to_one)

    fig.show()


In [None]:
# Seven-panel overview of the remote-sensing indices for one station.
# The rows come from `data_raw`: the loop-local `df` only carries the
# SPEI/SPI columns of one selected station and already has 'Date' as its
# index, so it has neither this station nor the columns plotted here.
# NOTE(review): assumes "Bandar-e-anzali" is present in the database table.
df_plot = data_raw.query('Station_Name == "Bandar-e-anzali"').set_index('Date')

# (column, y-axis label, colormap, y-limits) for each panel, top to bottom.
panels = [
    ('NDVI', 'NDVI', "vik_r", (-0.2, 1)),
    ('EVI', 'EVI', "roma", (-0.2, 1)),
    ('PCI_GPM', 'PCI', "seismic_r", (0, 1)),
    ('VCI', 'VCI', "roma", (0, 1)),
    ('TCI', 'TCI', "vik_r", (0, 1)),
    ('VHI', 'VHI', "seismic_r", (0, 1)),
    ('CI_GPM', 'CI', "roma", (0, 1)),
]

f, ax = plt.subplots(7, 1, figsize=(8, 12), sharex=True)
for axis, (column, label, cmap, ylim) in zip(ax, panels):
    spei.plot.si(df_plot[column], ax=axis, cmap=cmap)
    axis.set_ylim(*ylim)
    axis.grid()
    axis.set_ylabel(label, fontsize=14)

# The x-axis is shared, so setting the limits on the first panel is enough.
ax[0].set_xlim(pd.to_datetime(["2014", "2024"]))