#### Load data

In [1]:
import pandas as pd
# df = pd.read_csv('data/smap_input.csv')
df = pd.read_csv('data/bacia-camargos.csv')

# Data cleaning
df['Ep'] = df['Ep'].str.replace(',', '.').astype('float')
df['Pr'] = df['Pr'].str.replace(',', '.').astype('float')

df.set_index(pd.to_datetime(df['data']), inplace=True)
df.drop('data', axis=1, inplace=True)

df.head()

Unnamed: 0_level_0,Qobs,Ep,Pr
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1995-01-01,204,4.94,4.3
1995-01-02,181,4.94,9.1
1995-01-03,176,4.94,22.8
1995-01-04,194,4.94,9.2
1995-01-05,198,4.94,1.7


---
# Modelo Base

#### Set optimal parameters obtained by the ONS

In [2]:
# Define default parameters
params_ons = dict(
    Ad = 6279.0,  # Example area in square km (or appropriate units)
    Str = 100.0,  # Storage capacity or other parameter (example value)
    K2t = 5.5,  # Example decay coefficient
    Crec = 100,  # Example recharge coefficient
    Ai = 2,  # Example threshold value
    Capc = 42.0,  # Example capacity percentage
    Kkt = 150,  # Another example decay coefficient
    Pcof = 1.0,  # Example precipitation coefficient
    Tuin = 20.0,  # Example initial moisture content
    Ebin = 45.0,  # Example baseflow initial value
    Supin = 1.0,  # Example surface flow initial value
    kep = 1.05153505864843, # 0.8  # Example parameter for evaporation adjustment
    H = 200.0,  # Example storage height or capacity
    K1t = 10.0,  # Example decay coefficient for marginal storage
    K3t = 10.0,  # Another example decay coefficient                
)


### Métricas de erro

In [3]:
from modules.metrics import (
    nash_sutcliffe_efficacy,
    relative_error_coefficient,
    correlation_coefficient,
    mean_error,
    normalized_rmse,
    rmse
)

from sklearn.metrics import mean_squared_error

#### Busca Randomizada

In [9]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
from modules.smap import SmapModel
from modules.metrics import nash_sutcliffe_efficacy
from sklearn.metrics import mean_squared_error

def nash_sutcliffe_efficacy_score(estimator, X_test, y_test):
    y_pred = estimator.predict(X_test)
    return nash_sutcliffe_efficacy(y_test, y_pred)

def neg_soma_coef_score(estimator, X_test, y_test):
    y_pred = estimator.predict(X_test)
    cef = nash_sutcliffe_efficacy(y_test, y_pred)
    cer = relative_error_coefficient(y_test, y_pred)
    return - (cef + cer)
    
start_date = '1995-08-01'
end_date = '2000-08-01'

data = df[start_date: end_date]
X = data[['Ep', 'Pr']]
y = data['Qobs'].values

# Define the parameter distributions (using a wide range with fewer values for random sampling)
param_distributions = {
    'H': uniform(0, 200), 
    'Str': uniform(50, 2000), 
    'K2t': uniform(0.2, 10),  
    'Crec': uniform(0, 100), 
    'Ai': uniform(2, 5), 
    'Capc': uniform(30, 50), 
    'Kkt': uniform(30, 180), 
    'K3t': uniform(10, 60), 
    'kep': uniform(0.8, 1.2),
}

# Initialize the model
model = SmapModel(**params_ons)

# Perform Randomized Search
random_search = RandomizedSearchCV(model, param_distributions, n_iter=1000, scoring='neg_mean_squared_error', error_score='raise', cv=2, verbose=1)
random_search.fit(X, y)

# Get the best parameters
print(f"Best Score: {random_search.best_score_}")
print(f"Best Parameters:")
display(random_search.best_params_)


Fitting 2 folds for each of 1000 candidates, totalling 2000 fits
Best Score: -4428.094354255619
Best Parameters:


{'Ai': 2.651257630789426,
 'Capc': 69.07957723444942,
 'Crec': 41.27193684391827,
 'H': 12.997519289389503,
 'K2t': 4.848116172919958,
 'K3t': 59.17341602309514,
 'Kkt': 131.09761991746313,
 'Str': 138.94968497821765,
 'kep': 1.5672399740027982}

In [None]:
1000: {'Ai': 2.651257630789426,
 'Capc': 69.07957723444942,
 'Crec': 41.27193684391827,
 'H': 12.997519289389503,
 'K2t': 4.848116172919958,
 'K3t': 59.17341602309514,
 'Kkt': 131.09761991746313,
 'Str': 138.94968497821765,
 'kep': 1.5672399740027982}

2500: {'Ai': 2.0493036319763256,
 'Capc': 70.78300818809907,
 'Crec': 18.085390128484136,
 'H': 85.5355405433689,
 'K2t': 9.940278272576476,
 'K3t': 60.640118634680526,
 'Kkt': 89.03233172313816,
 'Str': 88.2606299696524,
 'kep': 1.2334358471099778}

5000: {'Ai': 2.24701094226066,
 'Capc': 65.21589501631206,
 'Crec': 53.04708892064727,
 'H': 22.733477312890216,
 'K2t': 8.040232211765824,
 'K3t': 66.72556394584248,
 'Kkt': 71.15383130783022,
 'Str': 133.28852962269417,
 'kep': 1.9763228672233566}