#### Load data

In [1]:
import pandas as pd
# df = pd.read_csv('data/smap_input.csv')
df = pd.read_csv('data/bacia-camargos.csv')

# Data cleaning
df['Ep'] = df['Ep'].str.replace(',', '.').astype('float')
df['Pr'] = df['Pr'].str.replace(',', '.').astype('float')

df.set_index(pd.to_datetime(df['data']), inplace=True)
df.drop('data', axis=1, inplace=True)

df.head()

Unnamed: 0_level_0,Qobs,Ep,Pr
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1995-01-01,204,4.94,4.3
1995-01-02,181,4.94,9.1
1995-01-03,176,4.94,22.8
1995-01-04,194,4.94,9.2
1995-01-05,198,4.94,1.7


---
# Modelo Base

#### Set optimal parameters obtained by the ONS

In [2]:
# Define default parameters
params_ons = dict(
    Ad = 6279.0,  # Example area in square km (or appropriate units)
    Str = 100.0,  # Storage capacity or other parameter (example value)
    K2t = 5.5,  # Example decay coefficient
    Crec = 100,  # Example recharge coefficient
    Ai = 2,  # Example threshold value
    Capc = 42.0,  # Example capacity percentage
    Kkt = 150,  # Another example decay coefficient
    Pcof = 1.0,  # Example precipitation coefficient
    Tuin = 20.0,  # Example initial moisture content
    Ebin = 45.0,  # Example baseflow initial value
    Supin = 1.0,  # Example surface flow initial value
    kep = 1.05153505864843, # 0.8  # Example parameter for evaporation adjustment
    H = 200.0,  # Example storage height or capacity
    K1t = 10.0,  # Example decay coefficient for marginal storage
    K3t = 10.0,  # Another example decay coefficient                
)


### Métricas de erro

In [3]:
from modules.metrics import (
    nash_sutcliffe_efficacy,
    relative_error_coefficient,
    correlation_coefficient,
    mean_error,
    normalized_rmse,
    rmse
)

from sklearn.metrics import mean_squared_error

#### Genetic Algorithms (e.g., using DEAP or tpot)

In [None]:
# !pip install deap
from modules.smap import SmapModel
from deap import base, creator, tools, algorithms
import random

start_date = '1995-08-01'
end_date = '2000-08-01'

data = df[start_date: end_date]
X = data[['Ep', 'Pr']]
y = data['Qobs'].values

# Define the problem as minimization
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

# Define the toolbox
toolbox = base.Toolbox()
toolbox.register("H", random.uniform, 0, 200)
toolbox.register("Str", random.uniform, 50, 2000)
toolbox.register("K2t", random.uniform, 0.2, 10)
toolbox.register("Crec", random.uniform, 0, 100)
toolbox.register("Ai", random.uniform, 2, 5)
toolbox.register("Capc", random.uniform, 30, 50)
toolbox.register("Kkt", random.uniform, 30, 180)
toolbox.register("K3t", random.uniform, 10, 60)
toolbox.register("kep", random.uniform, 0.8, 1.2)

# Register individual and population
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.H, toolbox.Str, toolbox.K2t, toolbox.Crec, toolbox.Ai, toolbox.Capc, toolbox.Kkt, toolbox.K3t, toolbox.kep))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Define the evaluation function
def evaluate(individual):
    params = {
        'H': individual[0],
        'Str': individual[1],
        'K2t': individual[2],
        'Crec': individual[3],
        'Ai': individual[4],
        'Capc': individual[5],
        'Kkt': individual[6],
        'K3t': individual[7],
        'kep': individual[8]
    }
    model = SmapModel(**{**params_ons, **params})
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    return (mse,)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

# Perform the genetic algorithm
population = toolbox.population(n=100)
algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=500, verbose=True)

# Get the best individual
individual = tools.selBest(population, 1)[0]
best_params = {
    'H': individual[0],
    'Str': individual[1],
    'K2t': individual[2],
    'Crec': individual[3],
    'Ai': individual[4],
    'Capc': individual[5],
    'Kkt': individual[6],
    'K3t': individual[7],
    'kep': individual[8]
}
print(f"Best parameters:")
display(best_params)


In [None]:
40: {'H': 126.07718304613897,
 'Str': 84.19902791756634,
 'K2t': 19.744344769256745,
 'Crec': 11.915952193792771,
 'Ai': -0.2594809682052412,
 'Capc': 29.970682519957638,
 'Kkt': -29.54220082924919,
 'K3t': 107.22303117766266,
 'kep': 0.3046603737315083}

100: {'H': 104.38363354229276,
 'Str': 56.20572573137785,
 'K2t': 17.07925782312802,
 'Crec': 98.05473507097138,
 'Ai': -0.0006972611509479462,
 'Capc': 36.04296911106097,
 'Kkt': 155.77674756065196,
 'K3t': 94.54710903886107,
 'kep': 0.1209807319332773},

150: {'H': 76.62051200835192,
 'Str': 77.58133274241217,
 'K2t': 12.458053426725709,
 'Crec': 292.1655533589644,
 'Ai': -4.599125654125872,
 'Capc': 23.434235560726943,
 'Kkt': 84.29490997198076,
 'K3t': 92.40444257279475,
 'kep': 4.6524706214062626}

250: {'H': 78.78471407043702,
 'Str': 191.04762937207383,
 'K2t': 8.087543050695219,
 'Crec': 94.99793281367697,
 'Ai': -9.903240185866013,
 'Capc': 49.724993099393416,
 'Kkt': 90.1798512276936,
 'K3t': 9.948916908314086,
 'kep': -0.057373672702039094}

500: {'H': 89.36931106287157,
 'Str': 60.08811974452388,
 'K2t': 15.831750861345997,
 'Crec': 450.2613409299566,
 'Ai': -3.116099416028139,
 'Capc': 19.269487351834215,
 'Kkt': -138.75381470775318,
 'K3t': 143.13546461982975,
 'kep': 0.003998848735678814}