# **Geração de IDW automatizada**

## Instalando bibliotecas

In [None]:
!pip install hydrobr # download and load precipitation and streamflow data
!git clone https://github.com/gespla-iph-ufrgs/gespla # toolbox for time-series analysis and other utilities
!pip install chart_studio # plotting of the streamflow series
!pip install geopandas
!pip install scipy
#!pip install baseflow # baseflow separation
#!pip install cartopy
#!pip install rasterio
#!pip install gdal

Collecting hydrobr
  Downloading hydrobr-0.1.1.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: hydrobr
  Building wheel for hydrobr (setup.py) ... [?25l[?25hdone
  Created wheel for hydrobr: filename=hydrobr-0.1.1-py3-none-any.whl size=15070 sha256=7b9fe57147efea23cad38179c8563b82b5b5cd009d8456699fe3de0cf2c2f631
  Stored in directory: /root/.cache/pip/wheels/44/9b/89/8fd8f9bf127ee2c08c7fb9c31520f5a80d238702bdd8fd8cd0
Successfully built hydrobr
Installing collected packages: hydrobr
Successfully installed hydrobr-0.1.1
Cloning into 'gespla'...
remote: Enumerating objects: 256, done.[K
remote: Counting objects: 100% (55/55), done.[K
remote: Compressing objects: 100% (38/38), done.[K
remote: Total 256 (delta 31), reused 39 (delta 17), pack-reused 201[K
Receiving objects: 100% (256/256), 107.08 KiB | 5.95 MiB/s, done.
Resolving deltas: 100% (152/152), done.


## Importando classes e funções

In [None]:
from gespla import download, load, resample, tsa, visuals
import hydrobr
import geopandas as gpd

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# para plotar Time Series data com Rangeslider
import chart_studio.plotly as plotly
import plotly.figure_factory as ff
from plotly import graph_objs as go
from plotly.offline import plot

from scipy.interpolate import Rbf

import matplotlib.pyplot as plt
import matplotlib as mpl
from shapely.geometry import Point # Gera os pontos das coordenadas das estações

## Download de dados da ANA

In [None]:
!mkdir -p "/content/IDW" # Create the project folder

#### Importa poligonos das subbacias da ANA

In [None]:
# Download the ANA sub-basin polygons (GeoJSON).
# The URL is quoted and an explicit output name is given: unquoted, the shell brace-expands
# {"latestWkid":4618,"wkid":4291} into two separate URLs and treats "?" as a glob character,
# so the file read below only matched one of the two downloads by accident.
# The query string is kept exactly as the request that produced the file originally read.
# NOTE(review): the original note says "EPSG4291 = SAD69"; check df_shape_sub_brasil.crs to
# confirm which CRS the server actually returns.
!wget -O "/content/IDW/subbacias_ana.geojson" "https://dadosabertos.ana.gov.br/datasets/10480692111f443bb5a38d9bb156851f_0.geojson?outSR=latestWkid:4618"

df_shape_sub_brasil = gpd.read_file('/content/IDW/subbacias_ana.geojson')
# Sort by basin code, then by sub-basin number
df_shape_sub_brasil = df_shape_sub_brasil.sort_values(by=['DNS_DNB_CD','DNS_NU_SUB'])

df_shape_sub_brasil.head()

In [None]:
df_shape_sub_brasil.plot(figsize=(8,8))

In [None]:
# Filter by sub-basin number (DNS_NU_SUB)
sb = int(input("Digite a Sub-bacia de interesse: "))
# EPSG:4674 is SIRGAS 2000 in geographic coordinates (degrees), not a metric/projected system.
subbacia_select = df_shape_sub_brasil.loc[df_shape_sub_brasil.DNS_NU_SUB == sb].to_crs(epsg=4674)
# Fail here, with a clear message, instead of later at `.iloc[0]` / `name_bacia[0]`.
if subbacia_select.empty:
    raise ValueError(f"Sub-bacia {sb} não encontrada na coluna DNS_NU_SUB")
#subbacia_select = 76
# Filter by basin
#b = int(input("Digite a Bacia de interesse: "))
#bacia_select = df_shape_sub_brasil.loc[df_shape_sub_brasil.DNS_DNB_CD== b ].to_crs(epsg='4326')
#bacia_select

Digite a Sub-bacia de interesse: 76


NameError: name 'df_shape_sub_brasil' is not defined

In [None]:
# Row labels and name of the selected sub-basin (first matching row)
indice_selecionado = subbacia_select.index
name_bacia = subbacia_select.DNS_NM.to_list()[0]

# Map of the selected sub-basin
subbacia_select.plot(figsize=(8,8), legend='true', color='red', alpha=.8)
plt.ylabel('Latitude')
plt.xlabel('Longitude')
plt.title('Mapa da bacia %s'%name_bacia)

### Download metadados da ANA

In [None]:

# Fetch the ANA station inventories (streamflow and precipitation) into the project folder
meta_flow = download.metadata_ana_flow(folder='/content/IDW')
meta_prec = download.metadata_ana_prec(folder='/content/IDW')

# Load both inventories as DataFrames
df_meta_flow = load.metadata_ana_flow(file=meta_flow)  # streamflow gauges
df_meta_prec = load.metadata_ana_prec(file=meta_prec)  # rain gauges

# Preview the first 10 rows of each inventory
print('Metadados de vazão', df_meta_flow.head(10).to_string(), sep='\n')
print()
print('Metadados de precipitação', df_meta_prec.head(10).to_string(), sep='\n')

#df_meta_flow.info()  # dataframe summary

Metadados de vazão
                   Name CodEstacao Type  DrainageArea  SubBasin              City     State   Responsible  Latitude  Longitude  StartDate    EndDate  NYD    MD  N_YWOMD  YWMD
0                 BORJA   10064000    1      115000.0        10              PERU      PERU  SENAMHI-PERU   -4.4700   -77.5500 1986-02-02 2013-03-15   28  27.6       17  39.3
1             SAN REGIS   10070500    1      316000.0        10              PERU      PERU  SENAMHI-PERU   -4.5100   -73.9500 1994-05-16 2005-10-17   12   0.1       10  16.7
2               REQUENA   10074800    1        4738.0        10              PERU      PERU  SENAMHI-PERU   -5.0300   -73.8300 1984-10-25 2005-10-17   22   8.6       15  31.8
3           TAMISHIYACU   10075000    1        8985.0        10              PERU      PERU  SENAMHI-PERU   -4.0036   -73.1611 1983-10-02 2010-09-13   28   5.0       23  17.9
4  FRANCISCO DE ORELLAN   10080900    1           NaN        10              PERU      PERU  SENAMHI-PERU 

#### Plotagem das estações

In [None]:

# One point geometry per rain gauge, built from the (Longitude, Latitude) columns
pontos = [Point(lon, lat) for lon, lat in zip(df_meta_prec.Longitude, df_meta_prec.Latitude)]

# GeoDataFrame of the rain gauges, declared in EPSG:4674 (the same CRS the selected
# sub-basin is reprojected to). The unused WGS84 `crs` dict that used to sit here was
# removed: it was never passed to geopandas and contradicted the EPSG code actually used.
df_meta_prec_geo = gpd.GeoDataFrame(df_meta_prec, geometry=pontos, crs=4674)

df_meta_prec_geo.plot(figsize=(5,5))

In [None]:
# Keep only the rain gauges whose SubBasin is >= 74 and < 77.
# (The original note mentions the Uruguay River sub-basins "70 to 79"; the filter below is narrower.)
df_prec_=  df_meta_prec[(df_meta_prec['SubBasin'] >=74) & (df_meta_prec['SubBasin']<77)]
df_prec_ = df_prec_.sort_values(by=['SubBasin','CodEstacao'])
df_prec_

Unnamed: 0,Name,CodEstacao,Type,SubBasin,City,State,Responsible,Latitude,Longitude,StartDate,EndDate,NYD,MD,N_YWOMD,YWMD
11148,CAMPO ERÊ - EMPASC,02653001,2,74,CAMPO ERÊ,SANTA CATARINA,ANA,-26.4578,-53.0794,1969-11-22,2019-08-31,51,5.5,31,39.2
11149,DIONÍSIO CERQUEIRA,02653002,2,74,DIONÍSIO CERQUEIRA,SANTA CATARINA,ANA,-26.2692,-53.6275,1972-03-01,2019-10-31,48,5.2,30,37.5
11151,PONTE DO SARGENTO,02653004,2,74,ROMELÂNDIA,SANTA CATARINA,ANA,-26.6828,-53.2867,1969-11-22,2019-10-31,51,1.6,45,11.8
11152,SÃO JOSÉ DO CEDRO,02653005,2,74,SÃO JOSÉ DO CEDRO,SANTA CATARINA,ANA,-26.4650,-53.4536,1972-03-01,2019-09-30,48,2.7,30,37.5
11153,SÃO MIGUEL D'OESTE I,02653006,2,74,SÃO MIGUEL DO OESTE,SANTA CATARINA,ANA,-26.7333,-53.5167,1965-11-19,1975-02-28,11,3.6,6,45.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11760,SÃO CARLOS,03055007,2,76,ROSÁRIO DO SUL,RIO GRANDE DO SUL,ANA,-30.2042,-55.4919,1986-04-24,2019-10-30,34,7.1,18,47.1
11762,BR-293,03055009,2,76,SANTANA DO LIVRAMEN,RIO GRANDE DO SUL,ANA,-30.7917,-55.1792,2000-02-13,2019-10-31,20,9.7,10,50.0
11814,TORQUATO SEVERO,03154003,2,76,DOM PEDRITO,RIO GRANDE DO SUL,ANA,-31.0283,-54.1789,1976-09-01,2019-10-31,44,2.6,34,22.7
11818,SERRILHADA,03154008,2,76,DOM PEDRITO,RIO GRANDE DO SUL,ANA,-31.4375,-54.7117,2000-06-12,2019-10-31,20,5.6,4,80.0


In [None]:
estacoes_gantt_list =  df_prec_['CodEstacao'].tolist() # list with the station codes of the clipped selection

In [None]:
# These 28 stations are the ones found to have good data coverage in the Ibicuí
stations_code_list = ['02850019','02854012','02855004','02953008','02953030','02954001','02954004','02954005','02954031','02955006','02955007','02955013',
                      '02955017','02956006','03053004','03053023','03053024','03053028','03054002','03054016','03054019','03054020','03054021','03055003',
                      '03056007','03154003','03154008','03155001']
print(stations_code_list)
print(len(stations_code_list))

['02850019', '02854012', '02855004', '02953008', '02953030', '02954001', '02954004', '02954005', '02954031', '02955006', '02955007', '02955013', '02955017', '02956006', '03053004', '03053023', '03053024', '03053028', '03054002', '03054016', '03054019', '03054020', '03054021', '03055003', '03056007', '03154003', '03154008', '03155001']
28


In [None]:
# Pass a different list here if the hand-picked ("good") stations above are not the ones wanted.
# only_consisted=False is forwarded to hydrobr as-is.
data_stations = hydrobr.get_data.ANA.prec_data(stations_code_list, only_consisted=False)

  4%|▎         | 1/28 [00:03<01:35,  3.53s/it]


KeyboardInterrupt: 

In [None]:
# Working copy of the rainfall table: the index is moved into a column called 'index'
# and immediately set back as the index, so the index ends up named 'index'.
data_stations_ = data_stations.reset_index().set_index('index')
data_stations_

In [None]:
# Data-availability (Gantt) chart of the downloaded stations
gantt_fig = hydrobr.Plot.gantt(data_stations)

# Fixed-size layout with axis titles; being the last expression, the figure is displayed
gantt_fig.update_layout(
    autosize=False, width=1200, height=500,
    xaxis_title='Ano', yaxis_title='Codigo da estação',
    font={'family': "Courier New, monospace", 'size': 12},
)

# To also save the chart as HTML:
#plot(gantt_fig,filename='gantt' + '.html')

In [None]:
# @title
# Station codes become integers here. After this cast the column no longer matches the
# zero-padded string codes stored in stations_code_list.
df_meta_prec = df_meta_prec.astype({"CodEstacao": int})
#
!mkdir -p "/content/Separacao_flow/Estacoes_prec"
#
# Download one precipitation file per selected station
for codigo in stations_code_list:
  file_prec = download.ana_prec(code=codigo, folder='/content/Separacao_flow/Estacoes_prec')
  print('Arquivo salvo em: {}'.format(file_prec))

100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Arquivo salvo em: /content/Separacao_flow/Estacoes_prec/ANA-prec_02850019_2002-2023__by-2024-01-16.txt


  0%|          | 0/1 [00:01<?, ?it/s]


KeyboardInterrupt: 

### Escolha de estação de vazão para separação de eventos

In [None]:
df_meta_flow = df_meta_flow.astype({"CodEstacao": int}) # station codes as integers
flow_station = '76800000' # streamflow gauge used for event separation
#df = df_meta_flow[df_meta_flow.CodEstacao.isin(flow_station)] # metadata row of the selected gauge
file_flow = download.ana_flow(code=flow_station, folder='/content/IDW') # download the flow series; returns the saved file path

# Load the series. 'Date' is parsed as datetime so that date-based plotting and
# resampling further down operate on real timestamps instead of strings.
df_flow = pd.read_csv(file_flow, sep=";", parse_dates=['Date'])
df_flow

In [None]:
# Interactive hydrograph with a range slider, which makes picking event windows easier
fig = go.Figure(data=[go.Scatter(x=df_flow['Date'], y=df_flow['Flow'], name="Vazão")])
fig.update_layout(
    title_text='Série de vazão da estação selecionada',
    xaxis_rangeslider_visible=True,
    xaxis_title='Tempo (dias)',
    yaxis_title='Vazão',
)
fig

### Estações de precipitação para gerar IDW

# **Gerando um geodataframe com coordenadas e precipitacoes para espacialização**

In [None]:
# Station codes normalised to zero-padded 8-character strings. An earlier cell may have cast
# CodEstacao to int, in which case comparing it with the string codes in stations_code_list
# matches nothing and the index merge with the rainfall table silently comes out empty.
codigos_str = df_meta_prec['CodEstacao'].astype(str).str.zfill(8)
df_meta_prec_filtrado = df_meta_prec.loc[codigos_str.isin(stations_code_list)]

colunas_desejadas = ['CodEstacao', 'Latitude',  'Longitude']

# Keep only code and coordinates, indexed by the string station code
# (presumably the same labels hydrobr uses as column names - TODO confirm).
df_recortado_lat_lon = (
    df_meta_prec_filtrado[colunas_desejadas]
    .assign(CodEstacao=codigos_str)
    .set_index('CodEstacao')
)
df_recortado_lat_lon

In [None]:
# Test event window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.to_datetime('2013-08-21')
data_fim_filtro = pd.to_datetime('2013-09-13')

df_ev_teste = data_stations_.loc[(data_stations_.index >= data_inicio_filtro) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev_teste_transposto_t = df_ev_teste.T

# Attach Latitude/Longitude to each station row (joined on the station code index)
df_teste = pd.merge(df_ev_teste_transposto_t, df_recortado_lat_lon, left_index=True, right_index=True)

# Point geometry per station. The CRS is declared as EPSG:4674 to agree with the sub-basin
# polygon and the station layer built earlier; geopandas does not reproject layers when they
# are drawn on the same axes, so mixing CRS declarations would misplace the overlay.
geometry = [Point(lon, lat) for lon, lat in zip(df_teste['Longitude'], df_teste['Latitude'])]
geo_df = gpd.GeoDataFrame(df_teste, geometry=geometry, crs='EPSG:4674')
geo_df

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

subbacia_select.plot(ax=ax, color='red')
geo_df.plot(ax=ax, color='blue')

# A second ax.legend() call replaces the first, so only one legend ever showed. Build a
# single legend from explicit proxy handles (polygon layers do not yield legend entries
# reliably from `label=`).
legenda = [
    mpl.patches.Patch(color='red', label='Sub-bacia'),
    mpl.lines.Line2D([], [], marker='o', linestyle='', color='blue', label='Estações'),
]
ax.legend(handles=legenda, loc='upper right')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Sub-bacia com estações')
plt.show()

# Definição das variáveis para gerar IDW

In [None]:
# Station coordinates used by the IDW routines
latitude, longitude = df_recortado_lat_lon['Latitude'], df_recortado_lat_lon['Longitude']

# Bounding box of the selected sub-basin polygon
polygon = subbacia_select.geometry.iloc[0]
min_x, min_y, max_x, max_y = polygon.bounds

# Coordinate lists extended with the basin bounds, so that min()/max() over them
# cover both the stations and the basin
list_long = longitude.tolist() + [min_x, max_x]
list_lat = latitude.tolist() + [min_y, max_y]

In [None]:
nx, ny = 100, 100  # interpolation mesh: 100 x 100 nodes
n = 28  # number of points to interpolate
x, y = longitude, latitude  # X axis is longitude, Y axis is latitude

# Regular mesh over the combined station/basin extent, flattened to 1-D node coordinates
xi, yi = (
    eixo.flatten()
    for eixo in np.meshgrid(
        np.linspace(min(list_long), max(list_long), nx),
        np.linspace(min(list_lat), max(list_lat), ny),
    )
)

### Evento teste com função IDW, apenas (Ignorar saída)

In [None]:
# @title
# TENTATIVA COM GEOPANDAS

import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

def idw_from_geometries(df, geometry_col, value_col, xi, yi):
    """Interpolate one column of a GeoDataFrame with IDW and draw the result.

    :param df: GeoDataFrame holding point geometries and the values to interpolate
    :param geometry_col: name of the point-geometry column
    :param value_col: name of the column with the observed values
    :param xi: flattened X coordinates of the interpolation nodes
    :param yi: flattened Y coordinates of the interpolation nodes

    The result is reshaped with the module-level ``ny`` and ``nx``, so ``xi``/``yi``
    must hold ``ny * nx`` nodes.
    """
    longitude = df[geometry_col].x.values
    latitude = df[geometry_col].y.values
    # Use the requested column; this previously read the unrelated global `col`.
    values = df[value_col].values

    grid1 = simple_idw(longitude, latitude, values, xi, yi)
    grid1 = grid1.reshape((ny, nx))

    plot(df[geometry_col], values, grid1)

def simple_idw(longitude, latitude, values, xi, yi):
    """Inverse-distance-weighted interpolation with power 2.

    :param longitude: X coordinates of the observation points
    :param latitude: Y coordinates of the observation points
    :param values: observed value at each observation point (NaN = no reading)
    :param xi: X coordinates of the interpolation nodes (1-D)
    :param yi: Y coordinates of the interpolation nodes (1-D)
    :return: 1-D array with one interpolated value per node
    """
    values = np.asarray(values, dtype=float)
    dist = distance_matrix(longitude, latitude, xi, yi)

    # A single NaN reading would turn every node into NaN; interpolate from the
    # observation points that do have a value.
    valid = ~np.isnan(values)
    if not valid.any():
        return np.full(dist.shape[1], np.nan)
    dist, values = dist[valid], values[valid]

    with np.errstate(divide='ignore'):
        weights = 1.0 / dist**2

    # A node lying exactly on an observation point gets an infinite weight (inf/inf -> NaN
    # after normalisation). Give such nodes weight 1 on the coincident point(s), 0 elsewhere.
    coincident = np.isinf(weights)
    on_point = coincident.any(axis=0)
    weights[:, on_point] = coincident[:, on_point]

    # Make the weights of each node sum to one
    weights /= weights.sum(axis=0)
    return np.dot(weights.T, values)

def distance_matrix(x0, y0, x1, y1):
    """Euclidean distance from every point (x0, y0) to every point (x1, y1).

    Inputs are 1-D coordinate sequences. The result has one row per (x0, y0)
    point and one column per (x1, y1) point.
    """
    from_x, from_y = np.vstack((x0, y0))
    to_x, to_y = np.vstack((x1, y1))
    delta_x = np.subtract.outer(from_x, to_x)
    delta_y = np.subtract.outer(from_y, to_y)
    return np.hypot(delta_x, delta_y)

def plot(geometries, values, z_grid):
    """Draw an interpolated grid with the observation points on top.

    :param geometries: GeoSeries of observation points; its bounding box sets the image extent
    :param values: observed values, used to colour the points
    :param z_grid: 2-D array of interpolated values, with row 0 at the lowest Y
    """
    plt.figure()

    # total_bounds is ordered (minx, miny, maxx, maxy); imshow wants (left, right, bottom, top).
    min_x, min_y, max_x, max_y = geometries.total_bounds
    plt.imshow(z_grid, extent=(min_x, max_x, min_y, max_y), origin='lower')
    plt.scatter(geometries.x, geometries.y, c=values, edgecolors='k', marker='o')
    plt.colorbar(label='Value')
    plt.title("Interpolated Map")
    plt.ylabel("Latitude")
    plt.xlabel("Longitude")

if __name__ == '__main__':
    # GeoDataFrame with the station points and one column of values per timestamp
    gdf = geo_df#gpd.read_file('your_shapefile.shp')

    # Regular ny x nx mesh over the station extent, flattened to node coordinates.
    # Without the meshgrid step only nx nodes exist and the reshape to (ny, nx) fails.
    xi = np.linspace(gdf.geometry.x.min(), gdf.geometry.x.max(), nx)
    yi = np.linspace(gdf.geometry.y.min(), gdf.geometry.y.max(), ny)
    xi, yi = np.meshgrid(xi, yi)
    xi, yi = xi.flatten(), yi.flatten()

    # 'value_column' was a placeholder that does not exist in geo_df; use the first
    # column, i.e. the first timestamp of the test event.
    idw_from_geometries(gdf, 'geometry', gdf.columns[0], xi, yi)


In [None]:
# NOTE(review): on a fresh kernel this cell fails. Both df_ev1_transposto_t and idw_() are
# only defined in cells further down the notebook, and the same loop appears again under
# "Evento 1".
for col in df_ev1_transposto_t.columns:
    values = df_ev1_transposto_t[col].values
    idw_()

In [None]:
# Functions that compute and draw the IDW maps

### Ignore the map this cell outputs

def idw_():
    """Interpolate the current `values` onto the module-level grid and plot it.

    Takes no arguments: `longitude`, `latitude`, `values`, `xi`, `yi`, `nx` and `ny`
    are all read from the notebook namespace.
    """
    surface = simple_idw(longitude, latitude, values, xi, yi).reshape((ny, nx))
    plot(longitude, latitude, values, surface)

def simple_idw(longitude, latitude, values, xi, yi):
    """Inverse-distance-weighted interpolation with power 2.

    :param longitude: X coordinates of the observation points
    :param latitude: Y coordinates of the observation points
    :param values: observed value at each observation point (NaN = no reading)
    :param xi: X coordinates of the interpolation nodes (1-D)
    :param yi: Y coordinates of the interpolation nodes (1-D)
    :return: 1-D array with one interpolated value per node
    """
    values = np.asarray(values, dtype=float)
    dist = distance_matrix(longitude, latitude, xi, yi)

    # A single NaN reading would turn every node into NaN; interpolate from the
    # observation points that do have a value.
    valid = ~np.isnan(values)
    if not valid.any():
        return np.full(dist.shape[1], np.nan)
    dist, values = dist[valid], values[valid]

    # In IDW, weights are 1 / distance**2
    with np.errstate(divide='ignore'):
        weights = 1.0 / dist**2

    # A node lying exactly on an observation point gets an infinite weight (inf/inf -> NaN
    # after normalisation). Give such nodes weight 1 on the coincident point(s), 0 elsewhere.
    coincident = np.isinf(weights)
    on_point = coincident.any(axis=0)
    weights[:, on_point] = coincident[:, on_point]

    # Make weights sum to one
    weights /= weights.sum(axis=0)

    # Multiply the weights for each interpolated point by all observed Z-values
    return np.dot(weights.T, values)


def distance_matrix(x0, y0, x1, y1):
    """Euclidean distance from every point (x0, y0) to every point (x1, y1).

    Inputs are 1-D coordinate sequences. The result has one row per (x0, y0)
    point and one column per (x1, y1) point.
    """
    from_x, from_y = np.vstack((x0, y0))
    to_x, to_y = np.vstack((x1, y1))

    # Pairwise coordinate differences via ufunc.outer
    # (approach from <http://stackoverflow.com/questions/1871536>)
    delta_x = np.subtract.outer(from_x, to_x)
    delta_y = np.subtract.outer(from_y, to_y)

    return np.hypot(delta_x, delta_y)


def plot(longitude,latitude,values,z_grid):
    """Draw an interpolated grid with the stations on top.

    :param longitude: X coordinates of the stations
    :param latitude: Y coordinates of the stations
    :param values: observed values, used to colour the station markers
    :param z_grid: 2-D array of interpolated values, with row 0 at the lowest Y

    The colour-bar label and the title use the module-level ``col`` (the column being mapped).
    """
    plt.figure()
    # The image must span the interpolation grid (module-level xi, yi), not just the station
    # bounding box: the grid also covers the basin bounds, so using the station extent
    # squeezes the surface and misplaces it relative to the markers.
    grid_extent = (np.min(xi), np.max(xi), np.min(yi), np.max(yi))
    plt.imshow(z_grid, extent=grid_extent, origin='lower')
    plt.scatter(longitude,latitude,c=values,  edgecolors='k', marker='o')
    plt.colorbar(label=col)
    plt.title(f"Mapa Interpolado - {col}")
    plt.ylabel("Latitude")
    plt.xlabel("Longitude")

# In a notebook kernel __name__ is '__main__', so this call runs whenever the cell is executed.
# idw_() reads `values` from the notebook namespace, so a loop that sets it must have run first.
if __name__ == '__main__':
    idw_()


## Evento 1

In [None]:

# Event 1 window; both end dates are included. The index is converted to datetime first
# in case it is still stored as strings.
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2013-08-21')
data_fim_filtro = pd.Timestamp('2013-09-13')

df_ev1 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev1_transposto_t = df_ev1.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev1 = df_ev1_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV in case the IDW is to be generated in QGIS/ArcGIS instead
output_file = 'Evento_1_to_idw.csv'
df_final_ev1.to_csv(output_file, index=True)


In [None]:
df_ev1_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev1_transposto_t.columns:
    values = df_ev1_transposto_t[col].values
    idw_()

## Testes IDW Falhos

In [None]:
# The mask polygon is the selected sub-basin

mask_polygon = subbacia_select.geometry.iloc[0]
# CRS of the full sub-basin layer. Note that subbacia_select was reprojected with to_crs
# when it was selected, so this is not necessarily the CRS of mask_polygon.
sistema_coordenadas = df_shape_sub_brasil.crs

print(sistema_coordenadas)

In [None]:
import cartopy.crs as ccrs
from cartopy.io.img_tiles import OSM
import cartopy.feature as cfeature
import rasterio
from rasterio.transform import from_origin

In [None]:
max(list_long)

In [None]:
def idw_():
    """Interpolate the current `values` on a 100 x 100 grid and plot the result.

    Takes no arguments: `geo_df`, `values`, `list_long` and `list_lat` are read from
    the notebook namespace.
    """
    nx, ny = 100, 100 # interpolation grid size
    x = geo_df['geometry'].x
    y = geo_df['geometry'].y
    # The grid spans the combined station/basin box held in list_long/list_lat, which is the
    # same box plot() uses as the image extent. Building it from the station bounds only
    # stretched the surface when drawn.
    xi = np.linspace(min(list_long), max(list_long), nx)
    yi = np.linspace(min(list_lat), max(list_lat), ny)
    xi, yi = np.meshgrid(xi, yi)
    xi, yi = xi.flatten(), yi.flatten()

    # Calculate IDW
    grid1 = simple_idw(x,y,values,xi,yi)
    grid1 = grid1.reshape((ny, nx))

    # Draw the surface with the stations on top
    plot(x,y,values,grid1)
    plt.title('IDW')
    plt.show()

def simple_idw(x, y, values, xi, yi):
    """Inverse-distance-weighted interpolation with power 2.

    :param x: X coordinates of the observation points
    :param y: Y coordinates of the observation points
    :param values: observed value at each observation point (NaN = no reading)
    :param xi: X coordinates of the interpolation nodes (1-D)
    :param yi: Y coordinates of the interpolation nodes (1-D)
    :return: 1-D array with one interpolated value per node
    """
    values = np.asarray(values, dtype=float)
    dist = distance_matrix(x, y, xi, yi)

    # A single NaN reading would turn every node into NaN; interpolate from the
    # observation points that do have a value.
    valid = ~np.isnan(values)
    if not valid.any():
        return np.full(dist.shape[1], np.nan)
    dist, values = dist[valid], values[valid]

    # In IDW, weights are 1 / distance**2
    with np.errstate(divide='ignore'):
        weights = 1.0 / dist**2

    # A node lying exactly on an observation point gets an infinite weight (inf/inf -> NaN
    # after normalisation). Give such nodes weight 1 on the coincident point(s), 0 elsewhere.
    coincident = np.isinf(weights)
    on_point = coincident.any(axis=0)
    weights[:, on_point] = coincident[:, on_point]

    # Make weights sum to one
    weights /= weights.sum(axis=0)

    # Multiply the weights for each interpolated point by all observed Z-values
    return np.dot(weights.T, values)


def distance_matrix(x0, y0, x1, y1):
    """Euclidean distance from every point (x0, y0) to every point (x1, y1).

    Inputs are 1-D coordinate sequences. The result has one row per (x0, y0)
    point and one column per (x1, y1) point.
    """
    from_x, from_y = np.vstack((x0, y0))
    to_x, to_y = np.vstack((x1, y1))

    # Pairwise coordinate differences via ufunc.outer
    # (approach from <http://stackoverflow.com/questions/1871536>)
    delta_x = np.subtract.outer(from_x, to_x)
    delta_y = np.subtract.outer(from_y, to_y)

    return np.hypot(delta_x, delta_y)


def plot(x,y,values,z_grid):
   """Draw an interpolated grid with the stations on top.

   :param x: X coordinates of the stations
   :param y: Y coordinates of the stations
   :param values: observed values, used to colour the station markers
   :param z_grid: 2-D array of interpolated values, with row 0 at the lowest Y

   The image extent comes from the module-level ``list_long`` / ``list_lat``.
   """
   plt.figure()
   # origin='lower' puts row 0 of the grid (lowest latitude) at the bottom; with the default
   # origin the surface is drawn upside down relative to the station markers.
   plt.imshow(z_grid, extent=(min(list_long), max(list_long), min(list_lat), max(list_lat)), origin='lower')
   plt.scatter(x,y,c=values)
   plt.colorbar()

# def plot(x, y, values, z_grid, other_geometries=None):
#     plt.figure()
#     plt.imshow(z_grid, extent=(min(list_long), max(list_long), min(list_lat), max(list_lat)))
#     plt.scatter(x, y, c=values)

#     if other_geometries:
#         gdf = gpd.GeoDataFrame(geometry=subbacia_select)
#         gdf.plot(ax=plt.gca(), color='red')  # Plot other geometries in red

#     plt.colorbar()
#     plt.show()

# In a notebook kernel __name__ is '__main__', so this call runs whenever the cell is executed.
# idw_() reads `values` from the notebook namespace, so a loop that sets it must have run first.
if __name__ == '__main__':
    idw_()

## Evento 2

In [None]:
# Event 2 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2014-05-02')
data_fim_filtro = pd.Timestamp('2014-05-20')

df_ev2 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev2_transposto_t = df_ev2.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev2 = df_ev2_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_2 = 'Evento_2_to_idw.csv'
df_final_ev2.to_csv(output_file_2, index=True)

In [None]:
df_ev2_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev2_transposto_t.columns:
    values = df_ev2_transposto_t[col].values
    idw_()

## Evento 3

In [None]:
# Event 3 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2015-05-01')
data_fim_filtro = pd.Timestamp('2015-05-19')

df_ev3 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev3_transposto_t = df_ev3.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev3 = df_ev3_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_3 = 'Evento_3_to_idw.csv'
df_final_ev3.to_csv(output_file_3, index=True)

In [None]:
df_ev3_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev3_transposto_t.columns:
    values = df_ev3_transposto_t[col].values
    idw_()

## Evento 4

In [None]:
# Event 4 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2015-07-05')
data_fim_filtro = pd.Timestamp('2015-08-11')

df_ev4 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev4_transposto_t = df_ev4.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev4 = df_ev4_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_4 = 'Evento_4_to_idw.csv'
df_final_ev4.to_csv(output_file_4, index=True)

In [None]:
df_ev4_transposto_t

In [None]:
# One map per column (one column per timestamp in the Event 4 window).
# This loop previously iterated df_ev3_transposto_t, so the "Evento 4" section
# re-drew the Event 3 maps instead of its own.
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev4_transposto_t.columns:
    values = df_ev4_transposto_t[col].values
    idw_()

## Evento 5

In [None]:
# Event 5 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2016-01-29')
data_fim_filtro = pd.Timestamp('2016-02-10')

df_ev5 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev5_transposto_t = df_ev5.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev5 = df_ev5_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_5 = 'Evento_5_to_idw.csv'
df_final_ev5.to_csv(output_file_5, index=True)

In [None]:
df_ev5_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev5_transposto_t.columns:
    values = df_ev5_transposto_t[col].values
    idw_()

## Evento 6

In [None]:
# Event 6 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2016-02-08')
data_fim_filtro = pd.Timestamp('2016-02-19')

df_ev6 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev6_transposto_t = df_ev6.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev6 = df_ev6_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_6 = 'Evento_6_to_idw.csv'
df_final_ev6.to_csv(output_file_6, index=True)

In [None]:
df_ev6_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev6_transposto_t.columns:
    values = df_ev6_transposto_t[col].values
    idw_()

## Evento 7

In [None]:
# Event 7 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2016-05-14')
data_fim_filtro = pd.Timestamp('2016-05-31')

df_ev7 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev7_transposto_t = df_ev7.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev7 = df_ev7_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_7 = 'Evento_7_to_idw.csv'
df_final_ev7.to_csv(output_file_7, index=True)

In [None]:
df_ev7_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev7_transposto_t.columns:
    values = df_ev7_transposto_t[col].values
    idw_()

## Evento 8

In [None]:
# Event 8 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2016-05-28')
data_fim_filtro = pd.Timestamp('2016-06-10')

df_ev8 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev8_transposto_t = df_ev8.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev8 = df_ev8_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_8 = 'Evento_8_to_idw.csv'
df_final_ev8.to_csv(output_file_8, index=True)

In [None]:
df_ev8_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev8_transposto_t.columns:
    values = df_ev8_transposto_t[col].values
    idw_()

## Evento 9

In [None]:
# Event 9 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2019-06-28')
data_fim_filtro = pd.Timestamp('2019-07-13')

df_ev9 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev9_transposto_t = df_ev9.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev9 = df_ev9_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_9 = 'Evento_9_to_idw.csv'
df_final_ev9.to_csv(output_file_9, index=True)

In [None]:
df_ev9_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev9_transposto_t.columns:
    values = df_ev9_transposto_t[col].values
    idw_()

## Evento 10

In [None]:
# Event 10 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2019-07-13')
data_fim_filtro = pd.Timestamp('2019-07-23')

df_ev10 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev10_transposto_t = df_ev10.transpose()

# Attach the station coordinates (joined on the station code index); shown as the cell output
df_final_ev10 = df_ev10_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)
df_final_ev10

# CSV export is disabled for this event
#output_file_10 = 'Evento_10_to_idw.csv'
#df_final_ev10.to_csv(output_file_10, index=True)

In [None]:
df_ev10_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev10_transposto_t.columns:
    values = df_ev10_transposto_t[col].values
    idw_()

## Evento 11

In [None]:
# Event 11 window; both end dates are included
data_stations_.index = pd.to_datetime(data_stations_.index)
data_inicio_filtro = pd.Timestamp('2019-08-30')
data_fim_filtro = pd.Timestamp('2019-09-11')

df_ev11 = data_stations_[(data_inicio_filtro <= data_stations_.index) & (data_stations_.index <= data_fim_filtro)]
# Stations as rows, timestamps as columns
df_ev11_transposto_t = df_ev11.transpose()

# Attach the station coordinates (joined on the station code index)
df_final_ev11 = df_ev11_transposto_t.merge(df_recortado_lat_lon, left_index=True, right_index=True)

# Export to CSV
output_file_11 = 'Evento_11_to_idw.csv'
df_final_ev11.to_csv(output_file_11, index=True)

In [None]:
df_ev11_transposto_t

In [None]:
# One map per column (one column per timestamp in the event window).
# idw_() has no parameters: it reads `values` from the notebook namespace.
for col in df_ev11_transposto_t.columns:
    values = df_ev11_transposto_t[col].values
    idw_()

# visual

### Painel de visualização gespla.visuals.py

In [None]:
#@title
def pannel_singlets(folder, dataframe_ts, varfield, dataframe_freq, bymonth, rangedetail, detail=True, flenm='Pannel', ylbls='m3/s',
                    datefield='Date', scale='log', show=False):
    """
    Plot a three-part panel for a single time series: (a) the full series with an optional
    inset detail, (b) the exceedance probability curve and (c) one violin per month.

    :param folder: string of folder path. Ex: "C:/project/data"
    :param dataframe_ts: dataframe with the time series data
    :param varfield: string of variable field in the dataframe of time series
    :param dataframe_freq: dataframe of frequency analysis.
    Must have a field called 'Exceedance' and a field called 'Values'. Recommendation: use the tsa.frequency() function to get it
    :param bymonth: dictionary of dataframes time series separated by month. Keys must be: 'January', 'February', ..., 'December'.
    Each dataframe must contain the `varfield` column. Recommendation: use the resample.group_by_month() function to get it
    :param rangedetail: tuple or list with strings of two range dates for the detail plot. Ex.: ('1990-12-29', '2000-01-30')
    :param detail: boolean to allow the insertion of a detail plot. Default: True.
    The detail is skipped when no record falls inside `rangedetail`.
    :param flenm: string for the file name. Default: 'Pannel'
    :param ylbls: string for the Y axis label.
    :param datefield: string for the Date field in all dataframes. Default: 'Date'
    :param scale: string for the scale of Y axis on plot b and plot c. Default: 'log'.
    Options: 'linear', 'log', 'symlog', 'logit', 'function', 'functionlog'
    :param show: boolean to show plot instead of saving to file
    :return: string of the saved file path, or None when `show` is True (no file is written)
    """
    month_names = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
                   'August', 'September', 'October', 'November', 'December')
    #
    fig = plt.figure(figsize=(10, 6))
    gs = mpl.gridspec.GridSpec(2, 3, wspace=0.4, hspace=0.3, top=0.95, bottom=0.1, left=0.1, right=0.95)
    # work on copies so the caller's dataframes are never touched
    df1 = dataframe_ts.copy()
    df2 = dataframe_freq.copy()
    #
    if detail:
        dt_0 = pd.to_datetime(rangedetail[0])
        dt_1 = pd.to_datetime(rangedetail[1])
        # records with dt_0 <= date < dt_1
        df_inset = df1.query('{} >= "{}" and {} < "{}"'.format(datefield, dt_0, datefield, dt_1))
        # An empty window has no min/max to frame and no ticks to place: skip the detail
        # instead of failing on np.max of an empty array.
        detail = not df_inset.empty
    #
    # Series plot
    ax = plt.subplot(gs[0, :])
    aux_str = r'$\bf{' + 'a.  ' + '}$' + 'Full time series'
    plt.title(aux_str, fontsize=10, loc='left')
    plt.plot(df1[datefield], df1[varfield])
    # headroom above the series leaves space for the inset
    plt.ylim(0, (1.3 * np.max(df1[varfield])))
    plt.ylabel(ylbls)
    # plt.xlabel('Time')
    plt.grid(True, 'major', axis='y')
    #
    # detail lines: rectangle around the window shown in the inset
    if detail:
        ymax_h = np.max(df_inset[varfield].values)
        ymin_h = np.min(df_inset[varfield].values)
        lines_c = 'tab:orange'
        plt.plot([dt_0, dt_0], [ymin_h, ymax_h], lines_c)
        plt.plot([dt_1, dt_1], [ymin_h, ymax_h], lines_c)
        plt.plot([dt_0, dt_1], [ymin_h, ymin_h], lines_c)
        plt.plot([dt_0, dt_1], [ymax_h, ymax_h], lines_c)
        #
        # Detail plot
        inset = ax.inset_axes([0.05, 0.67, 0.2, 0.3])
        inset.plot(df_inset[datefield], df_inset[varfield])
        len_inset = len(df_inset[datefield].values)
        # three ticks: first, middle and last date of the window
        ticks = [df_inset[datefield].values[0], df_inset[datefield].values[int(len_inset/2)], df_inset[datefield].values[-1]]
        inset.set_xticks(ticks)
        inset.tick_params(axis='both', which='major', labelsize=8)
        inset.grid(True, 'both')
    #
    # Exceedance curve
    ax = plt.subplot(gs[1, 0])
    aux_str = r'$\bf{' + 'b.  ' + '}$' + 'Exceedance Prob. Curve'
    plt.title(aux_str, fontsize=10, loc='left')
    plt.plot(df2['Exceedance'], df2['Values'])
    plt.yscale(scale)
    plt.ylabel(ylbls)
    plt.xlabel('Exceedance probability (%)', fontsize=10)
    plt.grid(True, 'both')
    #
    # Violinplot: one sample per month, in calendar order
    violin_data = [bymonth[month][varfield].values[:] for month in month_names[:len(bymonth)]]
    ax = plt.subplot(gs[1, 1:])
    aux_str = r'$\bf{' + 'c.  ' + '}$' + 'Seasonality Analysis'
    plt.title(aux_str, fontsize=10, loc='left')
    plt.ylabel(ylbls)
    plt.yscale(scale)
    ax.violinplot(violin_data, showmedians=True)
    ax.set_xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    ax.set_xticklabels(list('JFMAMJJASOND'))
    plt.xlabel("Month", fontsize=10)
    #
    if show:
        plt.show()
        # Nothing was saved. The title string of plot c used to leak out here as if it
        # were a file path.
        return None
    out_path = folder + '/' + flenm + '.png'
    plt.savefig(out_path)
    plt.close()
    return out_path

In [None]:
#@title
# Inputs for the panel: frequency analysis and month-by-month grouping of the flow series.
# NOTE(review): both are built from df_flow with var_field='Flow', while the panel call in the
# next cell plots df_flow_d2m with varfield='Mean' and reads that same field from each entry
# of `by_month` - confirm that group_by_month() output carries a 'Mean' column.
df_freq_flow_m = tsa.frequency(dataframe=df_flow, var_field='Flow', zero=True)
by_month = resample.group_by_month(dataframe=df_flow, var_field='Flow')

In [None]:
#@title
# Resample the flow series with gespla's d2m_flow and preview the last 10 rows
df_flow_d2m = resample.d2m_flow(dataframe=df_flow, var_field='Flow')
print(df_flow_d2m.tail(10).to_string())
#df_recortado = df_flow_d2m[df_flow_d2m['Date'].dt.year >= 2000]

# pannel_singlets() draws the visual panel; with show=True it is displayed instead of saved
figfile = pannel_singlets(
    folder='./',
    dataframe_ts=df_flow_d2m,
    varfield='Mean',
    dataframe_freq=df_freq_flow_m,
    bymonth=by_month,
    rangedetail=('2022-01-01', '2023-01-01'),
    scale='linear',
    #Options: 'linear', 'log', 'symlog', 'logit', 'function', 'functionlog'
    show=True)

# Medidas de acuracia do modelo

In [None]:
# NOTE(review): `analyst`, `obs` and `sim` are not imported or assigned anywhere in this
# notebook, so this cell raises NameError as it stands. Presumably `obs`/`sim` are the observed
# and simulated series and `analyst` is a metrics module - TODO confirm and define them.
ERRO = analyst.error(obs, sim) #Error function
SQ_ERROR = analyst.sq_error(obs, sim) #Squared Error function
MSE = analyst.mse(obs, sim) #Mean Squared Error (MSE) function
RMSE = analyst.rmse(obs, sim)  #Root of Mean Squared Error (RMSE) function
NRMSE = analyst.nrmse(obs, sim) #Normalized RMSE by the mean observed value
NSE = analyst.nse(obs, sim) #Nash-Sutcliffe Efficiency (NSE) coefficient
NNSE = analyst.nnse(obs, sim) # Normalized NSE function (NSE re-scaled to [0,1])
KGE = analyst.kge(obs, sim) # Kling-Gupta Efficiency (KGE) coefficient, Gupta et al. (2009)
PBIAS = analyst.pbias(obs, sim) # P-Bias function
    #Negative P-Bias ->> Too much water! - ET has to work harder to remove water from the system
    #Positive P-Bias ->> Too little water! - ET is draining too much water
REG_LIN = analyst.linreg(obs, sim) # Linear regression model function

# TESTE USO BASEFLOW

In [None]:
# The `baseflow` pip install is commented out in the install cell, so the package must be
# installed separately before this import works.
import baseflow

# Flow series as a plain numpy array
Q = np.array(df_flow['Flow'])

# KGE = Kling-Gupta Efficiency

# Baseflow separation; returns the separated series `b` and the KGE scores
b, KGEs = baseflow.separation(Q, date=None, area=None, ice=None, method='Eckhardt', return_kge=True)
# `b` is indexed by method name through its dtype field names; report the best-scoring one
print(f'Best Method: {b.dtype.names[KGEs.argmax()]}')

In [None]:
Q

In [None]:
# `plt` is the pyplot module and cannot be called. Plot the total flow and one line per
# field of the structured array returned by the separation.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(Q, label='Vazão total')
for metodo in b.dtype.names:
    ax.plot(b[metodo], label=metodo)
ax.set_xlabel('Tempo (dias)')
ax.set_ylabel('Vazão')
ax.legend()
plt.show()

In [None]:
KGEs

In [None]:

# Turn the column 'coluna1' into an array
# NOTE(review): `df` is never assigned in this notebook (only in a commented-out line) and
# 'coluna1' looks like a placeholder name - this cell appears to be a leftover template.
array = np.array(df['coluna1'])

# Print the resulting array
print(array)