# Explore CWatM data

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

from pathlib import Path

from tqdm.notebook import tqdm
import pandas as pd
import xarray as xr

import plotly.express as px

import src.data.cwatm_data as cwatm_data

In [15]:
# Root folder of the processed datasets; the path is relative, so it is
# resolved against the directory the notebook kernel runs in.
PROCESSED_DATA_FOLDER_PATH = Path("../data/processed")

## Load CWatM data

In [16]:
def _netcdf_to_dataframe(path):
    """Read the NetCDF file at *path* and return its contents as a DataFrame.

    The dataset is opened in a ``with`` block so the underlying file handle
    is released as soon as the data has been copied into pandas, instead of
    staying open for the lifetime of the kernel.
    """
    with xr.open_dataset(path) as dataset:
        return dataset.to_dataframe()


# Load the three CWatM tables stored as NetCDF under <processed>/CWatM_data.
all_df = _netcdf_to_dataframe(PROCESSED_DATA_FOLDER_PATH.joinpath("CWatM_data", "all.nc"))
forcings_df = _netcdf_to_dataframe(PROCESSED_DATA_FOLDER_PATH.joinpath("CWatM_data", "forcings.nc"))
outputs_df = _netcdf_to_dataframe(PROCESSED_DATA_FOLDER_PATH.joinpath("CWatM_data", "outputs.nc"))

In [None]:
# Display the table loaded from all.nc.
all_df

In [None]:
# Display the table loaded from forcings.nc.
forcings_df

In [None]:
# Display the table loaded from outputs.nc.
outputs_df

### Process the data

In [None]:
# Run the project's input-processing helper on the full table and show the result.
# NOTE(review): the `_land` naming suggests the helper keeps only land cells —
# confirm against src.data.cwatm_data.process_inputs_df.
all_land_df = cwatm_data.process_inputs_df(all_df)
all_land_df

In [None]:
# Keep only the forcing rows whose index labels appear in all_land_df.
# Assumes forcings_df is indexed the same way as all_df — TODO confirm.
forcings_land_df = forcings_df.loc[all_land_df.index]
forcings_land_df

In [None]:
# Keep only the output rows whose index labels appear in all_land_df.
# Assumes outputs_df is indexed the same way as all_df — TODO confirm.
outputs_land_df = outputs_df.loc[all_land_df.index]
outputs_land_df

In [29]:
# Persist the three `_land` tables as Parquet files in the CWatM_data folder.
cwatm_dir = PROCESSED_DATA_FOLDER_PATH / "CWatM_data"
all_land_df.to_parquet(cwatm_dir / "all_land.parquet")
forcings_land_df.to_parquet(cwatm_dir / "forcings_land.parquet")
outputs_land_df.to_parquet(cwatm_dir / "outputs_land.parquet")

## Load CWatM `_land` data

In [11]:
# Reload the `_land` tables from the Parquet files written by the save cell.
# The previous version read `*_land.nc` through xarray, but this notebook only
# ever writes `*_land.parquet`, so the load has to use the same format and
# file names to round-trip.
all_land_df = pd.read_parquet(PROCESSED_DATA_FOLDER_PATH.joinpath("CWatM_data", "all_land.parquet"))
forcings_land_df = pd.read_parquet(PROCESSED_DATA_FOLDER_PATH.joinpath("CWatM_data", "forcings_land.parquet"))
outputs_land_df = pd.read_parquet(PROCESSED_DATA_FOLDER_PATH.joinpath("CWatM_data", "outputs_land.parquet"))

In [None]:
# Per-column descriptive statistics of all_land_df.
all_land_df.describe()

In [None]:
# Per-column descriptive statistics of forcings_land_df.
forcings_land_df.describe()

In [None]:
# Per-column descriptive statistics of outputs_land_df.
outputs_land_df.describe()

## Visualize data

In [None]:
from src.visualization import visualize


# Columns exposed in the scatter plot, grouped by the table they are taken from.
INPUTS_COLUMNS = []
FORCINGS_COLUMNS = ["pr", "rsds"]
OUTPUTS_COLUMNS = ["evap-total", "potevap", "qr", "qtot"]

# Place the selected columns of the three `_land` tables side by side.
data_df = pd.concat(
    [
        all_land_df[INPUTS_COLUMNS],
        forcings_land_df[FORCINGS_COLUMNS],
        outputs_land_df[OUTPUTS_COLUMNS],
    ],
    axis=1,
)

# Input and forcing columns are selectable on x; output columns on y.
visualize.plot_scatter_with_dropdown(
    df=data_df,
    default_x="pr",
    default_y="potevap",
    valid_x=INPUTS_COLUMNS + FORCINGS_COLUMNS,
    valid_y=OUTPUTS_COLUMNS,
)