In [None]:
import settings

import os

import torch

import pandas as pd

from torch import nn
from helper import *
from datetime import datetime
from torch.utils.data import DataLoader
from utils import GenericDataFrameDataset

# Timestamp of this run; it is baked into the model name below.
current_time = datetime.now()
# Two-digit fields, most significant first: year-month-day-hour-minute.
formatted_time = current_time.strftime("%y-%m-%d-%H-%M")
MODEL_NAME = f"fnn_{formatted_time}"

# Project directories, read from the settings module.
VISUAL_DIR, DATA_DIR, MODEL_DIR = (
    settings.VISUAL_DIR,
    settings.DATA_DIR,
    settings.MODEL_DIR,
)

# Per-run working directory: <MODEL_DIR>/<MODEL_NAME>.
WORKING_DIR = os.path.join(MODEL_DIR, MODEL_NAME)
# NOTE(review): create_folder_if_not_exists presumably comes from the
# `from helper import *` star import — confirm and import it by name.
create_folder_if_not_exists(WORKING_DIR)

In [None]:
# Input CSV, and the fraction of its rows that load_data assigns to training.
DATA_PATH = os.path.join(DATA_DIR, "processed.csv")
DATA_SPLIT_RATIO = 0.7

# Column names; the order matters because X_COLUMNS / Y_COLUMNS are sliced
# from this list by position.
RAW_COLUMNS = [
    # inlet columns
    "inlet flow",
    "inlet COD",
    "inlet ammonia nitrogen",
    "inlet total nitrogen",
    "inlet phosphorus",
    # outlet columns
    "outlet COD",
    "outlet ammonia nitrogen",
    "outlet total nitrogen",
    "outlet phosphorus",
    # nitrate nitrogen, one column per line
    "line 1 nitrate nitrogen",
    "line 2 nitrate nitrogen",
    # pump speeds (the last four entries)
    "line 1 pump speed",
    "line 2 pump speed",
    "PAC pump 1 speed",
    "PAC pump 2 speed",
]

# Features are everything except the trailing four pump-speed columns;
# Y_COLUMNS holds exactly those four.
X_COLUMNS, Y_COLUMNS = RAW_COLUMNS[:-4], RAW_COLUMNS[-4:]

# A single column name (a string, not a list) that is not part of RAW_COLUMNS.
# NOTE(review): presumably a discretized form of "line 1 pump speed" that is
# already present in processed.csv — confirm.
TGT_COLUMNS = "line 1 pump speed discrete"
# NOTE(review): BATCH_SIZE is not referenced by the cells visible in this
# section — confirm whether the loaders are meant to use it.
BATCH_SIZE = 32

In [None]:
def load_data(data_path, split_ratio: "float | None" = None) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Read the processed CSV and split it into training and validation frames.

    The validation frame is one contiguous run of rows cut out of the middle
    of the file; the training frame is every row before and after that run,
    concatenated in their original order.

    Args:
        data_path: Path of the CSV file. Its first column is used as the index
            and its "timestamp" column is parsed as dates.
        split_ratio: Fraction of rows assigned to the training frame, in
            [0, 1]. When omitted, the module-level DATA_SPLIT_RATIO is used.

    Returns:
        A ``(train_data, val_data)`` tuple of DataFrames.

    Raises:
        ValueError: If the effective split ratio is outside [0, 1].
    """
    if split_ratio is None:
        split_ratio = DATA_SPLIT_RATIO
    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be within [0, 1], got {split_ratio!r}")

    data = pd.read_csv(
        data_path,
        low_memory=False,
        index_col=0,
        parse_dates=["timestamp"],
    )

    n_rows = data.shape[0]
    train_size = int(n_rows * split_ratio)
    val_size = n_rows - train_size

    # The validation window starts after the first half of the training rows,
    # so training data surrounds it on both sides.
    val_start = train_size // 2
    val_stop = val_start + val_size

    # .iloc makes it explicit that the split is by row position, not by label.
    train_data = pd.concat([data.iloc[:val_start], data.iloc[val_stop:]])
    val_data = data.iloc[val_start:val_stop]
    return train_data, val_data

# Read the CSV at DATA_PATH and split it into training and validation DataFrames.
train_data, val_data = load_data(DATA_PATH)

In [None]:
# Print the DataFrame.info() summary of the training frame.
train_data.info()

In [None]:
# Print the DataFrame.info() summary of the validation frame.
val_data.info()

In [None]:
def dataframe_to_loader(
    dataframe_X: pd.DataFrame,
    dataframe_y: "pd.DataFrame | pd.Series",
    batch_size: int = 256,
    shuffle: bool = True,
) -> DataLoader:
    """Wrap feature and target data in a GenericDataFrameDataset and a DataLoader.

    Args:
        dataframe_X: Feature columns.
        dataframe_y: Target data; the caller in this notebook passes a single
            column selected from the same frame as ``dataframe_X``.
        batch_size: Number of samples per batch produced by the loader.
        shuffle: Whether the loader reshuffles the samples every epoch.
            Defaults to True, which was previously hard-coded; pass False for
            an evaluation loader that should iterate in a fixed order.

    Returns:
        A DataLoader over the wrapped dataset.
    """
    dataset = GenericDataFrameDataset(dataframe_X, dataframe_y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Build the training and validation loaders from the feature columns and the
# single target column.
# NOTE(review): BATCH_SIZE is not passed, so dataframe_to_loader's default
# batch size of 256 applies — confirm this is intended.
train_loader = dataframe_to_loader(train_data[X_COLUMNS], train_data[TGT_COLUMNS])
# NOTE(review): this rebinds `val_data` from the validation DataFrame to a
# DataLoader, so re-running this cell without first re-running load_data will
# likely fail on the column indexing below. Consider a separate name such as
# `val_loader` once later cells have been checked for uses of `val_data`.
val_data = dataframe_to_loader(val_data[X_COLUMNS], val_data[TGT_COLUMNS])