In [None]:
from collections import defaultdict
import math
from typing import List

import numpy as np
import pandas as pd

#@title Debugging
# See https://zohaib.me/debugging-in-google-collab-notebook/ for tips,
# as well as docs for pdb and ipdb.
# When True, a later cell installs ipdb and switches on `%pdb on`.
DEBUG = True #@param {type:"boolean"}
# Google Drive mount point. A falsy value skips the mount and leaves the CSV
# path without a prefix.
GDRIVE_BASE = "/content/drive" #@param
# Location of the input CSV; it is appended verbatim to GDRIVE_BASE, hence the
# leading slash.
DATAFRAME_PATH = "/MyDrive/monthly_large.csv"

def get_dataframe_path_from_param() -> str:
  """Return DATAFRAME_PATH prefixed with GDRIVE_BASE when the latter is set.

  A falsy GDRIVE_BASE (empty string, None) contributes no prefix.
  """
  return "{}{}".format(GDRIVE_BASE or "", DATAFRAME_PATH)

In [None]:
# Access data stored on Google Drive
# (skipped entirely when GDRIVE_BASE is empty/falsy).
if GDRIVE_BASE:
    # Imported here so the google.colab import is only attempted when a mount
    # point is configured.
    from google.colab import drive
    drive.mount(GDRIVE_BASE)

if DEBUG:
    # Quietly install/upgrade ipdb; this must run before the import below.
    %pip install -Uqq ipdb
    import ipdb
    # Enter the debugger automatically whenever an exception goes uncaught.
    %pdb on

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def mean_variance_loss(real, predicted):
    """Combined loss over a (value, variance) target and prediction.

    Args:
        real: Pair ``(real_value, real_variance)``.
        predicted: Pair ``(predicted_value, predicted_variance)``.

    Returns:
        The mean squared error between the two values, plus the mean of the
        log of half the squared error.

    NOTE(review): neither ``real_variance`` nor ``predicted_variance`` takes
    part in the computation yet, and the log term is ``-inf`` wherever the
    prediction matches the target exactly. Confirm the intended formula.
    """
    # Unpacking is kept so that both arguments must be two-element pairs.
    (real_value, _real_variance) = real
    (predicted_value, _predicted_variance) = predicted

    # Computed once and shared by both terms.
    squared_error = tf.square(real_value - predicted_value)

    mean_loss = tf.reduce_mean(squared_error)
    var_from_point = squared_error / 2
    var_loss = tf.reduce_mean(tf.math.log(var_from_point))
    return mean_loss + var_loss

def train_nn(
        dataset: pd.DataFrame,
        hidden_layers: List[int],
        epochs: int,
        batch_size: int):
    """Build, compile and fit a dense network with mean and variance heads.

    Args:
        dataset: Frame whose column count sets the width of the input layer.
        hidden_layers: Unit count of each shared hidden layer, in order.
        epochs: Forwarded to ``model.fit``.
        batch_size: Forwarded to ``model.fit``.

    Returns:
        The compiled ``keras.Model`` after the ``fit`` call.

    NOTE(review): the training arrays below are still ``None`` placeholders,
    so ``fit`` cannot succeed until the TODO is resolved.
    NOTE(review): Keras applies a single ``loss`` callable to each output
    separately, whereas ``mean_variance_loss`` unpacks (value, variance)
    pairs. Confirm how the loss is meant to be wired to the two heads.
    """
    # One input per column of `dataset`.
    # NOTE(review): if the frame also holds target columns this overcounts.
    num_inputs = dataset.shape[1]
    inputs = keras.Input(shape=(num_inputs,))

    # Hidden layers shared between the mean and variance regressors.
    x = inputs
    for num_nodes in hidden_layers:
        x = layers.Dense(num_nodes, activation='relu')(x)

    # Two single-unit heads on top of the shared layers.
    mean_output_node = layers.Dense(1, activation='linear')(x)
    # NOTE(review): relu can emit exactly 0 for the variance; confirm that is
    # acceptable for the loss in use.
    variance_output_node = layers.Dense(1, activation='relu')(x)
    outputs = [mean_output_node, variance_output_node]

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss=mean_variance_loss)

    X = None #TODO: group_by sample site
    Y_mean = None
    Y_var = None
    # Keras models train via `fit`; there is no `Train` method.
    model.fit(X, [Y_mean, Y_var], epochs=epochs, batch_size=batch_size)
    return model

In [None]:
# Keep the loaded frame under a name; `describe` is a DataFrame method, not a
# pandas module-level function.
monthly_raw = pd.read_csv(get_dataframe_path_from_param())
# Summary statistics, rendered as the cell's output.
monthly_raw.describe()