In [1]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import seaborn as sns
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from collections import OrderedDict
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [2]:
# CUDA support 
if torch.cuda.is_available():
    device = torch.device('cuda:3')
else:
    device = torch.device('cpu')
    
print(device)
device =  torch.device('cpu')

cpu


In [3]:
print(device)

cpu


In [4]:
# the deep neural network
class MLP(torch.nn.Module):
    def __init__(self, layers, activation="relu", init="xavier"):
        super(MLP, self).__init__()
        
        # parameters
        self.depth = len(layers) - 1
        
        if activation == "relu":
            self.activation = torch.nn.ReLU()
        elif activation == "tanh":
            self.activation = torch.nn.Tanh()
        elif activation == "gelu":
            self.activation = torch.nn.GELU()
        else:
            raise ValueError("Unspecified activation type")
        
        
        layer_list = list()
        for i in range(self.depth - 1): 
            layer_list.append(
                ('layer_%d' % i, torch.nn.Linear(layers[i], layers[i+1]))
            )
            layer_list.append(('activation_%d' % i, self.activation))
            
        layer_list.append(
            ('layer_%d' % (self.depth - 1), torch.nn.Linear(layers[-2], layers[-1]))
        )
        layerDict = OrderedDict(layer_list)
        
        # deploy layers
        self.layers = torch.nn.Sequential(layerDict)

        if init=="xavier":
            self.xavier_init_weights()
        elif init=="kaiming":
            self.kaiming_init_weights()
    
    def xavier_init_weights(self):
        with torch.no_grad():
            print("Initializing Network with Xavier Initialization..")
            for m in self.layers.modules():
                if hasattr(m, 'weight'):
                    nn.init.xavier_uniform_(m.weight)
                    m.bias.data.fill_(0.0)

    def kaiming_init_weights(self):
        with torch.no_grad():
            print("Initializing Network with Kaiming Initialization..")
            for m in self.layers.modules():
                if hasattr(m, 'weight'):
                    nn.init.kaiming_uniform_(m.weight)
                    m.bias.data.fill_(0.0)
                        
    def forward(self, x):
        out = self.layers(x)
        return out
    
class DataGenerator(torch.utils.data.Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __getitem__(self, index):
        return self.X[index], self.Y[index]
    
    def __len__(self):
        return len(self.X)

In [5]:
data_df = pd.read_csv("all_data_lake_modeling_in_time.csv")
data_df = data_df.drop(columns=['time'])
data_df

Unnamed: 0,depth,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,...,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00,obs_temp,input_obs,ice,snow,snowice
0,1,-1.220007,589.518946,-28.506582,-33.863764,0.0,0.4,-999.0,-999.0,36000000.0,...,1,2,5.151141,5.318267,5.386202,5.318267,-999.0,0.000000,0.000000,0.00000
1,2,-1.220007,589.518946,-28.506582,-33.863764,0.0,0.4,-999.0,-999.0,36000000.0,...,1,2,5.353410,5.344124,5.423218,5.344124,-999.0,0.000000,0.000000,0.00000
2,3,-1.220007,589.518946,-28.506582,-33.863764,0.0,0.4,-999.0,-999.0,36000000.0,...,1,2,5.423272,5.388624,5.423218,5.388624,-999.0,0.000000,0.000000,0.00000
3,4,-1.220007,589.518946,-28.506582,-33.863764,0.0,0.4,-999.0,-999.0,36000000.0,...,1,2,5.461295,5.411186,5.461288,5.411186,-999.0,0.000000,0.000000,0.00000
4,5,-1.220007,589.518946,-28.506582,-33.863764,0.0,0.4,-999.0,-999.0,36000000.0,...,1,2,5.499891,5.454971,5.500005,5.454971,-999.0,0.000000,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875945,46,-6.479987,511.404030,-32.793318,-51.109839,0.0,0.4,-999.0,-999.0,36000000.0,...,1,0,4.080505,4.080505,4.080417,4.080505,-999.0,0.399259,0.035893,0.07825
875946,47,-6.479987,511.404030,-32.793318,-51.109839,0.0,0.4,-999.0,-999.0,36000000.0,...,1,0,4.190838,4.190838,4.190740,4.190838,-999.0,0.399259,0.035893,0.07825
875947,48,-6.479987,511.404030,-32.793318,-51.109839,0.0,0.4,-999.0,-999.0,36000000.0,...,1,0,4.287957,4.287957,4.287885,4.287957,-999.0,0.399259,0.035893,0.07825
875948,49,-6.479987,511.404030,-32.793318,-51.109839,0.0,0.4,-999.0,-999.0,36000000.0,...,1,0,4.381492,4.381492,4.381462,4.381492,-999.0,0.399259,0.035893,0.07825


In [6]:
training_frac = 0.60
depth_steps = 50
number_days = len(data_df)//depth_steps
n_obs = int(number_days*training_frac)*depth_steps
print(f"Number of days total: {number_days}")
print(f"Number of training points: {n_obs}")

Number of days total: 17519
Number of training points: 525550


# Normalizing Data

In [7]:
data = data_df.values

train_data = data[:n_obs]
test_data = data[n_obs:]

#performing normalization on all the columns
scaler = StandardScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

# Training Heat Diffusion Model

In [8]:
input_columns = ['depth', 'AirTemp_degC', 'Longwave_Wm-2', 'Latent_Wm-2', 'Sensible_Wm-2', 'Shortwave_Wm-2',
                'lightExtinct_m-1', 'Area_m2', 
                 'buoyancy', 'day_of_year', 'time_of_day', 'diffusivity' ,'temp_heat01']
output_columns = ['temp_diff02']
#, 'ShearVelocity_mS-1', 'ShearStress_Nm-2'

input_column_ix = [data_df.columns.get_loc(column) for column in input_columns]
output_column_ix = [data_df.columns.get_loc(column) for column in output_columns]

X_train, X_test = train_data[:,input_column_ix], test_data[:,input_column_ix]
y_train, y_test = train_data[:,output_column_ix], test_data[:,output_column_ix]

In [9]:
print(f"X_train: {X_train.shape}, X_test: {X_test.shape}")
print(f"y_train: {y_train.shape}, y_test: {y_test.shape}")

X_train: (525550, 13), X_test: (350400, 13)
y_train: (525550, 1), y_test: (350400, 1)


In [10]:
#keeping track of the mean and standard deviations
train_mean = scaler.mean_
train_std = scaler.scale_

input_mean, input_std = train_mean[input_column_ix], train_std[input_column_ix]
output_mean, output_std = train_mean[output_column_ix], train_std[output_column_ix]

In [11]:
# Create data set
batch_size = 1000

assert batch_size % 50 ==0, "Batchsize has to be multiple of 25" 

train_dataset = DataGenerator(X_train, y_train)
test_dataset = DataGenerator(X_test, y_test)
# train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
# test_dataset = torch.utils.data.TensorDataset(X_test, y_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, 
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                          shuffle=False)

In [12]:
layers = [X_train.shape[-1], 32, 32, y_train.shape[-1]]

model = MLP(layers, activation="gelu").to(device)

Initializing Network with Xavier Initialization..


In [13]:
lr = 1e-3
decay_rate = 0.1
decay_steps = 500


optimizer = torch.optim.Adam(model.parameters(), lr=lr, 
                         betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=decay_steps, gamma=decay_rate)

criterion = torch.nn.MSELoss()

In [14]:
print(model)

MLP(
  (activation): GELU()
  (layers): Sequential(
    (layer_0): Linear(in_features=13, out_features=32, bias=True)
    (activation_0): GELU()
    (layer_1): Linear(in_features=32, out_features=32, bias=True)
    (activation_1): GELU()
    (layer_2): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [15]:
# mean_diff = torch.tensor(input_mean[input_column_ix[13]]).to(device)
# std_diff = torch.tensor(input_std[input_column_ix[13]]).to(device)

# mean_temp = torch.tensor(input_mean[input_column_ix[14]]).to(device)
# std_temp = torch.tensor(input_std[input_column_ix[14]]).to(device)

# mean_out = torch.tensor(output_mean).to(device)
# std_out = torch.tensor(output_std).to(device)
    
# def implicit_diffusion(diff, temp, dt=3600, dx=1, depth_steps=25):
#     # de-normalise data
#     diff = diff * std_diff + mean_diff

#     # INPUT DATA FROM PREVIOUS MODULE
#     t = temp * std_temp + mean_temp # temperature profile from previous module output

#     # IMPLEMENTATION OF CRANK-NICHOLSON SCHEME
#     j = len(t)
#     y = torch.zeros((len(t), len(t)), dtype=torch.float64).to(device)

#     alpha = (dt/dx**2) * diff

#     az = - alpha # subdiagonal
#     bz = 2 * (1 + alpha) # diagonal
#     cz = - alpha # superdiagonal

#     bz[0] = 1
#     az[len(az)-2] = 0
#     bz[len(bz)-1] = 1
#     cz[0] = 0

#     az = az[1:,:]
#     cz = cz[:-1,:]

#     y = torch.diag(bz[:, 0])+torch.diag(az[:, 0],-1)+torch.diag(cz[:, 0],1) #slightly efficient way of computing the diagonal matrices
#     y[j-1, j-1] = 1
    
#     mn = torch.zeros_like(t)  
#     mn[0] = t[0]
#     mn[len(mn)-1] = t[len(t)-1]
    
#     mn[1:j-1] = alpha[1:j-1,0]*t[:j-2] + 2 * (1 - alpha[1:j-1,0])*t[1:j-1] + alpha[1:j-1,0]*t[1:j-1] #is be same as the loop
    
#     # DERIVED TEMPERATURE OUTPUT FOR NEXT MODULE
#     proj = torch.linalg.solve(y, mn)

#     mean, std, var = torch.mean(proj), torch.std(proj), torch.var(proj)
#     proj = (proj-mean_out)/std_out

#     proj = proj.to(torch.double)
#     return proj

In [16]:
print(input_column_ix)
print(input_column_ix[11])
print(input_column_ix[12])
print(input_mean)
print( torch.tensor(input_mean[11]).to(device))
print(torch.tensor(input_mean[12]).to(device))

[0, 1, 2, 3, 4, 5, 6, 9, 31, 35, 36, 32, 33]
32
33
[ 2.55000000e+01  6.66331478e+00  6.35473963e+02 -3.27292232e+01
 -1.18507712e+01  1.58969885e+02  4.00000000e-01  3.60000000e+07
  2.86277967e-04  1.58688612e+02  1.15009990e+01  2.60421347e-02
  5.28905140e+00]
tensor(0.0260, dtype=torch.float64)
tensor(5.2891, dtype=torch.float64)


In [23]:
#mean_diff = torch.tensor(input_mean[input_column_ix[11]]).to(device)
#std_diff = torch.tensor(input_std[input_column_ix[11]]).to(device)

#mean_temp = torch.tensor(input_mean[input_column_ix[12]]).to(device)
#std_temp = torch.tensor(input_std[input_column_ix[12]]).to(device)

mean_diff = torch.tensor(input_mean[11]).to(device)
std_diff = torch.tensor(input_std[11]).to(device)

mean_temp = torch.tensor(input_mean[12]).to(device)
std_temp = torch.tensor(input_std[12]).to(device)

mean_out = torch.tensor(output_mean).to(device)
std_out = torch.tensor(output_std).to(device)
    
def implicit_diffusion(diff, temp, dt=3600, dx=0.5, depth_steps=50):
    # de-normalise data
    diff = diff * std_diff + mean_diff

    diff = diff.view(-1, depth_steps)
    
    # INPUT DATA FROM PREVIOUS MODULE
    t = temp * std_temp + mean_temp # temperature profile from previous module output
    t = t.view(-1, depth_steps)
    
    # IMPLEMENTATION OF CRANK-NICHOLSON SCHEME
#     len_t = t.shape[1]
    y = torch.zeros((t.shape[0], depth_steps, depth_steps), dtype=torch.float64).to(device)

    alpha = (dt/dx**2) * diff

    az = - alpha # subdiagonal
    bz = 2 * (1 + alpha) # diagonal
    cz = - alpha # superdiagonal
    
    bz[:, 0] = 1
    az[:, depth_steps-2] = 0
    bz[:, depth_steps-1] = 1
    cz[:, 0] = 0
    
    az = az[:,1:]
    cz = cz[:,:-1]

    y = torch.diag_embed(bz, offset=0)+torch.diag_embed(az,offset=-1)+torch.diag_embed(cz,offset=1) #slightly efficient way of computing the diagonal matrices
    y[:, depth_steps-1, depth_steps-1] = 1
    
    mn = torch.zeros_like(t)  
    mn[:, 0] = t[:, 0]
    mn[:,depth_steps-1] = t[:, depth_steps-1]
    
    mn[:, 1:depth_steps-1] = alpha[:, 1:depth_steps-1]*t[:, :depth_steps-2] + 2 * (1 - alpha[:,1:depth_steps-1])*t[:,1:depth_steps-1] + alpha[:,1:depth_steps-1]*t[:,1:depth_steps-1] #is be same as the loop
    
    # DERIVED TEMPERATURE OUTPUT FOR NEXT MODULE
    proj = torch.linalg.solve(y, mn)

    mean, std, var = torch.mean(proj), torch.std(proj), torch.var(proj)
    proj = (proj-mean_out)/std_out

    proj = proj.to(torch.float32)
    proj = proj.view(-1, 1)
    return proj

In [18]:
# diffusivity_true = torch.tensor(X_train[:,input_column_ix[13]], device=device).unsqueeze(1)
# temp_heat_true = torch.tensor(X_train[:,input_column_ix[14]], device=device)#.unsqueeze(1)
# mean_diff = torch.tensor(input_mean[input_column_ix[13]]).to(device)
# std_diff = torch.tensor(input_std[input_column_ix[13]]).to(device)
# print(mean_diff, std_diff)

# pred = implicit_diffusion(diff=diffusivity_true, 
#                           temp=temp_heat_true)

# print(torch.mean((pred-y_train)**2))

In [19]:
# time = 20
# # print(pred[25*time:25*(time+1)])
# # print(y_train[25*time:25*(time+1)])
# print((pred[25*time:25*(time+1)]-y_train[25*time:25*(time+1)]).abs())

In [21]:
# # test if the Crank-Nicholson scheme works

temp = torch.rand(50,1).to(device)
diff = torch.rand(50,1).to(device)
print(temp), print(diff)
implicit_diffusion(diff, temp, dt=3600, dx=0.5, depth_steps=50)

tensor([[0.1577],
        [0.0824],
        [0.6146],
        [0.3225],
        [0.2618],
        [0.4812],
        [0.5816],
        [0.3244],
        [0.6956],
        [0.8011],
        [0.1033],
        [0.1026],
        [0.6523],
        [0.0913],
        [0.6203],
        [0.4299],
        [0.3528],
        [0.6935],
        [0.1499],
        [0.2075],
        [0.3029],
        [0.9026],
        [0.1235],
        [0.2483],
        [0.7225],
        [0.9786],
        [0.7033],
        [0.1810],
        [0.1738],
        [0.2951],
        [0.3783],
        [0.2871],
        [0.7155],
        [0.3060],
        [0.8976],
        [0.7875],
        [0.1228],
        [0.7639],
        [0.4032],
        [0.7205],
        [0.3877],
        [0.9195],
        [0.3738],
        [0.3984],
        [0.2412],
        [0.2136],
        [0.4827],
        [0.9127],
        [0.4722],
        [0.6374]])
tensor([[0.4810],
        [0.9352],
        [0.1537],
        [0.1012],
        [0.7774],
        [

tensor([[ 0.1584],
        [-0.1185],
        [-0.4709],
        [-0.2940],
        [-0.4113],
        [-0.5899],
        [-0.5501],
        [-0.4112],
        [-0.5302],
        [-0.2794],
        [ 0.0761],
        [-0.2665],
        [-0.6101],
        [-0.4048],
        [-0.7617],
        [-0.5925],
        [-0.6147],
        [-0.7159],
        [-0.4784],
        [-0.7867],
        [-1.0384],
        [-1.1960],
        [-0.7607],
        [-1.1068],
        [-1.3302],
        [-1.0820],
        [-0.5791],
        [-0.3573],
        [-0.6591],
        [-0.9693],
        [-1.1630],
        [-1.2750],
        [-1.4800],
        [-1.2582],
        [-1.4496],
        [-1.0517],
        [-0.7655],
        [-1.1449],
        [-0.8869],
        [-0.9922],
        [-0.7822],
        [-0.9075],
        [-0.5038],
        [-0.6480],
        [-0.7692],
        [-1.0494],
        [-1.3583],
        [-1.4054],
        [-1.0274],
        [-1.4730]])

In [25]:
n_epochs = 100

train_loss = []
test_loss = []
for it in tqdm(range(n_epochs)):
    loss_epoch = 0
    model.train()
    for x, y in iter(train_loader):
        x, y = x.to(device).float(), y.to(device).float()
        
        # get temperature input
        temp_input = x[:,12]
        
        print(x)
        optimizer.zero_grad()
        proj = model(x)
        
        print(proj)
        breakpoint()

        pred = implicit_diffusion(proj, temp_input)

#         pred = pred.to(dtype=torch.float32)
        
#         print(pred.mean(), y.mean(), pred.std(), y.std())
        loss = criterion(pred, y)
        
        loss.backward()
        optimizer.step()
        loss_epoch += loss.detach().item()
    lr_scheduler.step()
    
    if it % 50 == 0:
        train_loss.append(loss_epoch/len(train_loader))
        model.eval()
        test_loss_epoch = 0
        for x, y in iter(test_loader):
            x, y = x.to(device).float(), y.to(device).float()
            temp_input = x[:,12] #* std + mean

            optimizer.zero_grad()
            proj = model(x)

            pred = implicit_diffusion(proj, temp_input)

            loss = criterion(pred, y)
            test_loss_epoch += loss.detach().item()
        test_loss.append(test_loss_epoch/len(test_loader))
        print(f"Epoch : {it}, Train_loss: {train_loss[-1]}, Test_loss: {test_loss[-1]}")
    

  0%|          | 0/100 [00:00<?, ?it/s]

tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [n

--KeyboardInterrupt--

KeyboardInterrupt: Interrupted by user
tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        

ipdb> q


  0%|          | 0/100 [00:20<?, ?it/s]


BdbQuit: 

In [None]:
plt.figure(figsize=(8,6))
plt.plot(train_loss, label="Train", linewidth=2.5)
plt.plot(test_loss, label="Test", linewidth=2.5)
plt.grid("on", alpha=0.2)
plt.legend(fontsize=18)
plt.yscale("log")
plt.xlabel("Epochs", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.show()

# Evaluating Results

In [None]:
def rmse(true, pred):
    return (((true-pred)**2).mean()**0.5).detach().cpu().numpy()

def l2_error(true, pred):
    return np.linalg.norm(pred.detach().cpu().numpy() - true.detach().cpu().numpy()) / np.linalg.norm(true.detach().cpu().numpy()) 

def compute_metrics(model, loader, mean=0.0, std=1.0):
    model.eval()
    y_ = []
    pred_ = []
    mean = torch.tensor(mean).to(device)
    std = torch.tensor(std).to(device)
    for x, y in iter(loader):
        x, y = x.to(device).float(), y.to(device).float()
        pred = model(x)
        
        temp_input = x[:,14]
        proj = model(x)
        pred = implicit_diffusion(proj, temp_input)        
        pred = pred.to(dtype=torch.float32)
        
#         print(torch.mean((pred-y)**2))
#         print(y.shape)
        
        y = y * std + mean
        pred = pred * std + mean
        
        y_.append(y)
        pred_.append(pred)
    y_ = torch.cat(y_, dim=0) 
    pred_ = torch.cat(pred_, dim=0)
    
    rmse_temp = rmse(y_, pred_)
    l2_error_temp = l2_error(y_, pred_)
    return rmse_temp, l2_error_temp

In [None]:
rmse_temp, l2_error_temp = compute_metrics(model, test_loader,  mean = output_mean, std = output_std)
print(f"Test Rmse of Temp: {rmse_temp}")
print(f"L2 Error  of Temp: {l2_error_temp}")

In [None]:
rmse_temp, l2_error_temp = compute_metrics(model, train_loader,  mean = output_mean, std = output_std)
print(f"Train Rmse of Temp: {rmse_temp}")
print(f"L2 Error  of Temp: {l2_error_temp}")

# Saving Model

In [None]:
PATH = f"./saved_models/heat_diffusion_model_time.pth"
torch.save(model.state_dict(), PATH)

In [None]:
output_mean