In [41]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import seaborn as sns
import matplotlib.pyplot as plt
import os
from datetime import date
import datetime

# Generating the training data for the Heat and Diffusion Model

In [42]:
# Notebook configuration.
depth_steps = 25           # number of depth layers per time step
data_dir = "./LakePIAB/"

# Show the working directory, since every input path below is relative to it.
current_dir = os.getcwd()
print(current_dir)

/home/robert/Projects/LakePIAB/MCL/data_processing


In [43]:
# Meteorological drivers: one CSV row per time step.
meterological_data_df = pd.read_csv("./../../output/meteorology_input.csv")
meterological_data_df = meterological_data_df.iloc[1:]  # considering everything from 2nd time step

num_time_steps = meterological_data_df.shape[0]
# Depth index 1..depth_steps, cycled once for every time step.
depth_list = np.tile(np.arange(1, depth_steps + 1), num_time_steps)
depth_df = pd.DataFrame(data={'depth': depth_list})

# Repeat every row depth_steps times so that each (time, depth) pair gets its own row.
# Rows are selected positionally with iloc so that each column keeps its dtype; repeating
# `.values` with np.repeat would upcast the whole frame to object because the frame mixes
# the string 'time' column with numeric columns.
row_positions = np.repeat(np.arange(num_time_steps), depth_steps)
meterological_data_df = meterological_data_df.iloc[row_positions].reset_index(drop=True)
meterological_data_df = pd.concat([depth_df, meterological_data_df], ignore_index=False, axis=1)
meterological_data_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0
...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0


In [44]:
# Total simulated temperature: the profile at step t-1 is the model input and the profile
# at step t is the target, both labelled with the timestamp of step t.
temp_total_wide = pd.read_csv("./../../output/temp_total05.csv")

# Everything after the first column (the time column) is a temperature value.
profile_values = temp_total_wide.iloc[:, 1:].to_numpy()
flattened_inp_temp = profile_values[:-1].flatten()  # all steps except the last
flattened_out_temp = profile_values[1:].flatten()   # all steps except the first

# One timestamp entry per (time step, depth) pair, starting from the 2nd time step.
time_stamp = np.repeat(temp_total_wide['time'].iloc[1:].values, depth_steps)

data = dict(time=time_stamp, input_temp=flattened_inp_temp, temp_total05=flattened_out_temp, depth=depth_list)
input_temp_df = pd.DataFrame(data)
input_temp_df

Unnamed: 0,time,input_temp,temp_total05,depth
0,2002-06-04 01:00:00,12.270393,12.239573,1
1,2002-06-04 01:00:00,12.206829,12.185556,2
2,2002-06-04 01:00:00,12.129300,12.116260,3
3,2002-06-04 01:00:00,12.043368,12.036908,4
4,2002-06-04 01:00:00,11.953988,11.952504,5
...,...,...,...,...
637170,2019-08-01 23:00:00,8.394774,8.396633,21
637171,2019-08-01 23:00:00,7.347885,7.349262,22
637172,2019-08-01 23:00:00,6.322880,6.323790,23
637173,2019-08-01 23:00:00,5.309239,5.309692,24


In [45]:
# Length of the long-format depth index (time steps * depth_steps).
np.shape(depth_list)

(637175,)

In [46]:
# Start the combined table: meteorological drivers joined with the input/target temperatures.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = meterological_data_df.merge(
    input_temp_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504
...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692


In [47]:
# Buoyancy profiles: one CSV row per time step, columns named 'n2S-2_<layer>'.
buoyancy_data_df = pd.read_csv("./../../output/buoyancy.csv")
# Removing NaNs: the first and last layers are filled from their neighbouring layer.
# The last layer is derived from depth_steps instead of a hard-coded 24/25 so that this
# stays correct if the number of layers changes (assumes the 'n2S-2_<layer>' naming holds).
buoyancy_data_df['n2S-2_1'] = buoyancy_data_df['n2S-2_2']
buoyancy_data_df[f'n2S-2_{depth_steps}'] = buoyancy_data_df[f'n2S-2_{depth_steps - 1}']

buoyancy_data_df = buoyancy_data_df[1:] # considering everything from 2nd time step

# Flatten row by row so the values line up with depth_list (1..depth_steps per time step).
flattened_buoy = buoyancy_data_df.iloc[:,1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = buoyancy_data_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'buoyancy':flattened_buoy, 'depth':depth_list}
buoyancy_data_df = pd.DataFrame(data=data)

buoyancy_data_df

Unnamed: 0,time,buoyancy,depth
0,2002-06-04 01:00:00,0.000216,1
1,2002-06-04 01:00:00,0.000216,2
2,2002-06-04 01:00:00,0.000238,3
3,2002-06-04 01:00:00,0.000245,4
4,2002-06-04 01:00:00,0.000241,5
...,...,...,...
637170,2019-08-01 23:00:00,0.001476,21
637171,2019-08-01 23:00:00,0.001077,22
637172,2019-08-01 23:00:00,0.000695,23
637173,2019-08-01 23:00:00,0.000315,24


In [48]:
# Add the buoyancy column to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    buoyancy_data_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573,0.000216
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556,0.000216
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260,0.000238
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908,0.000245
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504,0.000241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633,0.001476
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262,0.001077
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790,0.000695
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692,0.000315


In [49]:
# Diffusivity profiles in wide form (one row per time step); the first time step is skipped.
diffusivity_wide = pd.read_csv("./../../output/diff.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_diff = diffusivity_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(diffusivity_wide['time'].values, depth_steps)

data = dict(time=time_stamp, diffusivity=flattened_diff, depth=depth_list)
out_diffusivity_df = pd.DataFrame(data)

out_diffusivity_df

Unnamed: 0,time,diffusivity,depth
0,2002-06-04 01:00:00,0.000037,1
1,2002-06-04 01:00:00,0.000034,2
2,2002-06-04 01:00:00,0.000032,3
3,2002-06-04 01:00:00,0.000032,4
4,2002-06-04 01:00:00,0.000032,5
...,...,...,...
637170,2019-08-01 23:00:00,0.000015,21
637171,2019-08-01 23:00:00,0.000017,22
637172,2019-08-01 23:00:00,0.000020,23
637173,2019-08-01 23:00:00,0.000029,24


In [50]:
# Add the diffusivity column to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    out_diffusivity_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573,0.000216,0.000037
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556,0.000216,0.000034
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260,0.000238,0.000032
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908,0.000245,0.000032
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504,0.000241,0.000032
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633,0.001476,0.000015
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262,0.001077,0.000017
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790,0.000695,0.000020
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692,0.000315,0.000029


In [51]:
# Temperature profiles from temp_heat01.csv in wide form (one row per time step);
# the first time step is skipped.
heat_wide = pd.read_csv("./../../output/temp_heat01.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_temp = heat_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(heat_wide['time'].values, depth_steps)

data = dict(time=time_stamp, temp_heat01=flattened_temp, depth=depth_list)
out_temp_df = pd.DataFrame(data)

out_temp_df

Unnamed: 0,time,temp_heat00,depth
0,2002-06-04 01:00:00,12.271817,1
1,2002-06-04 01:00:00,12.206834,2
2,2002-06-04 01:00:00,12.129305,3
3,2002-06-04 01:00:00,12.043372,4
4,2002-06-04 01:00:00,11.953991,5
...,...,...,...
637170,2019-08-01 23:00:00,8.394809,21
637171,2019-08-01 23:00:00,7.347932,22
637172,2019-08-01 23:00:00,6.322960,23
637173,2019-08-01 23:00:00,5.309239,24


In [52]:
# Add the temp_heat01.csv temperature column (currently held in out_temp_df) to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    out_temp_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat00
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573,0.000216,0.000037,12.271817
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556,0.000216,0.000034,12.206834
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260,0.000238,0.000032,12.129305
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908,0.000245,0.000032,12.043372
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504,0.000241,0.000032,11.953991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633,0.001476,0.000015,8.394809
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262,0.001077,0.000017,7.347932
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790,0.000695,0.000020,6.322960
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692,0.000315,0.000029,5.309239


In [53]:
# Temperature profiles from temp_diff02.csv in wide form (one row per time step);
# the first time step is skipped.
diff_temp_wide = pd.read_csv("./../../output/temp_diff02.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_temp = diff_temp_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(diff_temp_wide['time'].values, depth_steps)

data = dict(time=time_stamp, temp_diff02=flattened_temp, depth=depth_list)
out_temp_df = pd.DataFrame(data)

out_temp_df

Unnamed: 0,time,temp_diff02,depth
0,2002-06-04 01:00:00,12.271817,1
1,2002-06-04 01:00:00,12.205427,2
2,2002-06-04 01:00:00,12.128335,3
3,2002-06-04 01:00:00,12.042970,4
4,2002-06-04 01:00:00,11.954045,5
...,...,...,...
637170,2019-08-01 23:00:00,8.396633,21
637171,2019-08-01 23:00:00,7.349262,22
637172,2019-08-01 23:00:00,6.323790,23
637173,2019-08-01 23:00:00,5.309692,24


In [54]:
# Add the temp_diff02 column (currently held in out_temp_df) to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    out_temp_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat00,temp_diff02
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573,0.000216,0.000037,12.271817,12.271817
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556,0.000216,0.000034,12.206834,12.205427
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260,0.000238,0.000032,12.129305,12.128335
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908,0.000245,0.000032,12.043372,12.042970
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504,0.000241,0.000032,11.953991,11.954045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633,0.001476,0.000015,8.394809,8.396633
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262,0.001077,0.000017,7.347932,7.349262
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790,0.000695,0.000020,6.322960,6.323790
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692,0.000315,0.000029,5.309239,5.309692


In [55]:
# Parse all timestamp strings in one vectorised call instead of running strptime per row.
# The explicit format keeps the parsing strict: a value in another layout still raises.
parsed_time = pd.to_datetime(final_df['time'], format='%Y-%m-%d %H:%M:%S')

# pandas Timestamps are datetime.datetime subclasses, so datetime_list stays usable as before.
datetime_list = parsed_time.tolist()
day_of_year_list = parsed_time.dt.dayofyear.tolist()  # 1-based day of the year
time_of_day_list = parsed_time.dt.hour.tolist()       # hour of the day, 0-23

In [56]:
# Attach the calendar features derived from the 'time' column to the combined table.
for feature_name, feature_values in (
    ('day_of_year', day_of_year_list),
    ('time_of_day', time_of_day_list),
):
    final_df[feature_name] = feature_values

In [57]:
# Display the combined table to inspect the columns collected so far.
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat00,temp_diff02,day_of_year,time_of_day
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573,0.000216,0.000037,12.271817,12.271817,155,1
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556,0.000216,0.000034,12.206834,12.205427,155,1
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260,0.000238,0.000032,12.129305,12.128335,155,1
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908,0.000245,0.000032,12.043372,12.042970,155,1
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504,0.000241,0.000032,11.953991,11.954045,155,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633,0.001476,0.000015,8.394809,8.396633,213,23
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262,0.001077,0.000017,7.347932,7.349262,213,23
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790,0.000695,0.000020,6.322960,6.323790,213,23
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692,0.000315,0.000029,5.309239,5.309692,213,23


In [58]:
# Temperature profiles from temp_mix03.csv in wide form (one row per time step);
# the first time step is skipped.
mix_wide = pd.read_csv("./../../output/temp_mix03.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_temp = mix_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(mix_wide['time'].values, depth_steps)

data = dict(time=time_stamp, temp_mix03=flattened_temp, depth=depth_list)
temp_mix03_df = pd.DataFrame(data)

temp_mix03_df

Unnamed: 0,time,temp_mix03,depth
0,2002-06-04 01:00:00,12.239573,1
1,2002-06-04 01:00:00,12.185556,2
2,2002-06-04 01:00:00,12.116260,3
3,2002-06-04 01:00:00,12.036908,4
4,2002-06-04 01:00:00,11.952504,5
...,...,...,...
637170,2019-08-01 23:00:00,8.396633,21
637171,2019-08-01 23:00:00,7.349262,22
637172,2019-08-01 23:00:00,6.323790,23
637173,2019-08-01 23:00:00,5.309692,24


In [59]:
# Add the temp_mix03 column to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    temp_mix03_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat00,temp_diff02,day_of_year,time_of_day,temp_mix03
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.270393,12.239573,0.000216,0.000037,12.271817,12.271817,155,1,12.239573
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.206829,12.185556,0.000216,0.000034,12.206834,12.205427,155,1,12.185556
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.129300,12.116260,0.000238,0.000032,12.129305,12.128335,155,1,12.116260
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,12.043368,12.036908,0.000245,0.000032,12.043372,12.042970,155,1,12.036908
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,36000000.0,11.953988,11.952504,0.000241,0.000032,11.953991,11.954045,155,1,11.952504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,8.394774,8.396633,0.001476,0.000015,8.394809,8.396633,213,23,8.396633
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,7.347885,7.349262,0.001077,0.000017,7.347932,7.349262,213,23,7.349262
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,6.322880,6.323790,0.000695,0.000020,6.322960,6.323790,213,23,6.323790
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,36000000.0,5.309239,5.309692,0.000315,0.000029,5.309239,5.309692,213,23,5.309692


In [60]:
# Temperature profiles from temp_conv04.csv in wide form (one row per time step);
# the first time step is skipped.
# NOTE: the result variable is called temp_conv03_df although the file and column are
# 'conv04'; the name is kept because the following merge cell refers to it.
conv_wide = pd.read_csv("./../../output/temp_conv04.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_temp = conv_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(conv_wide['time'].values, depth_steps)

data = dict(time=time_stamp, temp_conv04=flattened_temp, depth=depth_list)
temp_conv03_df = pd.DataFrame(data)

temp_conv03_df

Unnamed: 0,time,temp_conv04,depth
0,2002-06-04 01:00:00,12.239573,1
1,2002-06-04 01:00:00,12.185556,2
2,2002-06-04 01:00:00,12.116260,3
3,2002-06-04 01:00:00,12.036908,4
4,2002-06-04 01:00:00,11.952504,5
...,...,...,...
637170,2019-08-01 23:00:00,8.396633,21
637171,2019-08-01 23:00:00,7.349262,22
637172,2019-08-01 23:00:00,6.323790,23
637173,2019-08-01 23:00:00,5.309692,24


In [61]:
# Add the temp_conv04 column (held in temp_conv03_df) to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    temp_conv03_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,input_temp,temp_total05,buoyancy,diffusivity,temp_heat00,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.270393,12.239573,0.000216,0.000037,12.271817,12.271817,155,1,12.239573,12.239573
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.206829,12.185556,0.000216,0.000034,12.206834,12.205427,155,1,12.185556,12.185556
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.129300,12.116260,0.000238,0.000032,12.129305,12.128335,155,1,12.116260,12.116260
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.043368,12.036908,0.000245,0.000032,12.043372,12.042970,155,1,12.036908,12.036908
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,11.953988,11.952504,0.000241,0.000032,11.953991,11.954045,155,1,11.952504,11.952504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,8.394774,8.396633,0.001476,0.000015,8.394809,8.396633,213,23,8.396633,8.396633
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,7.347885,7.349262,0.001077,0.000017,7.347932,7.349262,213,23,7.349262,7.349262
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,6.322880,6.323790,0.000695,0.000020,6.322960,6.323790,213,23,6.323790,6.323790
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,5.309239,5.309692,0.000315,0.000029,5.309239,5.309692,213,23,5.309692,5.309692


In [62]:
# Temperature profiles from temp_initial00.csv in wide form (one row per time step);
# the first time step is skipped.
initial_wide = pd.read_csv("./../../output/temp_initial00.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_temp = initial_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(initial_wide['time'].values, depth_steps)

data = dict(time=time_stamp, temp_initial00=flattened_temp, depth=depth_list)
temp_initial00_df = pd.DataFrame(data)

temp_initial00_df

Unnamed: 0,time,temp_initial00,depth
0,2002-06-04 01:00:00,12.270393,1
1,2002-06-04 01:00:00,12.206829,2
2,2002-06-04 01:00:00,12.129300,3
3,2002-06-04 01:00:00,12.043368,4
4,2002-06-04 01:00:00,11.953988,5
...,...,...,...
637170,2019-08-01 23:00:00,8.394774,21
637171,2019-08-01 23:00:00,7.347885,22
637172,2019-08-01 23:00:00,6.322880,23
637173,2019-08-01 23:00:00,5.309239,24


In [63]:
# Add the temp_initial00 column to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    temp_initial00_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,temp_total05,buoyancy,diffusivity,temp_heat00,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.239573,0.000216,0.000037,12.271817,12.271817,155,1,12.239573,12.239573,12.270393
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.185556,0.000216,0.000034,12.206834,12.205427,155,1,12.185556,12.185556,12.206829
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.116260,0.000238,0.000032,12.129305,12.128335,155,1,12.116260,12.116260,12.129300
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,12.036908,0.000245,0.000032,12.043372,12.042970,155,1,12.036908,12.036908,12.043368
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,11.952504,0.000241,0.000032,11.953991,11.954045,155,1,11.952504,11.952504,11.953988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,8.396633,0.001476,0.000015,8.394809,8.396633,213,23,8.396633,8.396633,8.394774
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,7.349262,0.001077,0.000017,7.347932,7.349262,213,23,7.349262,7.349262,7.347885
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,6.323790,0.000695,0.000020,6.322960,6.323790,213,23,6.323790,6.323790,6.322880
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,5.309692,0.000315,0.000029,5.309239,5.309692,213,23,5.309692,5.309692,5.309239


In [64]:
# Observed temperature profiles in wide form (one row per time step); the first time step
# is skipped.
observed_wide = pd.read_csv("./../../output/observed_temp.csv").iloc[1:]

# Flatten every column after the first, row by row, so the values line up with depth_list.
flattened_temp = observed_wide.iloc[:, 1:].to_numpy().flatten()
# One timestamp entry per (time step, depth) pair.
time_stamp = np.repeat(observed_wide['time'].values, depth_steps)

data = dict(time=time_stamp, obs_temp=flattened_temp, depth=depth_list)
temp_obs_df = pd.DataFrame(data)

# Print the length of each column source; all three are expected to match.
for column_values in (flattened_temp, time_stamp, depth_list):
    print(column_values.shape)

(637175,)
(637175,)
(637175,)


In [65]:
# Add the observed temperature column (obs_temp) to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    temp_obs_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,buoyancy,diffusivity,temp_heat00,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00,obs_temp
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000216,0.000037,12.271817,12.271817,155,1,12.239573,12.239573,12.270393,22.256
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000216,0.000034,12.206834,12.205427,155,1,12.185556,12.185556,12.206829,22.264
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000238,0.000032,12.129305,12.128335,155,1,12.116260,12.116260,12.129300,22.071
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000245,0.000032,12.043372,12.042970,155,1,12.036908,12.036908,12.043368,22.259
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000241,0.000032,11.953991,11.954045,155,1,11.952504,11.952504,11.953988,22.071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.001476,0.000015,8.394809,8.396633,213,23,8.396633,8.396633,8.394774,11.099
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.001077,0.000017,7.347932,7.349262,213,23,7.349262,7.349262,7.347885,11.099
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.000695,0.000020,6.322960,6.323790,213,23,6.323790,6.323790,6.322880,11.099
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.000315,0.000029,5.309239,5.309692,213,23,5.309692,5.309692,5.309239,11.099


In [66]:

# Observed temperature of the *previous* time step, labelled with the timestamp of the
# current step: values come from all rows except the last, timestamps from all rows except
# the first.
# NOTE: this cell overwrites flattened_inp_temp and temp_obs_df from earlier cells.
observed_wide = pd.read_csv("./../../output/observed_temp.csv")

# Everything after the first column (the time column) is a temperature value.
flattened_inp_temp = observed_wide.iloc[:-1, 1:].to_numpy().flatten()
print(flattened_inp_temp.shape)

# One timestamp entry per (time step, depth) pair, starting from the 2nd time step.
time_stamp = np.repeat(observed_wide['time'].iloc[1:].values, depth_steps)
print(time_stamp.shape)

print(depth_list.shape)
data = dict(time=time_stamp, input_obs=flattened_inp_temp, depth=depth_list)
temp_obs_df = pd.DataFrame(data)
temp_obs_df

(637175,)
(637175,)
(637175,)


Unnamed: 0,time,input_obs,depth
0,2002-06-04 01:00:00,22.279,1
1,2002-06-04 01:00:00,22.295,2
2,2002-06-04 01:00:00,22.091,3
3,2002-06-04 01:00:00,22.296,4
4,2002-06-04 01:00:00,22.231,5
...,...,...,...
637170,2019-08-01 23:00:00,11.099,21
637171,2019-08-01 23:00:00,11.099,22
637172,2019-08-01 23:00:00,11.099,23
637173,2019-08-01 23:00:00,11.099,24


In [67]:
# Add the previous-step observed temperature column (input_obs) to the combined table.
# validate='one_to_one' makes pandas raise MergeError if either side has duplicate
# (time, depth) keys, which would otherwise silently multiply rows in the inner join.
final_df = final_df.merge(
    temp_obs_df, how='inner', on=['time', 'depth'], validate='one_to_one'
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,diffusivity,temp_heat00,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00,obs_temp,input_obs
0,1,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000037,12.271817,12.271817,155,1,12.239573,12.239573,12.270393,22.256,22.279
1,2,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000034,12.206834,12.205427,155,1,12.185556,12.185556,12.206829,22.264,22.295
2,3,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000032,12.129305,12.128335,155,1,12.116260,12.116260,12.129300,22.071,22.091
3,4,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000032,12.043372,12.042970,155,1,12.036908,12.036908,12.043368,22.259,22.296
4,5,2002-06-04 01:00:00,13.894998,717.905843,6.197036,7.548233,0.0,0.8,2.840812,0.017342,...,0.000032,11.953991,11.954045,155,1,11.952504,11.952504,11.953988,22.071,22.231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637170,21,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.000015,8.394809,8.396633,213,23,8.396633,8.396633,8.394774,11.099,11.099
637171,22,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.000017,7.347932,7.349262,213,23,7.349262,7.349262,7.347885,11.099,11.099
637172,23,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.000020,6.322960,6.323790,213,23,6.323790,6.323790,6.322880,11.099,11.099
637173,24,2019-08-01 23:00:00,17.945001,796.188764,-59.458821,-13.847566,0.0,0.8,0.322169,0.000534,...,0.000029,5.309239,5.309692,213,23,5.309692,5.309692,5.309239,11.099,11.099


In [68]:
# Write the combined long-format table to the working directory, without the row index.
output_csv_path = "all_data_lake_modeling_in_time.csv"
final_df.to_csv(output_csv_path, index=False)