In [3]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import seaborn as sns
import matplotlib.pyplot as plt
import os
from datetime import date
import datetime

# Generating the training data for the Heat and Diffusion Model

In [4]:
# Number of depth layers each time step is expanded into (layers are numbered 1..depth_steps).
depth_steps = 25
# Data folder path; not referenced by the cells visible in this notebook.
data_dir = "./LakePIAB/"

# Show the working directory that the relative CSV paths below are resolved against.
print(os.getcwd())

/home/robert/Projects/LakePIAB/MCL/01_data_processing


In [6]:
# Meteorological forcing: one row per time step.
meterological_data_df = pd.read_csv("./../../output/py_meteorology_input.csv")
meterological_data_df = meterological_data_df[1:] # considering everything from 2nd time step

# Depth index 1..depth_steps, cycled once per time step.
num_time_steps = meterological_data_df.shape[0]
depth_list = np.tile(np.arange(1, depth_steps + 1), num_time_steps)
depth_df = pd.DataFrame(data={'depth':depth_list})

# Repeat every time step depth_steps times. Rows are repeated positionally rather
# than via np.repeat(df.values, ...): `.values` on a frame that also holds the string
# 'time' column yields an object array, which would turn every numeric column into
# object dtype.
row_positions = np.repeat(np.arange(num_time_steps), depth_steps)
meterological_data_df = meterological_data_df.iloc[row_positions].reset_index(drop=True)
meterological_data_df = pd.concat([depth_df, meterological_data_df], ignore_index=False, axis=1)
meterological_data_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2
0,1,2006-01-01 02:00:00,-1.220007,590.851292,-30.129462,-35.559056,0.0,0.8,1.757216,0.008038,36000000.0
1,2,2006-01-01 02:00:00,-1.220007,590.851292,-30.129462,-35.559056,0.0,0.8,1.757216,0.008038,36000000.0
2,3,2006-01-01 02:00:00,-1.220007,590.851292,-30.129462,-35.559056,0.0,0.8,1.757216,0.008038,36000000.0
3,4,2006-01-01 02:00:00,-1.220007,590.851292,-30.129462,-35.559056,0.0,0.8,1.757216,0.008038,36000000.0
4,5,2006-01-01 02:00:00,-1.220007,590.851292,-30.129462,-35.559056,0.0,0.8,1.757216,0.008038,36000000.0
...,...,...,...,...,...,...,...,...,...,...,...
3065970,21,2019-12-29 00:00:00,3.86001,597.535984,25.682372,36.65838,0.0,0.8,6.709571,0.068577,36000000.0
3065971,22,2019-12-29 00:00:00,3.86001,597.535984,25.682372,36.65838,0.0,0.8,6.709571,0.068577,36000000.0
3065972,23,2019-12-29 00:00:00,3.86001,597.535984,25.682372,36.65838,0.0,0.8,6.709571,0.068577,36000000.0
3065973,24,2019-12-29 00:00:00,3.86001,597.535984,25.682372,36.65838,0.0,0.8,6.709571,0.068577,36000000.0


In [4]:
# Total simulated temperature; the first CSV column is 'time', the remaining columns
# are flattened row by row (assumed to be one column per depth layer - TODO confirm).
input_temp_df = pd.read_csv("./../../output/py_temp_total05.csv")

# Pair consecutive rows: row t-1 is the input profile, row t the target profile.
flattened_inp_temp = input_temp_df.iloc[:-1,1:].to_numpy().flatten() #this iloc is to remove the time column
flattened_out_temp = input_temp_df.iloc[1:,1:].to_numpy().flatten() #this iloc is to remove the time column
time_stamp = input_temp_df['time'][1:].repeat(depth_steps).values

# Build the depth index from this file's own length. The shared `depth_list` is sized
# from a different file, so using it here fails on a fresh top-to-bottom run whenever
# the two files have a different number of time steps.
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'input_temp':flattened_inp_temp, 'temp_total05':flattened_out_temp, 'depth':depth_column}
input_temp_df = pd.DataFrame(data=data)
input_temp_df

Unnamed: 0,time,input_temp,temp_total05,depth
0,2006-06-04 01:00:00,15.489510,15.426904,1
1,2006-06-04 01:00:00,15.448078,15.401481,2
2,2006-06-04 01:00:00,15.376617,15.346109,3
3,2006-06-04 01:00:00,15.287987,15.272596,4
4,2006-06-04 01:00:00,15.195121,15.193004,5
...,...,...,...,...
495570,2019-08-01 23:00:00,8.373683,8.375543,21
495571,2019-08-01 23:00:00,7.324806,7.326184,22
495572,2019-08-01 23:00:00,6.297760,6.298671,23
495573,2019-08-01 23:00:00,5.282023,5.282477,24


In [5]:
# Sanity check: display the shape (number of entries) of the shared depth index.
depth_list.shape

(495575,)

In [6]:
# Start the combined table: inner join of the forcing and the temperature pairs,
# keeping only (time, depth) keys present in both frames.
final_df = pd.merge(
    meterological_data_df,
    input_temp_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477


In [7]:
buoyancy_data_df = pd.read_csv("./../../output/py_buoyancy.csv")
# Removing NaNs: overwrite the first and last layer columns with their neighbouring
# layer (presumably the boundary layers are the ones holding NaN - verify against the file).
buoyancy_data_df['n2S-2_1'] = buoyancy_data_df['n2S-2_2']
buoyancy_data_df['n2S-2_25'] = buoyancy_data_df['n2S-2_24']

buoyancy_data_df = buoyancy_data_df[1:] # considering everything from 2nd time step

# Flatten row by row so values line up with the repeated time stamps.
flattened_buoy = buoyancy_data_df.iloc[:,1:].to_numpy().flatten()
time_stamp = buoyancy_data_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'buoyancy':flattened_buoy, 'depth':depth_column}
buoyancy_data_df = pd.DataFrame(data=data)

buoyancy_data_df

Unnamed: 0,time,buoyancy,depth
0,2006-06-04 01:00:00,0.000267,1
1,2006-06-04 01:00:00,0.000267,2
2,2006-06-04 01:00:00,0.000329,3
3,2006-06-04 01:00:00,0.000343,4
4,2006-06-04 01:00:00,0.000315,5
...,...,...,...
495570,2019-08-01 23:00:00,0.001471,21
495571,2019-08-01 23:00:00,0.001070,22
495572,2019-08-01 23:00:00,0.000686,23
495573,2019-08-01 23:00:00,0.000305,24


In [8]:
# Attach the buoyancy column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    buoyancy_data_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904,0.000267
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481,0.000267
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109,0.000329
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596,0.000343
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004,0.000315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543,0.001471
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184,0.001070
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671,0.000686
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477,0.000305


In [9]:
out_diffusivity_df = pd.read_csv("./../../output/py_diff.csv")

out_diffusivity_df = out_diffusivity_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_diff = out_diffusivity_df.iloc[:,1:].to_numpy().flatten()
time_stamp = out_diffusivity_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'diffusivity':flattened_diff, 'depth':depth_column}
out_diffusivity_df = pd.DataFrame(data=data)

out_diffusivity_df

Unnamed: 0,time,diffusivity,depth
0,2006-06-04 01:00:00,0.000037,1
1,2006-06-04 01:00:00,0.000031,2
2,2006-06-04 01:00:00,0.000028,3
3,2006-06-04 01:00:00,0.000028,4
4,2006-06-04 01:00:00,0.000029,5
...,...,...,...
495570,2019-08-01 23:00:00,0.000015,21
495571,2019-08-01 23:00:00,0.000017,22
495572,2019-08-01 23:00:00,0.000020,23
495573,2019-08-01 23:00:00,0.000029,24


In [10]:
# Attach the diffusivity column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    out_diffusivity_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904,0.000267,0.000037
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481,0.000267,0.000031
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109,0.000329,0.000028
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596,0.000343,0.000028
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004,0.000315,0.000029
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543,0.001471,0.000015
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184,0.001070,0.000017
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671,0.000686,0.000020
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477,0.000305,0.000029


In [11]:
out_temp_df = pd.read_csv("./../../output/py_temp_heat01.csv")

out_temp_df = out_temp_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_temp = out_temp_df.iloc[:,1:].to_numpy().flatten()
time_stamp = out_temp_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'temp_heat01':flattened_temp, 'depth':depth_column}
out_temp_df = pd.DataFrame(data=data)

out_temp_df

Unnamed: 0,time,temp_heat01,depth
0,2006-06-04 01:00:00,15.416676,1
1,2006-06-04 01:00:00,15.448083,2
2,2006-06-04 01:00:00,15.376622,3
3,2006-06-04 01:00:00,15.287991,4
4,2006-06-04 01:00:00,15.195124,5
...,...,...,...
495570,2019-08-01 23:00:00,8.373718,21
495571,2019-08-01 23:00:00,7.324853,22
495572,2019-08-01 23:00:00,6.297841,23
495573,2019-08-01 23:00:00,5.282023,24


In [12]:
# Attach the temp_heat01 column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    out_temp_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat01
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904,0.000267,0.000037,15.416676
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481,0.000267,0.000031,15.448083
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109,0.000329,0.000028,15.376622
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596,0.000343,0.000028,15.287991
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004,0.000315,0.000029,15.195124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543,0.001471,0.000015,8.373718
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184,0.001070,0.000017,7.324853
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671,0.000686,0.000020,6.297841
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477,0.000305,0.000029,5.282023


In [13]:
# NOTE: reuses the name `out_temp_df`; the previous contents are replaced from here on.
out_temp_df = pd.read_csv("./../../output/py_temp_diff02.csv")

out_temp_df = out_temp_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_temp = out_temp_df.iloc[:,1:].to_numpy().flatten()
time_stamp = out_temp_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'temp_diff02':flattened_temp, 'depth':depth_column}
out_temp_df = pd.DataFrame(data=data)

out_temp_df

Unnamed: 0,time,temp_diff02,depth
0,2006-06-04 01:00:00,15.416676,1
1,2006-06-04 01:00:00,15.437735,2
2,2006-06-04 01:00:00,15.374550,3
3,2006-06-04 01:00:00,15.287547,4
4,2006-06-04 01:00:00,15.195829,5
...,...,...,...
495570,2019-08-01 23:00:00,8.375543,21
495571,2019-08-01 23:00:00,7.326184,22
495572,2019-08-01 23:00:00,6.298671,23
495573,2019-08-01 23:00:00,5.282477,24


In [14]:
# Attach the temp_diff02 column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    out_temp_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat01,temp_diff02
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904,0.000267,0.000037,15.416676,15.416676
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481,0.000267,0.000031,15.448083,15.437735
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109,0.000329,0.000028,15.376622,15.374550
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596,0.000343,0.000028,15.287991,15.287547
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004,0.000315,0.000029,15.195124,15.195829
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543,0.001471,0.000015,8.373718,8.375543
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184,0.001070,0.000017,7.324853,7.326184
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671,0.000686,0.000020,6.297841,6.298671
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477,0.000305,0.000029,5.282023,5.282477


In [15]:
# Parse all time stamps in one vectorised call instead of a Python-level strptime
# call per row (the old comprehension also reused the imported name `date` as its
# loop variable).
parsed_times = pd.to_datetime(final_df['time'], format='%Y-%m-%d %H:%M:%S')
datetime_list = parsed_times.tolist()  # list of pandas Timestamps (datetime subclasses)
day_of_year_list = parsed_times.dt.dayofyear.tolist()  # 1-based day of year, same as tm_yday
time_of_day_list = parsed_times.dt.hour.tolist()  # hour of day, 0-23

In [16]:
# Append the calendar features as two new columns (day_of_year first, then time_of_day).
final_df = final_df.assign(
    day_of_year=day_of_year_list,
    time_of_day=time_of_day_list,
)

In [17]:
# Display the merged frame, now including the day_of_year and time_of_day columns.
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat01,temp_diff02,day_of_year,time_of_day
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904,0.000267,0.000037,15.416676,15.416676,155,1
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481,0.000267,0.000031,15.448083,15.437735,155,1
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109,0.000329,0.000028,15.376622,15.374550,155,1
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596,0.000343,0.000028,15.287991,15.287547,155,1
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004,0.000315,0.000029,15.195124,15.195829,155,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543,0.001471,0.000015,8.373718,8.375543,213,23
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184,0.001070,0.000017,7.324853,7.326184,213,23
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671,0.000686,0.000020,6.297841,6.298671,213,23
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477,0.000305,0.000029,5.282023,5.282477,213,23


In [18]:
temp_mix03_df = pd.read_csv("./../../output/py_temp_mix03.csv")

temp_mix03_df = temp_mix03_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_temp = temp_mix03_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_mix03_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'temp_mix03':flattened_temp, 'depth':depth_column}
temp_mix03_df = pd.DataFrame(data=data)

temp_mix03_df

Unnamed: 0,time,temp_mix03,depth
0,2006-06-04 01:00:00,15.426904,1
1,2006-06-04 01:00:00,15.401481,2
2,2006-06-04 01:00:00,15.346109,3
3,2006-06-04 01:00:00,15.272596,4
4,2006-06-04 01:00:00,15.193004,5
...,...,...,...
495570,2019-08-01 23:00:00,8.375543,21
495571,2019-08-01 23:00:00,7.326184,22
495572,2019-08-01 23:00:00,6.298671,23
495573,2019-08-01 23:00:00,5.282477,24


In [19]:
# Attach the temp_mix03 column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    temp_mix03_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total05,buoyancy,diffusivity,temp_heat01,temp_diff02,day_of_year,time_of_day,temp_mix03
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.489510,15.426904,0.000267,0.000037,15.416676,15.416676,155,1,15.426904
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.448078,15.401481,0.000267,0.000031,15.448083,15.437735,155,1,15.401481
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.376617,15.346109,0.000329,0.000028,15.376622,15.374550,155,1,15.346109
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.287987,15.272596,0.000343,0.000028,15.287991,15.287547,155,1,15.272596
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,36000000.0,15.195121,15.193004,0.000315,0.000029,15.195124,15.195829,155,1,15.193004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,8.373683,8.375543,0.001471,0.000015,8.373718,8.375543,213,23,8.375543
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,7.324806,7.326184,0.001070,0.000017,7.324853,7.326184,213,23,7.326184
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,6.297760,6.298671,0.000686,0.000020,6.297841,6.298671,213,23,6.298671
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,36000000.0,5.282023,5.282477,0.000305,0.000029,5.282023,5.282477,213,23,5.282477


In [20]:
# NOTE: the variable is named `temp_conv03_df` but holds the conv04 output; the name
# is kept because the following merge cell refers to it.
temp_conv03_df = pd.read_csv("./../../output/py_temp_conv04.csv")

temp_conv03_df = temp_conv03_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_temp = temp_conv03_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_conv03_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'temp_conv04':flattened_temp, 'depth':depth_column}
temp_conv03_df = pd.DataFrame(data=data)

temp_conv03_df

Unnamed: 0,time,temp_conv04,depth
0,2006-06-04 01:00:00,15.426904,1
1,2006-06-04 01:00:00,15.401481,2
2,2006-06-04 01:00:00,15.346109,3
3,2006-06-04 01:00:00,15.272596,4
4,2006-06-04 01:00:00,15.193004,5
...,...,...,...
495570,2019-08-01 23:00:00,8.375543,21
495571,2019-08-01 23:00:00,7.326184,22
495572,2019-08-01 23:00:00,6.298671,23
495573,2019-08-01 23:00:00,5.282477,24


In [21]:
# Attach the temp_conv04 column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    temp_conv03_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,input_temp,temp_total05,buoyancy,diffusivity,temp_heat01,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.489510,15.426904,0.000267,0.000037,15.416676,15.416676,155,1,15.426904,15.426904
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.448078,15.401481,0.000267,0.000031,15.448083,15.437735,155,1,15.401481,15.401481
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.376617,15.346109,0.000329,0.000028,15.376622,15.374550,155,1,15.346109,15.346109
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.287987,15.272596,0.000343,0.000028,15.287991,15.287547,155,1,15.272596,15.272596
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.195121,15.193004,0.000315,0.000029,15.195124,15.195829,155,1,15.193004,15.193004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,8.373683,8.375543,0.001471,0.000015,8.373718,8.375543,213,23,8.375543,8.375543
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,7.324806,7.326184,0.001070,0.000017,7.324853,7.326184,213,23,7.326184,7.326184
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,6.297760,6.298671,0.000686,0.000020,6.297841,6.298671,213,23,6.298671,6.298671
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,5.282023,5.282477,0.000305,0.000029,5.282023,5.282477,213,23,5.282477,5.282477


In [22]:
temp_initial00_df = pd.read_csv("./../../output/py_temp_initial00.csv")

temp_initial00_df = temp_initial00_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_temp = temp_initial00_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_initial00_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'temp_initial00':flattened_temp, 'depth':depth_column}
temp_initial00_df = pd.DataFrame(data=data)

temp_initial00_df

Unnamed: 0,time,temp_initial00,depth
0,2006-06-04 01:00:00,15.489510,1
1,2006-06-04 01:00:00,15.448078,2
2,2006-06-04 01:00:00,15.376617,3
3,2006-06-04 01:00:00,15.287987,4
4,2006-06-04 01:00:00,15.195121,5
...,...,...,...
495570,2019-08-01 23:00:00,8.373683,21
495571,2019-08-01 23:00:00,7.324806,22
495572,2019-08-01 23:00:00,6.297760,23
495573,2019-08-01 23:00:00,5.282023,24


In [23]:
# Attach the temp_initial00 column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    temp_initial00_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,temp_total05,buoyancy,diffusivity,temp_heat01,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.426904,0.000267,0.000037,15.416676,15.416676,155,1,15.426904,15.426904,15.489510
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.401481,0.000267,0.000031,15.448083,15.437735,155,1,15.401481,15.401481,15.448078
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.346109,0.000329,0.000028,15.376622,15.374550,155,1,15.346109,15.346109,15.376617
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.272596,0.000343,0.000028,15.287991,15.287547,155,1,15.272596,15.272596,15.287987
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,15.193004,0.000315,0.000029,15.195124,15.195829,155,1,15.193004,15.193004,15.195121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,8.375543,0.001471,0.000015,8.373718,8.375543,213,23,8.375543,8.375543,8.373683
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,7.326184,0.001070,0.000017,7.324853,7.326184,213,23,7.326184,7.326184,7.324806
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,6.298671,0.000686,0.000020,6.297841,6.298671,213,23,6.298671,6.298671,6.297760
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,5.282477,0.000305,0.000029,5.282023,5.282477,213,23,5.282477,5.282477,5.282023


In [24]:
temp_obs_df = pd.read_csv("./../../output/py_observed_temp.csv")

temp_obs_df = temp_obs_df[1:] # considering everything from 2nd time step

# Flatten row by row (dropping the leading 'time' column) to long format.
flattened_temp = temp_obs_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_obs_df['time'].repeat(depth_steps).values

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)

data = {'time':time_stamp, 'obs_temp':flattened_temp, 'depth':depth_column}
temp_obs_df = pd.DataFrame(data=data)

# The three arrays must have the same length for the frame above to be valid.
print(flattened_temp.shape)

print(time_stamp.shape)

print(depth_column.shape)

(495575,)
(495575,)
(495575,)


In [25]:
# Attach the obs_temp column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    temp_obs_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,buoyancy,diffusivity,temp_heat01,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00,obs_temp
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000267,0.000037,15.416676,15.416676,155,1,15.426904,15.426904,15.489510,22.279
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000267,0.000031,15.448083,15.437735,155,1,15.401481,15.401481,15.448078,22.295
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000329,0.000028,15.376622,15.374550,155,1,15.346109,15.346109,15.376617,22.091
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000343,0.000028,15.287991,15.287547,155,1,15.272596,15.272596,15.287987,22.296
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000315,0.000029,15.195124,15.195829,155,1,15.193004,15.193004,15.195121,22.231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.001471,0.000015,8.373718,8.375543,213,23,8.375543,8.375543,8.373683,11.099
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.001070,0.000017,7.324853,7.326184,213,23,7.326184,7.326184,7.324806,11.099
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.000686,0.000020,6.297841,6.298671,213,23,6.298671,6.298671,6.297760,11.099
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.000305,0.000029,5.282023,5.282477,213,23,5.282477,5.282477,5.282023,11.099


In [26]:

# Lagged observations: the profile from row t-1 is stored as 'input_obs' under the
# time stamp of row t (values use rows [:-1], time stamps use rows [1:]).
temp_obs_df = pd.read_csv("./../../output/py_observed_temp.csv")

flattened_inp_temp = temp_obs_df.iloc[:-1,1:].to_numpy().flatten() #this iloc is to remove the time column
print(flattened_inp_temp.shape)

time_stamp = temp_obs_df['time'][1:].repeat(depth_steps).values
print(time_stamp.shape)

# Depth index derived from this file's own length instead of the shared `depth_list`,
# whose length is tied to another file (breaks on a fresh run if row counts differ).
depth_column = np.tile(np.arange(1, depth_steps + 1), time_stamp.size // depth_steps)
print(depth_column.shape)

data = {'time':time_stamp, 'input_obs':flattened_inp_temp,'depth':depth_column}
temp_obs_df = pd.DataFrame(data=data)
temp_obs_df

(495575,)
(495575,)
(495575,)


Unnamed: 0,time,input_obs,depth
0,2006-06-04 01:00:00,22.279,1
1,2006-06-04 01:00:00,22.295,2
2,2006-06-04 01:00:00,22.091,3
3,2006-06-04 01:00:00,22.296,4
4,2006-06-04 01:00:00,22.231,5
...,...,...,...
495570,2019-08-01 23:00:00,11.099,21
495571,2019-08-01 23:00:00,11.099,22
495572,2019-08-01 23:00:00,11.099,23
495573,2019-08-01 23:00:00,11.099,24


In [27]:
# Attach the input_obs column; inner join keeps only shared (time, depth) keys.
final_df = pd.merge(
    final_df,
    temp_obs_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,diffusivity,temp_heat01,temp_diff02,day_of_year,time_of_day,temp_mix03,temp_conv04,temp_initial00,obs_temp,input_obs
0,1,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000037,15.416676,15.416676,155,1,15.426904,15.426904,15.489510,22.279,22.279
1,2,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000031,15.448083,15.437735,155,1,15.401481,15.401481,15.448078,22.295,22.295
2,3,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000028,15.376622,15.374550,155,1,15.346109,15.346109,15.376617,22.091,22.091
3,4,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000028,15.287991,15.287547,155,1,15.272596,15.272596,15.287987,22.296,22.296
4,5,2006-06-04 01:00:00,13.965021,717.887954,-32.080993,-6.880394,0.0,0.8,1.803546,0.008386,...,0.000029,15.195124,15.195829,155,1,15.193004,15.193004,15.195121,22.231,22.231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495570,21,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.000015,8.373718,8.375543,213,23,8.375543,8.375543,8.373683,11.099,11.099
495571,22,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.000017,7.324853,7.326184,213,23,7.326184,7.326184,7.324806,11.099,11.099
495572,23,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.000020,6.297841,6.298671,213,23,6.298671,6.298671,6.297760,11.099,11.099
495573,24,2019-08-01 23:00:00,17.945001,796.182785,-59.448422,-13.843945,0.0,0.8,0.32217,0.000534,...,0.000029,5.282023,5.282477,213,23,5.282477,5.282477,5.282023,11.099,11.099


In [None]:
# Ice/snow output: one row per time step (must contain a 'time' column for the merge below).
ice_data_df = pd.read_csv("./../../output/py_icesnow.csv")
ice_data_df = ice_data_df[1:] # considering everything from 2nd time step

# NOTE: this rebinds the notebook-wide num_time_steps / depth_list / depth_df.
num_time_steps = ice_data_df.shape[0]
depth_list = np.tile(np.arange(1, depth_steps + 1), num_time_steps)
depth_df = pd.DataFrame(data={'depth':depth_list})

# Repeat every time step depth_steps times. Rows are repeated positionally rather
# than via np.repeat(df.values, ...): on a frame with mixed dtypes (e.g. a string
# 'time' column) `.values` is an object array, which would turn every numeric column
# into object dtype.
row_positions = np.repeat(np.arange(num_time_steps), depth_steps)
ice_data_df = ice_data_df.iloc[row_positions].reset_index(drop=True)
ice_data_df = pd.concat([depth_df, ice_data_df], ignore_index=False, axis=1)
print(ice_data_df)

# Attach the ice/snow columns; inner join keeps only shared (time, depth) keys.
final_df = final_df.merge(ice_data_df, how='inner', on=['time','depth'])
final_df

In [30]:
# Write the merged table to the sibling 02_training folder (path is relative to the
# working directory); index=False omits the row index from the file.
final_df.to_csv("./../02_training/all_data_lake_modeling_in_time.csv", index=False)

In [31]:
# Write a second copy of the same table into the current working directory.
final_df.to_csv("all_data_lake_modeling_in_time.csv", index=False)