In [1]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import seaborn as sns
import matplotlib.pyplot as plt
import os
from datetime import date
import datetime

  from .autonotebook import tqdm as notebook_tqdm


# Generating the training data for the Heat and Diffusion Model

In [2]:
# Location of the model output files.
# NOTE(review): the cells visible in this notebook read from the literal
# "./../output/" rather than from data_dir -- confirm whether it is still needed.
data_dir = "./LakeModeling/1D_HeatMixing_PGDL/output/"
# Number of depth layers per time step; later cells use it to tile the depth
# index and to repeat each timestamp once per layer.
depth_steps = 25

# Show the working directory, because every path below is relative to it.
print(os.getcwd())

C:\Users\ladwi\Documents\Projects\R\LakeModeling\1D_HeatMixing_PGDL\MCL


In [3]:
# Load the meteorological drivers and expand them from one row per time step
# to one row per (time step, depth) pair.
meterological_data_df = pd.read_csv("./../output/meteorology_input.csv")
meterological_data_df = meterological_data_df[1:] # considering everything from 2nd time step

num_time_steps = meterological_data_df.shape[0]
# Depth index 1..depth_steps, cycled once per time step (depth varies fastest).
depth_list = np.array(list(range(1, depth_steps+1)) * num_time_steps)
depth_df = pd.DataFrame(data={'depth':depth_list})

# Repeat every time step depth_steps times.
# The rows are repeated positionally instead of via np.repeat(df.values, ...):
# `.values` on a frame that mixes the string `time` column with float columns
# yields a single object array, so rebuilding the frame from it turned every
# numeric column into dtype object. Selecting rows with iloc keeps each
# column's own dtype. reset_index gives a fresh 0..N-1 index so the concat
# below aligns row-for-row with depth_df.
row_positions = np.repeat(range(num_time_steps), depth_steps)
meterological_data_df = meterological_data_df.iloc[row_positions].reset_index(drop=True)
meterological_data_df = pd.concat([depth_df, meterological_data_df], ignore_index=False, axis=1)
meterological_data_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0
...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0


In [4]:
# Build (input, target) temperature pairs from temp_total04:
# the profile at step t-1 is the input, the profile at step t is the target.
input_temp_df = pd.read_csv("./../output/temp_total04.csv")

# The leading column is the timestamp; the rest is one temperature column per
# depth. Flattening row by row makes depth vary fastest, like depth_list.
profile_values = input_temp_df.iloc[:, 1:].to_numpy()
flattened_inp_temp = profile_values[:-1].reshape(-1)  # every step except the last
flattened_out_temp = profile_values[1:].reshape(-1)   # every step except the first

# Each pair is labelled with the timestamp of its target step.
time_stamp = input_temp_df['time'].iloc[1:].repeat(depth_steps).to_numpy()

data = {
    'time': time_stamp,
    'input_temp': flattened_inp_temp,
    'temp_total04': flattened_out_temp,
    'depth': depth_list,
}
input_temp_df = pd.DataFrame(data)
input_temp_df

Unnamed: 0,time,input_temp,temp_total04,depth
0,2009-06-04 01:00:00,11.641386,11.570472,1
1,2009-06-04 01:00:00,11.650003,11.570472,2
2,2009-06-04 01:00:00,11.650003,11.575860,3
3,2009-06-04 01:00:00,11.394495,11.393058,4
4,2009-06-04 01:00:00,11.123800,11.130929,5
...,...,...,...,...
35370,2009-08-01 23:00:00,6.772400,6.773650,21
35371,2009-08-01 23:00:00,5.995832,5.996763,22
35372,2009-08-01 23:00:00,5.229428,5.230045,23
35373,2009-08-01 23:00:00,4.467800,4.468109,24


In [5]:
# Sanity check: depth_list holds one entry per (time step, depth) pair,
# i.e. num_time_steps * depth_steps values.
depth_list.shape

(35375,)

In [6]:
# Start the combined table: meteorology joined with the input/target
# temperatures, matched on the (time, depth) pair.
final_df = pd.merge(
    meterological_data_df,
    input_temp_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109


In [7]:
# Buoyancy frequency per (time, depth), reshaped from wide to long format.
buoyancy_data_df = pd.read_csv("./../output/buoyancy.csv")

# Removing NaNs: the first and last layer columns are overwritten with the
# values of their nearest interior neighbour.
for edge_column, neighbour_column in (('n2S-2_1', 'n2S-2_2'), ('n2S-2_25', 'n2S-2_24')):
    buoyancy_data_df[edge_column] = buoyancy_data_df[neighbour_column]

buoyancy_data_df = buoyancy_data_df.iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = buoyancy_data_df['time'].repeat(depth_steps).to_numpy()
flattened_buoy = buoyancy_data_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'buoyancy': flattened_buoy, 'depth': depth_list}
buoyancy_data_df = pd.DataFrame(data)

buoyancy_data_df

Unnamed: 0,time,buoyancy,depth
0,2009-06-04 01:00:00,0.000000,1
1,2009-06-04 01:00:00,0.000000,2
2,2009-06-04 01:00:00,0.000271,3
3,2009-06-04 01:00:00,0.000278,4
4,2009-06-04 01:00:00,0.000185,5
...,...,...,...
35370,2009-08-01 23:00:00,0.000282,21
35371,2009-08-01 23:00:00,0.000191,22
35372,2009-08-01 23:00:00,0.000102,23
35373,2009-08-01 23:00:00,0.000013,24


In [8]:
# Attach the buoyancy column, matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    buoyancy_data_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472,0.000000
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472,0.000000
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860,0.000271
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058,0.000278
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929,0.000185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650,0.000282
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763,0.000191
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045,0.000102
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109,0.000013


In [9]:
# Diffusivity per (time, depth), reshaped from wide to long format.
out_diffusivity_df = pd.read_csv("./../output/diff.csv").iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = out_diffusivity_df['time'].repeat(depth_steps).to_numpy()
flattened_diff = out_diffusivity_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'diffusivity': flattened_diff, 'depth': depth_list}
out_diffusivity_df = pd.DataFrame(data)

out_diffusivity_df

Unnamed: 0,time,diffusivity,depth
0,2009-06-04 01:00:00,0.000037,1
1,2009-06-04 01:00:00,0.000037,2
2,2009-06-04 01:00:00,0.000021,3
3,2009-06-04 01:00:00,0.000021,4
4,2009-06-04 01:00:00,0.000024,5
...,...,...,...
35370,2009-08-01 23:00:00,0.000020,21
35371,2009-08-01 23:00:00,0.000024,22
35372,2009-08-01 23:00:00,0.000032,23
35373,2009-08-01 23:00:00,0.000037,24


In [10]:
# Attach the diffusivity column, matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    out_diffusivity_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472,0.000000,0.000037
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472,0.000000,0.000037
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860,0.000271,0.000021
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058,0.000278,0.000021
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929,0.000185,0.000024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650,0.000282,0.000020
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763,0.000191,0.000024
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045,0.000102,0.000032
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109,0.000013,0.000037


In [11]:
# temp_heat00 temperatures per (time, depth), reshaped from wide to long format.
out_temp_df = pd.read_csv("./../output/temp_heat00.csv").iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = out_temp_df['time'].repeat(depth_steps).to_numpy()
flattened_temp = out_temp_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'temp_heat00': flattened_temp, 'depth': depth_list}
out_temp_df = pd.DataFrame(data)

out_temp_df

Unnamed: 0,time,temp_heat00,depth
0,2009-06-04 01:00:00,11.467275,1
1,2009-06-04 01:00:00,11.650008,2
2,2009-06-04 01:00:00,11.650008,3
3,2009-06-04 01:00:00,11.394500,4
4,2009-06-04 01:00:00,11.123803,5
...,...,...,...
35370,2009-08-01 23:00:00,6.772435,21
35371,2009-08-01 23:00:00,5.995879,22
35372,2009-08-01 23:00:00,5.229508,23
35373,2009-08-01 23:00:00,4.467800,24


In [12]:
# Attach the column currently held in out_temp_df (temp_heat00 at this point
# in the notebook), matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    out_temp_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_heat00
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472,0.000000,0.000037,11.467275
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472,0.000000,0.000037,11.650008
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860,0.000271,0.000021,11.650008
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058,0.000278,0.000021,11.394500
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929,0.000185,0.000024,11.123803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650,0.000282,0.000020,6.772435
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763,0.000191,0.000024,5.995879
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045,0.000102,0.000032,5.229508
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109,0.000013,0.000037,4.467800


In [13]:
# temp_diff01 temperatures per (time, depth), reshaped from wide to long format.
# NOTE: this rebinds out_temp_df, which previously held the temp_heat00 table.
out_temp_df = pd.read_csv("./../output/temp_diff01.csv").iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = out_temp_df['time'].repeat(depth_steps).to_numpy()
flattened_temp = out_temp_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'temp_diff01': flattened_temp, 'depth': depth_list}
out_temp_df = pd.DataFrame(data)

out_temp_df

Unnamed: 0,time,temp_diff01,depth
0,2009-06-04 01:00:00,11.467275,1
1,2009-06-04 01:00:00,11.627332,2
2,2009-06-04 01:00:00,11.631393,3
3,2009-06-04 01:00:00,11.393058,4
4,2009-06-04 01:00:00,11.130929,5
...,...,...,...
35370,2009-08-01 23:00:00,6.773650,21
35371,2009-08-01 23:00:00,5.996763,22
35372,2009-08-01 23:00:00,5.230045,23
35373,2009-08-01 23:00:00,4.468109,24


In [14]:
# Attach the column currently held in out_temp_df (temp_diff01 at this point
# in the notebook), matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    out_temp_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_heat00,temp_diff01
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472,0.000000,0.000037,11.467275,11.467275
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472,0.000000,0.000037,11.650008,11.627332
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860,0.000271,0.000021,11.650008,11.631393
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058,0.000278,0.000021,11.394500,11.393058
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929,0.000185,0.000024,11.123803,11.130929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650,0.000282,0.000020,6.772435,6.773650
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763,0.000191,0.000024,5.995879,5.996763
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045,0.000102,0.000032,5.229508,5.230045
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109,0.000013,0.000037,4.467800,4.468109


In [15]:
# Derive calendar features from the timestamp strings.
# Parsing is done once, vectorised, with an explicit format (a value that does
# not match still raises, as strptime did). This replaces a per-row strptime
# loop whose loop variable `date` shadowed the `date` class imported at the
# top of the notebook.
parsed_time = pd.to_datetime(final_df['time'], format='%Y-%m-%d %H:%M:%S')

# Kept as plain lists so downstream cells see the same names and types of
# container; the elements of datetime_list are pandas Timestamps, which are
# datetime.datetime subclasses.
datetime_list = parsed_time.tolist()
day_of_year_list = parsed_time.dt.dayofyear.tolist()  # 1-based day of the year
time_of_day_list = parsed_time.dt.hour.tolist()       # hour of the day, 0-23

In [16]:
# Add the two calendar features as new columns of the combined table.
final_df = final_df.assign(
    day_of_year=day_of_year_list,
    time_of_day=time_of_day_list,
)

In [17]:
# Inspect the combined table, which now includes day_of_year and time_of_day.
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_heat00,temp_diff01,day_of_year,time_of_day
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472,0.000000,0.000037,11.467275,11.467275,155,1
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472,0.000000,0.000037,11.650008,11.627332,155,1
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860,0.000271,0.000021,11.650008,11.631393,155,1
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058,0.000278,0.000021,11.394500,11.393058,155,1
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929,0.000185,0.000024,11.123803,11.130929,155,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650,0.000282,0.000020,6.772435,6.773650,213,23
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763,0.000191,0.000024,5.995879,5.996763,213,23
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045,0.000102,0.000032,5.229508,5.230045,213,23
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109,0.000013,0.000037,4.467800,4.468109,213,23


In [18]:
# temp_mix02 temperatures per (time, depth), reshaped from wide to long format.
temp_mix02_df = pd.read_csv("./../output/temp_mix02.csv").iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = temp_mix02_df['time'].repeat(depth_steps).to_numpy()
flattened_temp = temp_mix02_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'temp_mix02': flattened_temp, 'depth': depth_list}
temp_mix02_df = pd.DataFrame(data)

temp_mix02_df

Unnamed: 0,time,temp_mix02,depth
0,2009-06-04 01:00:00,11.545011,1
1,2009-06-04 01:00:00,11.545011,2
2,2009-06-04 01:00:00,11.631393,3
3,2009-06-04 01:00:00,11.393058,4
4,2009-06-04 01:00:00,11.130929,5
...,...,...,...
35370,2009-08-01 23:00:00,6.773650,21
35371,2009-08-01 23:00:00,5.996763,22
35372,2009-08-01 23:00:00,5.230045,23
35373,2009-08-01 23:00:00,4.468109,24


In [19]:
# Attach the temp_mix02 column, matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    temp_mix02_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_heat00,temp_diff01,day_of_year,time_of_day,temp_mix02
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.641386,11.570472,0.000000,0.000037,11.467275,11.467275,155,1,11.545011
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.570472,0.000000,0.000037,11.650008,11.627332,155,1,11.545011
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.650003,11.575860,0.000271,0.000021,11.650008,11.631393,155,1,11.631393
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.394495,11.393058,0.000278,0.000021,11.394500,11.393058,155,1,11.393058
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,36000000.0,11.123800,11.130929,0.000185,0.000024,11.123803,11.130929,155,1,11.130929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,6.772400,6.773650,0.000282,0.000020,6.772435,6.773650,213,23,6.773650
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.995832,5.996763,0.000191,0.000024,5.995879,5.996763,213,23,5.996763
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,5.229428,5.230045,0.000102,0.000032,5.229508,5.230045,213,23,5.230045
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,36000000.0,4.467800,4.468109,0.000013,0.000037,4.467800,4.468109,213,23,4.468109


In [20]:
# temp_conv03 temperatures per (time, depth), reshaped from wide to long format.
temp_conv03_df = pd.read_csv("./../output/temp_conv03.csv").iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = temp_conv03_df['time'].repeat(depth_steps).to_numpy()
flattened_temp = temp_conv03_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'temp_conv03': flattened_temp, 'depth': depth_list}
temp_conv03_df = pd.DataFrame(data)

temp_conv03_df

Unnamed: 0,time,temp_conv03,depth
0,2009-06-04 01:00:00,11.570472,1
1,2009-06-04 01:00:00,11.570472,2
2,2009-06-04 01:00:00,11.575860,3
3,2009-06-04 01:00:00,11.393058,4
4,2009-06-04 01:00:00,11.130929,5
...,...,...,...
35370,2009-08-01 23:00:00,6.773650,21
35371,2009-08-01 23:00:00,5.996763,22
35372,2009-08-01 23:00:00,5.230045,23
35373,2009-08-01 23:00:00,4.468109,24


In [21]:
# Attach the temp_conv03 column, matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    temp_conv03_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,input_temp,temp_total04,buoyancy,diffusivity,temp_heat00,temp_diff01,day_of_year,time_of_day,temp_mix02,temp_conv03
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.641386,11.570472,0.000000,0.000037,11.467275,11.467275,155,1,11.545011,11.570472
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.650003,11.570472,0.000000,0.000037,11.650008,11.627332,155,1,11.545011,11.570472
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.650003,11.575860,0.000271,0.000021,11.650008,11.631393,155,1,11.631393,11.575860
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.394495,11.393058,0.000278,0.000021,11.394500,11.393058,155,1,11.393058,11.393058
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.123800,11.130929,0.000185,0.000024,11.123803,11.130929,155,1,11.130929,11.130929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,6.772400,6.773650,0.000282,0.000020,6.772435,6.773650,213,23,6.773650,6.773650
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,5.995832,5.996763,0.000191,0.000024,5.995879,5.996763,213,23,5.996763,5.996763
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,5.229428,5.230045,0.000102,0.000032,5.229508,5.230045,213,23,5.230045,5.230045
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,4.467800,4.468109,0.000013,0.000037,4.467800,4.468109,213,23,4.468109,4.468109


In [22]:
# Observed temperatures per (time, depth), reshaped from wide to long format.
temp_obs_df = pd.read_csv("./../output/observed_temp.csv").iloc[1:]  # considering everything from 2nd time step

# Skip the leading column (presumably `time`) and flatten row by row so depth
# varies fastest; each timestamp is repeated once per depth layer.
time_stamp = temp_obs_df['time'].repeat(depth_steps).to_numpy()
flattened_temp = temp_obs_df.iloc[:, 1:].to_numpy().reshape(-1)

data = {'time': time_stamp, 'obs_temp': flattened_temp, 'depth': depth_list}
temp_obs_df = pd.DataFrame(data)

# Report the length of each column array, one per line.
for column_values in (flattened_temp, time_stamp, depth_list):
    print(column_values.shape)
    print()

(35375,)
(35375,)
(35375,)


In [23]:
# Attach the column currently held in temp_obs_df (obs_temp at this point in
# the notebook), matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    temp_obs_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,temp_total04,buoyancy,diffusivity,temp_heat00,temp_diff01,day_of_year,time_of_day,temp_mix02,temp_conv03,obs_temp
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.570472,0.000000,0.000037,11.467275,11.467275,155,1,11.545011,11.570472,16.409
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.570472,0.000000,0.000037,11.650008,11.627332,155,1,11.545011,11.570472,16.480
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.575860,0.000271,0.000021,11.650008,11.631393,155,1,11.631393,11.575860,16.130
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.393058,0.000278,0.000021,11.394500,11.393058,155,1,11.393058,11.393058,15.827
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,11.130929,0.000185,0.000024,11.123803,11.130929,155,1,11.130929,11.130929,16.270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,6.773650,0.000282,0.000020,6.772435,6.773650,213,23,6.773650,6.773650,12.204
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,5.996763,0.000191,0.000024,5.995879,5.996763,213,23,5.996763,5.996763,12.204
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,5.230045,0.000102,0.000032,5.229508,5.230045,213,23,5.230045,5.230045,12.204
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,4.468109,0.000013,0.000037,4.467800,4.468109,213,23,4.468109,4.468109,12.204


In [24]:

#temp_obs_df2 = pd.read_csv("./../output/observed_temp.csv")
#temp_obs_df = pd.read_csv("./../output/observed_temp.csv")


#print(flattened_inp_temp_obs.shape)

#temp_obs_df2 = temp_obs_df2[:-1] # considering everything from 2nd time step

#flattened_inp_temp_obs = temp_obs_df2.iloc[:,1:].to_numpy().flatten()

#time_stamp = input_temp_df['time'][1:].repeat(depth_steps).values

#data = {'time':time_stamp, 'obs_input':flattened_inp_temp_obs, 'depth':depth_list}
#temp_obs_df2 = pd.DataFrame(data=data)

#temp_obs_df2


# Observed temperatures one step earlier: the profile at step t-1 is labelled
# with the timestamp of step t, giving the `input_obs` column.
# NOTE: this rebinds temp_obs_df and flattened_inp_temp from earlier cells.
temp_obs_df = pd.read_csv("./../output/observed_temp.csv")

# Every row except the last, without the leading time column, flattened row by
# row so depth varies fastest.
flattened_inp_temp = temp_obs_df.iloc[:-1, 1:].to_numpy().reshape(-1) #this iloc is to remove the time column
# Timestamps from the second row onward, each repeated once per depth layer.
time_stamp = temp_obs_df['time'].iloc[1:].repeat(depth_steps).to_numpy()

# Report the length of each column array, one per line.
for column_values in (flattened_inp_temp, time_stamp, depth_list):
    print(column_values.shape)

data = {'time': time_stamp, 'input_obs': flattened_inp_temp, 'depth': depth_list}
temp_obs_df = pd.DataFrame(data)
temp_obs_df

(35375,)
(35375,)
(35375,)


Unnamed: 0,time,input_obs,depth
0,2009-06-04 01:00:00,16.350,1
1,2009-06-04 01:00:00,16.426,2
2,2009-06-04 01:00:00,16.088,3
3,2009-06-04 01:00:00,15.789,4
4,2009-06-04 01:00:00,16.240,5
...,...,...,...
35370,2009-08-01 23:00:00,12.204,21
35371,2009-08-01 23:00:00,12.204,22
35372,2009-08-01 23:00:00,12.204,23
35373,2009-08-01 23:00:00,12.204,24


In [25]:
# Attach the column currently held in temp_obs_df (input_obs at this point in
# the notebook), matched on the (time, depth) pair.
final_df = pd.merge(
    final_df,
    temp_obs_df,
    on=['time', 'depth'],
    how='inner',
)
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,buoyancy,diffusivity,temp_heat00,temp_diff01,day_of_year,time_of_day,temp_mix02,temp_conv03,obs_temp,input_obs
0,1,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,0.000000,0.000037,11.467275,11.467275,155,1,11.545011,11.570472,16.409,16.350
1,2,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,0.000000,0.000037,11.650008,11.627332,155,1,11.545011,11.570472,16.480,16.426
2,3,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,0.000271,0.000021,11.650008,11.631393,155,1,11.631393,11.575860,16.130,16.088
3,4,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,0.000278,0.000021,11.394500,11.393058,155,1,11.393058,11.393058,15.827,15.789
4,5,2009-06-04 01:00:00,10.715021,678.292163,-152.775961,-4.194743,0.0,0.255324,1.085796,0.00229,...,0.000185,0.000024,11.123803,11.130929,155,1,11.130929,11.130929,16.270,16.240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35370,21,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,0.000282,0.000020,6.772435,6.773650,213,23,6.773650,6.773650,12.204,12.204
35371,22,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,0.000191,0.000024,5.995879,5.996763,213,23,5.996763,5.996763,12.204,12.204
35372,23,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,0.000102,0.000032,5.229508,5.230045,213,23,5.230045,5.230045,12.204,12.204
35373,24,2009-08-01 23:00:00,13.595026,718.54707,-230.901096,-40.903561,0.0,2.069661,2.343012,0.007849,...,0.000013,0.000037,4.467800,4.468109,213,23,4.468109,4.468109,12.204,12.204


In [26]:
# Write the combined table to a CSV in the current working directory;
# index=False keeps the row index out of the file.
final_df.to_csv("all_data_lake_modeling_in_time_wHeat_Oct.csv", index=False)