In [1]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import seaborn as sns
import matplotlib.pyplot as plt
import os
from datetime import date
import datetime

ModuleNotFoundError: No module named 'torch'

# Generating the training data for the Heat and Diffusion Model

In [2]:
data_dir = "./LakeModeling/1D_HeatMixing_PGDL/output/"
depth_steps = 25

In [3]:
meterological_data_df = pd.read_csv("./LakeModeling/1D_HeatMixing_PGDL/output/meteorology_input.csv")
meterological_data_df = meterological_data_df[1:] # considering everything from 2nd time step

num_time_steps = meterological_data_df.shape[0]
depth_list = np.array(list(range(1, depth_steps+1)) * num_time_steps)
depth_df = pd.DataFrame(data={'depth':depth_list})

#repeating the dataframe depth_steps number of times
meterological_data_df = pd.DataFrame(np.repeat(meterological_data_df.values, depth_steps, axis=0), columns=meterological_data_df.columns)
meterological_data_df = pd.concat([depth_df, meterological_data_df], ignore_index=False, axis=1)
meterological_data_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07
...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07


In [4]:
input_temp_df = pd.read_csv(data_dir+"temp_total04.csv")

flattened_inp_temp = input_temp_df.iloc[:-1,1:].to_numpy().flatten() #this iloc is to remove the time column
flattened_out_temp = input_temp_df.iloc[1:,1:].to_numpy().flatten() #this iloc is to remove the time column
time_stamp = input_temp_df['time'][1:].repeat(depth_steps).values
data = {'time':time_stamp, 'input_temp':flattened_inp_temp, 'temp_total04':flattened_out_temp, 'depth':depth_list}
input_temp_df = pd.DataFrame(data=data)
input_temp_df 

Unnamed: 0,time,input_temp,temp_total04,depth
0,2009-06-04 15:00:00,13.110569,13.501710,1
1,2009-06-04 15:00:00,11.806688,11.907665,2
2,2009-06-04 15:00:00,11.306141,11.368285,3
3,2009-06-04 15:00:00,11.048210,11.092567,4
4,2009-06-04 15:00:00,10.896431,10.928626,5
...,...,...,...,...
53170,2009-09-01 05:00:00,8.682822,8.683785,21
53171,2009-09-01 05:00:00,7.425032,7.425783,22
53172,2009-09-01 05:00:00,6.179781,6.180297,23
53173,2009-09-01 05:00:00,4.940607,4.940868,24


In [5]:
depth_list.shape

(53175,)

In [6]:
final_df = meterological_data_df.merge(input_temp_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868


In [7]:
buoyancy_data_df = pd.read_csv(data_dir+"buoyancy.csv")
#Removing Nans
buoyancy_data_df['n2S-2_1'] = buoyancy_data_df['n2S-2_2']
buoyancy_data_df['n2S-2_25'] = buoyancy_data_df['n2S-2_24']

buoyancy_data_df = buoyancy_data_df[1:] # considering everything from 2nd time step

flattened_buoy = buoyancy_data_df.iloc[:,1:].to_numpy().flatten()
time_stamp = buoyancy_data_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'buoyancy':flattened_buoy, 'depth':depth_list}
buoyancy_data_df = pd.DataFrame(data=data)

buoyancy_data_df

Unnamed: 0,time,buoyancy,depth
0,2009-06-04 15:00:00,0.000533,1
1,2009-06-04 15:00:00,0.000533,2
2,2009-06-04 15:00:00,0.000262,3
3,2009-06-04 15:00:00,0.000150,4
4,2009-06-04 15:00:00,0.000100,5
...,...,...,...
53170,2009-09-01 05:00:00,0.000757,21
53171,2009-09-01 05:00:00,0.000528,22
53172,2009-09-01 05:00:00,0.000300,23
53173,2009-09-01 05:00:00,0.000065,24


In [8]:
final_df = final_df.merge(buoyancy_data_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710,0.000533
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665,0.000533
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285,0.000262
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567,0.000150
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626,0.000100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785,0.000757
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783,0.000528
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297,0.000300
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868,0.000065


In [9]:
out_diffusivity_df = pd.read_csv(data_dir+"diff.csv")

out_diffusivity_df = out_diffusivity_df[1:] # considering everything from 2nd time step

flattened_diff = out_diffusivity_df.iloc[:,1:].to_numpy().flatten()
time_stamp = out_diffusivity_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'diffusivity':flattened_diff, 'depth':depth_list}
out_diffusivity_df = pd.DataFrame(data=data)

out_diffusivity_df

Unnamed: 0,time,diffusivity,depth
0,2009-06-04 15:00:00,0.000010,1
1,2009-06-04 15:00:00,0.000016,2
2,2009-06-04 15:00:00,0.000021,3
3,2009-06-04 15:00:00,0.000027,4
4,2009-06-04 15:00:00,0.000032,5
...,...,...,...
53170,2009-09-01 05:00:00,0.000013,21
53171,2009-09-01 05:00:00,0.000016,22
53172,2009-09-01 05:00:00,0.000020,23
53173,2009-09-01 05:00:00,0.000037,24


In [10]:
final_df = final_df.merge(out_diffusivity_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710,0.000533,0.000010
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665,0.000533,0.000016
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285,0.000262,0.000021
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567,0.000150,0.000027
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626,0.000100,0.000032
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785,0.000757,0.000013
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783,0.000528,0.000016
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297,0.000300,0.000020
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868,0.000065,0.000037


In [11]:
out_temp_df = pd.read_csv(data_dir+"temp_diff01.csv")

out_temp_df = out_temp_df[1:] # considering everything from 2nd time step

flattened_temp = out_temp_df.iloc[:,1:].to_numpy().flatten()
time_stamp = out_temp_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'temp_diff01':flattened_temp, 'depth':depth_list}
out_temp_df = pd.DataFrame(data=data)

out_temp_df

Unnamed: 0,time,temp_diff01,depth
0,2009-06-04 15:00:00,13.501710,1
1,2009-06-04 15:00:00,11.907665,2
2,2009-06-04 15:00:00,11.368285,3
3,2009-06-04 15:00:00,11.092567,4
4,2009-06-04 15:00:00,10.928626,5
...,...,...,...
53170,2009-09-01 05:00:00,8.683785,21
53171,2009-09-01 05:00:00,7.425783,22
53172,2009-09-01 05:00:00,6.180297,23
53173,2009-09-01 05:00:00,4.940868,24


In [12]:
final_df = final_df.merge(out_temp_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_diff01
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710,0.000533,0.000010,13.501710
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665,0.000533,0.000016,11.907665
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285,0.000262,0.000021,11.368285
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567,0.000150,0.000027,11.092567
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626,0.000100,0.000032,10.928626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785,0.000757,0.000013,8.683785
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783,0.000528,0.000016,7.425783
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297,0.000300,0.000020,6.180297
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868,0.000065,0.000037,4.940868


In [13]:
datetime_list =[datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S') for date in final_df['time']]
day_of_year_list = [t.timetuple().tm_yday for t in datetime_list]
time_of_day_list = [t.hour for t in datetime_list]

In [14]:
final_df['day_of_year']=day_of_year_list
final_df['time_of_day']=time_of_day_list

In [15]:
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_diff01,day_of_year,time_of_day
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710,0.000533,0.000010,13.501710,155,15
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665,0.000533,0.000016,11.907665,155,15
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285,0.000262,0.000021,11.368285,155,15
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567,0.000150,0.000027,11.092567,155,15
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626,0.000100,0.000032,10.928626,155,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785,0.000757,0.000013,8.683785,244,5
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783,0.000528,0.000016,7.425783,244,5
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297,0.000300,0.000020,6.180297,244,5
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868,0.000065,0.000037,4.940868,244,5


In [16]:
temp_mix02_df = pd.read_csv(data_dir+"temp_mix02.csv")

temp_mix02_df = temp_mix02_df[1:] # considering everything from 2nd time step

flattened_temp = temp_mix02_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_mix02_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'temp_mix02':flattened_temp, 'depth':depth_list}
temp_mix02_df = pd.DataFrame(data=data)

temp_mix02_df

Unnamed: 0,time,temp_mix02,depth
0,2009-06-04 15:00:00,13.501710,1
1,2009-06-04 15:00:00,11.907665,2
2,2009-06-04 15:00:00,11.368285,3
3,2009-06-04 15:00:00,11.092567,4
4,2009-06-04 15:00:00,10.928626,5
...,...,...,...
53170,2009-09-01 05:00:00,8.683785,21
53171,2009-09-01 05:00:00,7.425783,22
53172,2009-09-01 05:00:00,6.180297,23
53173,2009-09-01 05:00:00,4.940868,24


In [17]:
final_df = final_df.merge(temp_mix02_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_diff01,day_of_year,time_of_day,temp_mix02
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710,0.000533,0.000010,13.501710,155,15,13.501710
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665,0.000533,0.000016,11.907665,155,15,11.907665
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285,0.000262,0.000021,11.368285,155,15,11.368285
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567,0.000150,0.000027,11.092567,155,15,11.092567
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626,0.000100,0.000032,10.928626,155,15,10.928626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785,0.000757,0.000013,8.683785,244,5,8.683785
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783,0.000528,0.000016,7.425783,244,5,7.425783
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297,0.000300,0.000020,6.180297,244,5,6.180297
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868,0.000065,0.000037,4.940868,244,5,4.940868


In [18]:
temp_conv03_df = pd.read_csv(data_dir+"temp_conv03.csv")

temp_conv03_df = temp_conv03_df[1:] # considering everything from 2nd time step

flattened_temp = temp_conv03_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_conv03_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'temp_conv03':flattened_temp, 'depth':depth_list}
temp_conv03_df = pd.DataFrame(data=data)

temp_conv03_df

Unnamed: 0,time,temp_conv03,depth
0,2009-06-04 15:00:00,13.501710,1
1,2009-06-04 15:00:00,11.907665,2
2,2009-06-04 15:00:00,11.368285,3
3,2009-06-04 15:00:00,11.092567,4
4,2009-06-04 15:00:00,10.928626,5
...,...,...,...
53170,2009-09-01 05:00:00,8.683785,21
53171,2009-09-01 05:00:00,7.425783,22
53172,2009-09-01 05:00:00,6.180297,23
53173,2009-09-01 05:00:00,4.940868,24


In [19]:
final_df = final_df.merge(temp_conv03_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,Area_m2,input_temp,temp_total04,buoyancy,diffusivity,temp_diff01,day_of_year,time_of_day,temp_mix02,temp_conv03
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,13.110569,13.501710,0.000533,0.000010,13.501710,155,15,13.501710,13.501710
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.806688,11.907665,0.000533,0.000016,11.907665,155,15,11.907665,11.907665
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.306141,11.368285,0.000262,0.000021,11.368285,155,15,11.368285,11.368285
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,11.048210,11.092567,0.000150,0.000027,11.092567,155,15,11.092567,11.092567
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,3.6e+07,10.896431,10.928626,0.000100,0.000032,10.928626,155,15,10.928626,10.928626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,8.682822,8.683785,0.000757,0.000013,8.683785,244,5,8.683785,8.683785
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,7.425032,7.425783,0.000528,0.000016,7.425783,244,5,7.425783,7.425783
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,6.179781,6.180297,0.000300,0.000020,6.180297,244,5,6.180297,6.180297
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,3.6e+07,4.940607,4.940868,0.000065,0.000037,4.940868,244,5,4.940868,4.940868


In [20]:
temp_obs_df = pd.read_csv(data_dir+"observed_temp.csv")

temp_obs_df = temp_obs_df[1:] # considering everything from 2nd time step

flattened_temp = temp_obs_df.iloc[:,1:].to_numpy().flatten()
time_stamp = temp_obs_df['time'].repeat(depth_steps).values

data = {'time':time_stamp, 'obs_temp':flattened_temp, 'depth':depth_list}
temp_obs_df = pd.DataFrame(data=data)

temp_obs_df

Unnamed: 0,time,obs_temp,depth
0,2009-06-04 15:00:00,16.409,1
1,2009-06-04 15:00:00,16.480,2
2,2009-06-04 15:00:00,16.130,3
3,2009-06-04 15:00:00,15.827,4
4,2009-06-04 15:00:00,16.270,5
...,...,...,...
53170,2009-09-01 05:00:00,12.294,21
53171,2009-09-01 05:00:00,12.294,22
53172,2009-09-01 05:00:00,12.294,23
53173,2009-09-01 05:00:00,12.294,24


In [21]:
final_df = final_df.merge(temp_obs_df, how='inner', on=['time','depth'])
final_df

Unnamed: 0,depth,time,AirTemp_degC,Longwave_Wm-2,Latent_Wm-2,Sensible_Wm-2,Shortwave_Wm-2,lightExtinct_m-1,ShearVelocity_mS-1,ShearStress_Nm-2,...,input_temp,temp_total04,buoyancy,diffusivity,temp_diff01,day_of_year,time_of_day,temp_mix02,temp_conv03,obs_temp
0,1,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,...,13.110569,13.501710,0.000533,0.000010,13.501710,155,15,13.501710,13.501710,16.409
1,2,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,...,11.806688,11.907665,0.000533,0.000016,11.907665,155,15,11.907665,11.907665,16.480
2,3,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,...,11.306141,11.368285,0.000262,0.000021,11.368285,155,15,11.368285,11.368285,16.130
3,4,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,...,11.048210,11.092567,0.000150,0.000027,11.092567,155,15,11.092567,11.092567,15.827
4,5,2009-06-04 15:00:00,20.0671,700.347,-100.233,22.5336,695.447,0.248168,1.05481,0.00218706,...,10.896431,10.928626,0.000100,0.000032,10.928626,155,15,10.928626,10.928626,16.270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53170,21,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,...,8.682822,8.683785,0.000757,0.000013,8.683785,244,5,8.683785,8.683785,12.294
53171,22,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,...,7.425032,7.425783,0.000528,0.000016,7.425783,244,5,7.425783,7.425783,12.294
53172,23,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,...,6.179781,6.180297,0.000300,0.000020,6.180297,244,5,6.180297,6.180297,12.294
53173,24,2009-09-01 05:00:00,8.1588,715.639,-243.644,-72.05,0,1.41667,1.67207,0.00457409,...,4.940607,4.940868,0.000065,0.000037,4.940868,244,5,4.940868,4.940868,12.294


In [22]:
final_df.to_csv("all_data_lake_modeling_in_time.csv", index=False)