In [450]:
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [468]:
# Show the registered Plotly templates and which one is the current default
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']

In [521]:
# File name of the data set to analyse; it is interpolated into the read_csv path
target = "TSDip-1.txt"

In [522]:
# Load the selected file as a tab-separated table
df = pd.read_csv(
    f"/home/rod/Documents/DataScience/my_projects/nav/tsdip/data/{target}",
    sep="\t",
)

## Cleaning up and refactoring column names

In [486]:
# Tidy the raw frame: drop the unused channels, rename what is left, discard
# the near-surface samples and build a single timestamp column.
# Each step produces a new frame instead of mutating in place.
df = df.drop(columns=['SS', 'FileName', 'Ser', 'Meas ', 'Sal. ', 'Cond. ', 'Temp ', ' %O2 ', 'mg/l'])

# Positional rename: relies on the eight remaining columns arriving in exactly this order
df.columns = ['SoS', 'Depth', 'year', 'month', 'day', 'hour', 'minute', 'second']

# Keep only samples deeper than 2.0 m (Depth > 2.0). The explicit .copy() detaches
# the filtered rows from the parent frame so that adding columns below does not
# raise SettingWithCopyWarning.
df = df[df['Depth'] > 2.0].copy()

# Combine the separate date/time fields into a single datetime column named 'date'
df['date'] = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute', 'second']])

In [496]:
# Timestamp of the previous sample; the first row has no predecessor and becomes NaT
df['time_shift'] = df['date'].shift()

In [503]:
# Time elapsed since the previous sample, converted from a Timedelta to seconds (float)
df['time_diff']=(df['date']-df['time_shift'])/ pd.Timedelta(seconds=1)

In [504]:
# Inspect the frame after adding the time_shift / time_diff columns
df

Unnamed: 0,SoS,Depth,year,month,day,hour,minute,second,date,time_shift,time_diff
19,1540.96,2.93,2022,11,7,4,34,16,2022-11-07 04:34:16,NaT,
20,1540.92,3.44,2022,11,7,4,34,18,2022-11-07 04:34:18,2022-11-07 04:34:16,2.0
21,1540.92,3.95,2022,11,7,4,34,20,2022-11-07 04:34:20,2022-11-07 04:34:18,2.0
22,1540.90,4.48,2022,11,7,4,34,22,2022-11-07 04:34:22,2022-11-07 04:34:20,2.0
23,1540.91,5.29,2022,11,7,4,34,24,2022-11-07 04:34:24,2022-11-07 04:34:22,2.0
...,...,...,...,...,...,...,...,...,...,...,...
1047,1540.68,9.33,2022,11,7,5,8,32,2022-11-07 05:08:32,2022-11-07 05:08:30,2.0
1048,1540.83,7.66,2022,11,7,5,8,34,2022-11-07 05:08:34,2022-11-07 05:08:32,2.0
1049,1540.85,6.07,2022,11,7,5,8,36,2022-11-07 05:08:36,2022-11-07 05:08:34,2.0
1050,1540.83,4.61,2022,11,7,5,8,38,2022-11-07 05:08:38,2022-11-07 05:08:36,2.0


In [505]:
# Depth of the previous sample; the first row has no predecessor and becomes NaN
df['depth_shifted']=df['Depth'].shift()

In [511]:
# Magnitude of the depth change between consecutive samples (sign discarded)
df['depth_step(m)'] = df['Depth'].sub(df['depth_shifted']).abs()

In [517]:
# Vertical speed of the probe between consecutive samples: depth change divided
# by elapsed seconds, as an absolute value.
# Reads the 'depth_step(m)' column created in the previous cell; the former
# 'depth_step' name is not created anywhere in this notebook and raised
# KeyError on a fresh kernel.
df['dip_speed(m/s)'] = (df['depth_step(m)'] / df['time_diff']).abs()

In [518]:
# Inspect the frame after adding the depth-step and dip-speed columns
df

Unnamed: 0,SoS,Depth,year,month,day,hour,minute,second,date,time_shift,time_diff,depth_shifted,depth_step,depth_step(m),dip_speed,dip_speed(m/s)
19,1540.96,2.93,2022,11,7,4,34,16,2022-11-07 04:34:16,NaT,,,,,,
20,1540.92,3.44,2022,11,7,4,34,18,2022-11-07 04:34:18,2022-11-07 04:34:16,2.0,2.93,0.51,0.51,0.255,0.255
21,1540.92,3.95,2022,11,7,4,34,20,2022-11-07 04:34:20,2022-11-07 04:34:18,2.0,3.44,0.51,0.51,0.255,0.255
22,1540.90,4.48,2022,11,7,4,34,22,2022-11-07 04:34:22,2022-11-07 04:34:20,2.0,3.95,0.53,0.53,0.265,0.265
23,1540.91,5.29,2022,11,7,4,34,24,2022-11-07 04:34:24,2022-11-07 04:34:22,2.0,4.48,0.81,0.81,0.405,0.405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1047,1540.68,9.33,2022,11,7,5,8,32,2022-11-07 05:08:32,2022-11-07 05:08:30,2.0,10.40,-1.07,1.07,-0.535,0.535
1048,1540.83,7.66,2022,11,7,5,8,34,2022-11-07 05:08:34,2022-11-07 05:08:32,2.0,9.33,-1.67,1.67,-0.835,0.835
1049,1540.85,6.07,2022,11,7,5,8,36,2022-11-07 05:08:36,2022-11-07 05:08:34,2.0,7.66,-1.59,1.59,-0.795,0.795
1050,1540.83,4.61,2022,11,7,5,8,38,2022-11-07 05:08:38,2022-11-07 05:08:36,2.0,6.07,-1.46,1.46,-0.730,0.730


### defining dip direction (up or down)

In [471]:
# Label every sample as part of the descent ('down') or the ascent ('up').
# Rows up to and including the first occurrence of the maximum depth are 'down';
# everything after it is 'up'.
# The comparison uses row position, so it does not depend on the 'Time' or
# 'level_0' helper columns (which are not created by any active cell and made
# this cell fail with KeyError on a fresh kernel).
deepest_pos = df['Depth'].argmax()                       # position of the first max-depth row
df['direction'] = np.where(np.arange(len(df)) <= deepest_pos, 'down', 'up')

# 'level_0' / 'index' only exist when the frame was re-indexed with reset_index()
# beforehand; drop them if present and do nothing otherwise.
df = df.drop(columns=['level_0', 'index'], errors='ignore')

### splitting dataframe

In [472]:
# Full water column, split by cast direction
df_up = df.loc[df['direction'].eq('up')]
df_down = df.loc[df['direction'].eq('down')]

# Same split restricted to the samples at or above 50 (Depth <= 50)
top = df.loc[df['Depth'].le(50)]
top_up = top.loc[top['direction'].eq('up')]
top_down = top.loc[top['direction'].eq('down')]

In [473]:
 
# Two-panel speed-of-sound profile: the full water column on the left and the
# top 50 m on the right, with the 'down' and 'up' samples in different colours.
# make_subplots comes from plotly.subplots (imported in the first cell).
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Full column", "Top 50m"),
)

# One (down, up) pair of frames per subplot column.
panels = {
    1: (df_down, df_up),
    2: (top_down, top_up),
}
for col, (down, up) in panels.items():
    # Only the right-hand panel contributes legend entries, so that
    # 'down' / 'up' are listed once instead of twice.
    in_legend = col == 2
    for cast, label, colour in ((down, 'down', 'royalblue'), (up, 'up', 'brown')):
        fig.add_trace(
            go.Scatter(
                x=cast['SoS'],
                y=cast['Depth'],
                name=label,
                line=dict(color=colour),
                showlegend=in_legend,
            ),
            row=1, col=col,
        )
    # Depth increases downwards
    fig.update_yaxes(autorange="reversed", row=1, col=col)

# Fixed x-range on the top-50 m panel
fig.update_xaxes(range=[1539, 1543], row=1, col=2)

fig.update_layout(
    title_text="Speed of sound profile (m/s)",
    xaxis={'side': 'top'},
    xaxis2={'side': 'top'},
    yaxis_title="Depth (m)",
    yaxis2_title="Depth (m)",
    legend_title="direction",
    template='seaborn',
    height=800,
    width=800,
)

# Shift the annotations (the subplot titles) upwards so they clear the
# top-side x axes; being the last expression, this also displays the figure.
fig.update_annotations(yshift=20)

In [474]:
# Inspect the samples labelled 'down'
df_down

Unnamed: 0,SoS,Depth,Date,Time,direction
0,1540.96,2.93,2022-11-07,04:34:16,down
1,1540.92,3.44,2022-11-07,04:34:18,down
2,1540.92,3.95,2022-11-07,04:34:20,down
3,1540.90,4.48,2022-11-07,04:34:22,down
4,1540.91,5.29,2022-11-07,04:34:24,down
...,...,...,...,...,...
527,1493.17,983.11,2022-11-07,04:51:50,down
528,1493.17,984.12,2022-11-07,04:51:52,down
529,1493.18,984.53,2022-11-07,04:51:54,down
530,1493.17,984.94,2022-11-07,04:51:56,down


In [483]:
# Summarise column dtypes and non-null counts.
# NOTE(review): the recorded output lists level_0 / index / Date / Time columns that
# the active cells above do not create - presumably from an earlier session; re-run to refresh.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1033 entries, 0 to 1032
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   level_0  1033 non-null   int64  
 1   index    1033 non-null   int64  
 2   SoS      1033 non-null   float64
 3   Depth    1033 non-null   float64
 4   Date     1033 non-null   object 
 5   Time     1033 non-null   object 
dtypes: float64(2), int64(2), object(2)
memory usage: 48.5+ KB


In [482]:
# NOTE(review): looks like a debugging leftover. No active cell above creates a 'Time'
# column, so this presumably raises KeyError on a fresh kernel - confirm and remove.
type(df['Time'])

pandas.core.series.Series

### reading files in directory and creating a list with their names

In [337]:
import os

In [334]:
# Directory that holds the data files
path = "/home/rod/Documents/DataScience/my_projects/nav/tsdip/data"
# Names of every entry in that directory (os.listdir makes no ordering guarantee)
dir_list = os.listdir(path)

In [335]:
# Keep only the part of each entry name that precedes its first '.'
new = [entry.split(sep='.')[0] for entry in dir_list]

In [336]:
# Show the extension-less entry names
new

['tsdip-01', 'tsdip-02']