In [38]:
## Importing Libraries

# Python-based Libraries
from datetime import timedelta, datetime
from pathlib import Path
import os

# Data Analysis Libraries
import numpy as np
import pandas as pd

# Graph plotting libraries
import matplotlib.pyplot as plt
import matplotlib.colors as mplcols
from matplotlib import cm
import folium
import branca.colormap as bcm

    
# Filtering Data before Plotting


In [39]:
class Drifters:
    
    def __init__(self, name:str):
        self.name = name
        self.data = {}
        self.data_info = {}
        
    def read_data(self, path):
        # Define Possible encodings
        encodings = ["UTF-8", "UTF-16 LE"]

        # Check if path is a filepath or directory path
        if Path(path).is_file(): 
            # Get file name from path
            filename = path.split('/')[-1].split('.')[-2] 
            # Define a variable to track file decoding
            encoding_gotten = False
            for encoding_value in encodings:
                try:
                    data_value = pd.read_csv(path, encoding=encoding_value)
                    # Catch special errors whereby the encoding is wrong but the data is read.
                    if "Unnamed: 1" in data_value.columns: 
                        continue
                    encoding_gotten = True
                    break # Stop trying if the options had worked
                except:
                    pass # Continue trying if the option didn't work

            if encoding_gotten == True:
                data_to_keep = [['DeviceDateTime','Latitude','Longitude'], 
                            ['Position time (UTC)', 'Latitude (°)', 'Longitude (°)'],
                            ]
                for dtk_val in data_to_keep:
                    try:
                        # Select Data to Keep
                        data_value = data_value[dtk_val]
                        # Drop columns with missing values
                        data_value.dropna(axis=0)
                        # Rename Header Columns
                        data_value.columns = ['DateTime','Latitude','Longitude']
                        # Convert DateTime
                        data_value['DateTime'] = pd.to_datetime(data_value['DateTime'])
                        # Store DF in the object instance 'data'
                        self.data[filename] = data_value
                        
                    except:
                        pass
            else:
                print(f"Encoding error. The encoding for {filename} data should be checked.")

        elif Path(path).is_dir():    
        # 1. Read the files in the given path
            file_names = os.listdir(path)

            # 2. Read each file into pandas dataframe
            for file in file_names:
                filepath = path+'/'+file
                filename = file.split('.')[-2] 
                # Define a variable to track file decoding
                encoding_gotten = False
                for encoding_value in encodings:
                    try:
                        data_value = pd.read_csv(filepath, encoding=encoding_value)
                        # Catch special errors whereby the encoding is wrong but the data is read.
                        if "Unnamed: 1" in data_value.columns: 
                            continue
                        encoding_gotten = True
                        break # Stop trying if the options had worked
                    except:
                        pass # Continue trying if the option didn't work

                # Store the data in the "data" dictionary
                if encoding_gotten == True:
                    data_to_keep = [['DeviceDateTime','Latitude','Longitude'], 
                               ['Position time (UTC)', 'Latitude (°)', 'Longitude (°)'],
                                ]
                    for dtk_val in data_to_keep:
                        try:
                            # Select Data to Keep
                            data_value = data_value[dtk_val]
                            # Drop columns with missing values
                            data_value.dropna(axis=0)
                            # Rename Header Columns
                            data_value.columns = ['DateTime','Latitude','Longitude']
                            # Convert DateTime
                            data_value['DateTime'] = pd.to_datetime(data_value['DateTime'])
                            # Store DF in the object instance 'data'
                            self.data[filename] = data_value
                        
                        except:
                            pass

                else:
                    print(f"Encoding error. The encoding for {filepath} data should be checked.")
    
    def read_logsheet(self, file_path):
        
        self.logsheet = pd.read_csv(file_path)
        
        self.logsheet.dropna(axis=0, inplace=True)
        
        self.logsheet["DepDateTime"] = self.logsheet.apply(lambda x: str(x["DepDate"]) + " " + str(x["DepTime"]), axis = 1)
        self.logsheet["RecovDateTime"] = self.logsheet.apply(lambda x: str(x["RecovDate"]) + " " + str(x["RecovTime"]), axis = 1)
        
        self.logsheet.drop(columns=["DepDate", "DepTime", "RecovDate", "RecovTime"], inplace=True)
        
        self.logsheet["DepDateTime"] = pd.to_datetime(self.logsheet["DepDateTime"])
        self.logsheet["RecovDateTime"] = pd.to_datetime(self.logsheet["RecovDateTime"])
        
        for key in self.data.keys():
            station = self.logsheet["Station"][self.logsheet["Name"] == float(key)].values
            drog_depth = self.logsheet["DrogDepth"][self.logsheet["Name"] == float(key)].values
            
            
            dep_datetime = self.logsheet["DepDateTime"][self.logsheet["Name"] == float(key)].values
            recov_datetime = self.logsheet["RecovDateTime"][self.logsheet["Name"] == float(key)].values
            
            dep_long = self.logsheet["DepLong"][self.logsheet["Name"] == float(key)].values
            dep_lat = self.logsheet["DepLat"][self.logsheet["Name"] == float(key)].values
            
            recov_long = self.logsheet["RecovLong"][self.logsheet["Name"] == float(key)].values
            recov_lat = self.logsheet["RecovLat"][self.logsheet["Name"] == float(key)].values
            
            
            info = {"Station":station, "DrogDepth":drog_depth, "DepDateTime":dep_datetime, "RecovDateTime":recov_datetime,
                    "DepLong":dep_long, "DepLat":dep_lat, "RecovLong":recov_long, "RecovLat":recov_lat}
            
            self.data_info[key] = info
    
    def time_shift(self, shift_amount:float = 1):
        for key in self.data.keys():
            self.data[key]["DateTime"] = self.data[key]["DateTime"] + timedelta(hours = shift_amount)
    
    def extract_data(self):
        for key in self.data.keys():
            # Get the start and end time for each experiment
            deploy_time = self.data_info[key]['DepDateTime']
            recov_time = self.data_info[key]['RecovDateTime']

            # Extract experiment data within the start and end time 
            # bool = np.array([(deploy_time < self.data[key]["DateTime"][i] < recov_time) for i in range(len(self.data[key]))]).flatten()
            bool = np.array([(deploy_time < pos_time < recov_time) for pos_time in self.data[key]["DateTime"]]).flatten()
            
            self.data[key] = self.data[key][bool]
            self.data[key].reset_index(inplace = True)
        
    
    def compute_velocity(self):
        R = 6373.0
        for key in self.data.keys():
            # data = self.data[key]
            # select columns for latitude, longtitude and time
            df_lat = np.array(self.data[key]["Latitude"])
            df_long = np.array(self.data[key]["Longitude"])
            df_time = self.data[key]["DateTime"]
            
            # compute delta time
            d_time = np.array(df_time[1:]) - df_time[:-1]
            d_time = d_time.apply(lambda dt_i : dt_i.seconds/60.0)
            
            # compute distance from lat,long
            rad_lat = np.radians(df_lat)
            rad_long = np.radians(df_long)
            d_lat = rad_lat[1:] - np.array(rad_lat[:-1])
            d_long = rad_long[1:] - np.array(rad_long[:-1]) 
            
            a = np.sin(d_lat / 2.0)**2 + np.cos(rad_lat[:-1]) * np.cos(rad_lat[1:]) * np.sin(d_long / 2)**2
            c = 2.0 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

            distance = R * c
            velocity = ( distance*1000.0 )/( 60.0*np.array(d_time, dtype=float) )
            # Remove the first row
            self.data[key]= self.data[key].iloc[1:]
            self.data[key]["Velocity"] = pd.Series(velocity)
    
    def plot_trajectories(self, saving_path=None):
        
        def velocity_extremes(data):
            min_vel = np.inf
            max_vel = -np.inf

            for key in data.keys():
                min_vel_val = np.min(data[key]["Velocity"])
                max_vel_val = np.max(data[key]["Velocity"])

                min_vel = min_vel_val if min_vel_val < min_vel else min_vel
                max_vel = max_vel_val if max_vel_val > max_vel else max_vel

            return min_vel, max_vel
        
        def graph_centre(data):
            min_lat = 100
            max_lat = 0
            min_lon = 100
            max_lon = 0

            for key in data.keys():
                min_lat_val = np.min(data[key]["Latitude"])
                min_lon_val = np.min(data[key]["Longitude"])
                max_lat_val = np.max(data[key]["Latitude"])
                max_lon_val = np.max(data[key]["Longitude"])

                min_lat = min_lat_val if min_lat_val < min_lat else min_lat
                max_lat = max_lat_val if max_lat_val > max_lat else max_lat
                min_lon = min_lon_val if min_lon_val < min_lon else min_lon
                max_lon = max_lon_val if max_lon_val > max_lon else max_lon

            lat_centre = np.average([min_lat, max_lat])
            lon_centre= np.average([min_lon, max_lon])

            return lat_centre, lon_centre
        
        def plot_vel_traj(df_map, df, name, drog_depth, color_mapper):
            
            colors = ['#ffff00', '#ffffff', '#0000ff']
            drog_depths = np.unique(self.logsheet["DrogDepth"])
            
            drog_depth_found = False
            count = 0
            for depth in drog_depths:
                if drog_depth == depth:
                    drifter_col, drifter_info = [colors[count], f'{drog_depth}m Depth']
                    drog_depth_found = False
                    break
                count += 1
                
            if not drog_depth_found:
                drifter_col, drifter_info = ['#000000', 'Unknown']
                
            feature_group = folium.FeatureGroup(f"{name}\n{drifter_info}")
            
            folium.Marker([ df["Latitude"].iloc[-1]+0.001, df["Longitude"].iloc[-1]-0.001 ],
                    #popup = folium.Popup(f'{key}', parse_html=True),
                    icon=folium.DivIcon(html=f"""<div style="font-family: courier new; color: blue">{f"{name}"}</div>""")
                    ).add_to(feature_group )
            
            start = False

            for lat, lon in zip(df["Latitude"], df["Longitude"]):
                if start: line = folium.PolyLine([[prev_lat, prev_long], [lat, lon]], color='white', weight=2).add_to(feature_group )
                else: start = True

                prev_lat = lat
                prev_long = lon


            for lat, lon, vel in zip(df["Latitude"], df["Longitude"], df["Velocity"]):
                circlemarker = folium.CircleMarker(location=(lat,lon), radius=6,  
                                    fill=True, fill_color=color_mapper(vel)[:7], fill_opacity=0.9, 
                                    stroke=True, color=drifter_col, weight=1.5, opacity=0.9,
                                    popup=folium.Popup(f"{round(vel, 4)} m/s"),
                                ).add_to(feature_group )
                print(color_mapper(vel)[:7])
                
            feature_group.add_to(df_map)

            return df_map
        
        def visualize_drifters(data, data_info):
            
            # Initializing the Map
            plot_map = folium.Map(location = graph_centre(data), zoom_start = 14.5, position="absolute", width='100%', height="100%", 
                                left='0%', top='0%', border=None, min_zoom=13, max_zoom=17)

            # position:absolute;width:100%;height:100%;left:0;top:0;border:none !important;" allowfullscreen webkitallowfullscreen mozallowfullscreen

            # Plotting the location and velocities into the Map
            min_vel, max_vel = velocity_extremes(data)

            color_gradients = ['orange', 'red']
            velcolmap = bcm.LinearColormap(color_gradients, vmin=min_vel, vmax=max_vel, caption = 'Velocity (m/s)') # The velocity scale
            velcolmap.add_to(plot_map)

            for key in data.keys():
                drog_depth = data_info[key]["DrogDepth"]
                plot_map = plot_vel_traj(plot_map, data[key], key, drog_depth, velcolmap)

            folium.LayerControl().add_to(plot_map)
            return plot_map
        
        self.map = visualize_drifters(self.data, self.data_info)
        
        if saving_path:
            self.map.save(f"{saving_path}/Graphs/TrajAndVelocity.html")
            
    def plot_boxplot(self):
        pass
    
    def plot_lineplot(self):
        pass

In [40]:
# Testing code
working_dir = "Data/2022/Day2"
D = Drifters("2022")
D.read_data(f"{working_dir}/drifters/")
D.read_logsheet(f"{working_dir}/drifters-logsheet.csv")
# D.data["274"].head()

In [41]:
# D.logsheet

In [42]:
D.time_shift(2)

In [43]:
# D.data["274"]

In [44]:
D.extract_data()
# D.data["274"]

In [45]:
D.time_shift(shift_amount=2)
# D.data["274"].head(10)

In [48]:
D.compute_velocity()
D.data["274"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data[key]["Velocity"] = pd.Series(velocity)


Unnamed: 0,index,DateTime,Latitude,Longitude,Velocity
2,5,2022-10-12 12:10:00,43.07711,5.9713,0.122558
3,6,2022-10-12 12:20:00,43.07718,5.97055,0.124899
4,7,2022-10-12 12:30:00,43.07725,5.96965,0.111592
5,8,2022-10-12 12:40:00,43.07736,5.96874,0.113137
6,9,2022-10-12 12:50:00,43.07742,5.96792,0.097512
7,10,2022-10-12 13:00:00,43.07749,5.96709,0.085388
8,11,2022-10-12 13:10:00,43.0775,5.96637,0.092078
9,12,2022-10-12 13:20:00,43.07748,5.96574,0.101383
10,13,2022-10-12 13:30:00,43.07748,5.96506,0.110077
11,14,2022-10-12 13:40:00,43.07763,5.96434,0.12211


In [47]:
D.plot_trajectories()
D.map

#ff1d00ff
yes
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa400
#ffa500
#ffa400
#ffa400
#ffa400
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa500
#ffa400


ValueError: Thresholds are not sorted.