In [99]:
## Importing Libraries

# Python-based Libraries
from datetime import timedelta, datetime
from pathlib import Path
import os

# Data Analysis Libraries
import numpy as np
import pandas as pd

# Graph plotting libraries

    
# Filtering Data before Plotting


In [100]:
class Drifters:
    
    def __init__(self, name:str):
        self.name = name
        self.data = {}
        self.data_info = {}
        
    def read_data(self, path):
        # Define Possible encodings
        encodings = ["UTF-8", "UTF-16 LE"]

        # Check if path is a filepath or directory path
        if Path(path).is_file(): 
            # Get file name from path
            filename = path.split('/')[-1].split('.')[-2] 
            # Define a variable to track file decoding
            encoding_gotten = False
            for encoding_value in encodings:
                try:
                    data_value = pd.read_csv(path, encoding=encoding_value)
                    # Catch special errors whereby the encoding is wrong but the data is read.
                    if "Unnamed: 1" in data_value.columns: 
                        continue
                    encoding_gotten = True
                    break # Stop trying if the options had worked
                except:
                    pass # Continue trying if the option didn't work

            if encoding_gotten == True:
                data_to_keep = [['DeviceDateTime','Latitude','Longitude'], 
                            ['Position time (UTC)', 'Latitude (°)', 'Longitude (°)'],
                            ]
                for dtk_val in data_to_keep:
                    try:
                        # Select Data to Keep
                        data_value = data_value[dtk_val]
                        # Drop columns with missing values
                        data_value.dropna(axis=0)
                        # Rename Header Columns
                        data_value.columns = ['DateTime','Latitude','Longitude']
                        # Convert DateTime
                        data_value['DateTime'] = pd.to_datetime(data_value['DateTime'])
                        # Store DF in the object instance 'data'
                        self.data[filename] = data_value
                        
                    except:
                        pass
            else:
                print(f"Encoding error. The encoding for {filename} data should be checked.")

        elif Path(path).is_dir():    
        # 1. Read the files in the given path
            file_names = os.listdir(path)

            # 2. Read each file into pandas dataframe
            for file in file_names:
                filepath = path+'/'+file
                filename = file.split('.')[-2] 
                # Define a variable to track file decoding
                encoding_gotten = False
                for encoding_value in encodings:
                    try:
                        data_value = pd.read_csv(filepath, encoding=encoding_value)
                        # Catch special errors whereby the encoding is wrong but the data is read.
                        if "Unnamed: 1" in data_value.columns: 
                            continue
                        encoding_gotten = True
                        break # Stop trying if the options had worked
                    except:
                        pass # Continue trying if the option didn't work

                # Store the data in the "data" dictionary
                if encoding_gotten == True:
                    data_to_keep = [['DeviceDateTime','Latitude','Longitude'], 
                               ['Position time (UTC)', 'Latitude (°)', 'Longitude (°)'],
                                ]
                    for dtk_val in data_to_keep:
                        try:
                            # Select Data to Keep
                            data_value = data_value[dtk_val]
                            # Drop columns with missing values
                            data_value.dropna(axis=0)
                            # Rename Header Columns
                            data_value.columns = ['DateTime','Latitude','Longitude']
                            # Convert DateTime
                            data_value['DateTime'] = pd.to_datetime(data_value['DateTime'])
                            # Store DF in the object instance 'data'
                            self.data[filename] = data_value
                        
                        except:
                            pass

                else:
                    print(f"Encoding error. The encoding for {filepath} data should be checked.")
    
    def read_logsheet(self, file_path):
        
        self.logsheet = pd.read_csv(file_path)
        
        self.logsheet.dropna(axis=0, inplace=True)
        
        self.logsheet["DepDateTime"] = self.logsheet.apply(lambda x: str(x["DepDate"]) + " " + str(x["DepTime"]), axis = 1)
        self.logsheet["RecovDateTime"] = self.logsheet.apply(lambda x: str(x["RecovDate"]) + " " + str(x["RecovTime"]), axis = 1)
        
        self.logsheet.drop(columns=["DepDate", "DepTime", "RecovDate", "RecovTime"], inplace=True)
        
        self.logsheet["DepDateTime"] = pd.to_datetime(self.logsheet["DepDateTime"])
        self.logsheet["RecovDateTime"] = pd.to_datetime(self.logsheet["RecovDateTime"])
        
        for key in self.data.keys():
            station = self.logsheet["Station"][self.logsheet["Name"] == float(key)].values
            drog_depth = self.logsheet["DrogDepth"][self.logsheet["Name"] == float(key)].values
            
            
            dep_datetime = self.logsheet["DepDateTime"][self.logsheet["Name"] == float(key)].values
            recov_datetime = self.logsheet["RecovDateTime"][self.logsheet["Name"] == float(key)].values
            
            dep_long = self.logsheet["DepLong"][self.logsheet["Name"] == float(key)].values
            dep_lat = self.logsheet["DepLat"][self.logsheet["Name"] == float(key)].values
            
            recov_long = self.logsheet["RecovLong"][self.logsheet["Name"] == float(key)].values
            recov_lat = self.logsheet["RecovLat"][self.logsheet["Name"] == float(key)].values
            
            
            info = {"Station":station, "DrogDepth":drog_depth, "DepDateTime":dep_datetime, "RecovDateTime":recov_datetime,
                    "DepLong":dep_long, "DepLat":dep_lat, "RecovLong":recov_long, "RecovLat":recov_lat}
            
            self.data_info[key] = info
    
    def time_shift(self, shift_amount:float = 1):
        for key in self.data.keys():
            self.data[key]["DateTime"] = self.data[key]["DateTime"] + timedelta(hours = shift_amount)
    
    def extract_data(self):
        pass
            
    
    def compute_velocity(self):
        R = 6373.0
        for key in self.data.keys():
            # data = self.data[key]
            # select columns for latitude, longtitude and time
            df_lat = np.array(self.data[key]["Latitude"])
            df_long = np.array(self.data[key]["Longitude"])
            df_time = self.data[key]["DateTime"]
            
            # compute delta time
            d_time = np.array(df_time[1:]) - df_time[:-1]
            d_time = d_time.apply(lambda dt_i : dt_i.seconds/60.0)
            
            # compute distance from lat,long
            rad_lat = np.radians(df_lat)
            rad_long = np.radians(df_long)
            d_lat = rad_lat[1:] - np.array(rad_lat[:-1])
            d_long = rad_long[1:] - np.array(rad_long[:-1]) 
            
            a = np.sin(d_lat / 2.0)**2 + np.cos(rad_lat[:-1]) * np.cos(rad_lat[1:]) * np.sin(d_long / 2)**2
            c = 2.0 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

            distance = R * c
            velocity = ( distance*1000.0 )/( 60.0*np.array(d_time, dtype=float) )
            # Remove the first row
            self.data[key]= self.data[key].iloc[1:]
            self.data[key]["Velocity"] = pd.Series(velocity)
    
    def plot_trajectory(self):
        pass
    

In [101]:
# Testing code
working_dir = "Data/2022/Day2"
D = Drifters("2022")
D.read_data(f"{working_dir}/drifters/")
D.read_logsheet(f"{working_dir}/drifters-logsheet.csv")
D.data["0119"].head()

Unnamed: 0,DateTime,Latitude,Longitude
0,2022-10-12 11:11:48,43.08366,5.95759
1,2022-10-12 11:07:25,43.08342,5.95843
2,2022-10-12 11:01:48,43.08297,5.95946
3,2022-10-12 10:56:47,43.08281,5.96034
4,2022-10-12 10:51:45,43.08254,5.96113


In [102]:
D.time_shift(shift_amount=2)
D.data["0119"].head()

Unnamed: 0,DateTime,Latitude,Longitude
0,2022-10-12 13:11:48,43.08366,5.95759
1,2022-10-12 13:07:25,43.08342,5.95843
2,2022-10-12 13:01:48,43.08297,5.95946
3,2022-10-12 12:56:47,43.08281,5.96034
4,2022-10-12 12:51:45,43.08254,5.96113


In [104]:
D.compute_velocity()
D.data["0119"]

Unnamed: 0,DateTime,Latitude,Longitude,Velocity
2,2022-10-12 13:01:48,43.08297,5.95946,0.000823
3,2022-10-12 12:56:47,43.08281,5.96034,0.000861
4,2022-10-12 12:51:45,43.08254,5.96113,0.000939
5,2022-10-12 12:46:47,43.08228,5.96197,0.000939
6,2022-10-12 12:41:45,43.08194,5.96285,0.000873
7,2022-10-12 12:36:44,43.08163,5.96375,0.000911
8,2022-10-12 12:31:45,43.08129,5.96455,0.000944
9,2022-10-12 12:26:43,43.081,5.96543,0.00095
10,2022-10-12 12:21:45,43.08068,5.96633,0.000906
11,2022-10-12 12:16:44,43.08035,5.96723,0.00078
