In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime as dt
import glob
import re
import gsw

In [40]:
trap_metadata = pd.read_csv('../../data_directory/metadata/trap_metadata.csv')
print(trap_metadata)
trap_metadata = trap_metadata[['Deployment', 'Deploy_DateTime_local', 'Recover_DateTime_local']].dropna()
trap_metadata.rename(columns={'Deployment':'deployment',
                              'Deploy_DateTime_local':'deploy_dateTimeAKDT',
                              'Recover_DateTime_local':'recover_dateTimeAKDT'},
                              inplace=True)

trap_metadata['deploy_dateTimeAKDT'] = trap_metadata['deploy_dateTimeAKDT'].apply(lambda x: dt.strptime(x, '%m/%d/%y %H:%M'))
trap_metadata['recover_dateTimeAKDT'] = trap_metadata['recover_dateTimeAKDT'].apply(lambda x: dt.strptime(x, '%m/%d/%y %H:%M'))

# trap_metadata.to_csv('../results/trap_deploy_recover.csv')

   Sample_Number Deployment  Deploy_Station Beacon_Number  \
0          T001S       T001              15          D036   
1          T001D       T001              15          D036   
2          T002S       T002              16          D036   
3          T002D       T002              16          D036   
4          T003S       T003              21          D001   
5          T003D       T003              21          D001   
6          T004S       T004              26           480   
7          T004D       T004              26           480   
8          T005S       T005              29          6210   
9          T005D       T005              29          6210   
10         T006S       T006              32          D036   
11         T006D       T006              32          D036   
12         T007S       T007              38          2730   
13         T007D       T007              38          2730   
14         T008S       T008              42           130   
15         T008D       T

In [41]:
fileList = glob.glob('../../data_directory/array_deployment_gps/T[0-9][0-9][0-9]_full_deployment.csv')

print(fileList)

['../../data_directory/array_deployment_gps/T016_full_deployment.csv', '../../data_directory/array_deployment_gps/T017_full_deployment.csv', '../../data_directory/array_deployment_gps/T014_full_deployment.csv', '../../data_directory/array_deployment_gps/T015_full_deployment.csv', '../../data_directory/array_deployment_gps/T013_full_deployment.csv', '../../data_directory/array_deployment_gps/T012_full_deployment.csv', '../../data_directory/array_deployment_gps/T011_full_deployment.csv', '../../data_directory/array_deployment_gps/T008_full_deployment.csv', '../../data_directory/array_deployment_gps/T009_full_deployment.csv', '../../data_directory/array_deployment_gps/T004_full_deployment.csv', '../../data_directory/array_deployment_gps/T005_full_deployment.csv', '../../data_directory/array_deployment_gps/T006_full_deployment.csv', '../../data_directory/array_deployment_gps/T007_full_deployment.csv', '../../data_directory/array_deployment_gps/T018_full_deployment.csv', '../../data_directo

In [42]:
# Function to read in Iridium Beacon CSV files, parse datetimes, and return slim and usable dataframe
def parse_file(fileName):
    # Get Deployment Name
    deploymentString = re.search(r'T\d{3}', fileName).group()

    # Print String in case of error
    print(deploymentString)

    # Read in Data
    data = pd.read_csv(fileName)
    data.rename(columns={'Asset Name':'assetName',
                         'Data Date (AKDT)':'dateTimeAKDT',
                         ' Latitude':'latitude',
                         ' Longitude':'longitude'}, inplace=True)
    
    # Make Time Datetime Object - two possible formats (slashes-seconds or dashes+seconds)
    formats = ["%Y-%m-%d %H:%M:%S", "%m/%d/%y %H:%M"] #, "%m-%d-%y %H:%M"]
    for fmt in formats:
        try:
            # datetime = data['dateTimeAKDT'].apply(lambda x: dt.strptime(x, fmt))
            datetime = data['dateTimeAKDT'].apply(lambda x: dt.strptime(str(x), fmt) if pd.notnull(x) else pd.NaT)
            # print(datetime)
        except ValueError:
            # Continue to the next format if the current one fails
            continue
    
    data['dateTimeAKDT'] = datetime
    # Select Necessary Data, Sort and Reset Index
    dataSlim = data.dropna(subset=['dateTimeAKDT'])[['dateTimeAKDT', 'latitude', 'longitude']].sort_values(by='dateTimeAKDT').drop_duplicates().reset_index(drop=True)
    dataSlim['deployment'] = deploymentString
    return(dataSlim)

    

In [43]:
# computes speed and other summary stats from streamlined dataframes
def compute_speed_metrics(df):
    dataSlim = df
    deploymentString = dataSlim['deployment'].iloc[0]

    # Compute Total Distance Travelled
    lat_deploy = dataSlim['latitude'].iloc[0]
    lat_recover = dataSlim['latitude'].iloc[-1]
    lon_deploy = dataSlim['longitude'].iloc[0]
    lon_recover = dataSlim['longitude'].iloc[-1]
    lats = [lat_deploy, lat_recover]
    lons = [lon_deploy, lon_recover]
    # lats = [dataSlim['latitude'].iloc[0], dataSlim['latitude'].iloc[-1]]
    # lons = [dataSlim['longitude'].iloc[0], dataSlim['longitude'].iloc[-1]]
    total_distance_travelled = gsw.geostrophy.distance(lats, lons) / 1000 # return in km

    # Compute Velocity
    ## difference in time points in seconds
    seconds = dataSlim['dateTimeAKDT'].diff().dt.total_seconds()
    ## difference in time points in meters (use insert to make indexing match time difference)
    meters = np.insert(gsw.geostrophy.distance(dataSlim['longitude'], dataSlim['latitude']), 0, np.nan)
    ## convert to centimeters to make final unit match current measurements
    centimeters = 100*meters
    ## speed in cm/s - done with np.where to prevent inf if seconds data is na
    # speed_cm_s = centimeters / seconds
    speed_cm_s = np.where(seconds != 0, centimeters / seconds, np.nan)

    # Summary Stats
    speedMean = np.nanmean(speed_cm_s)
    speedSd = np.nanstd(speed_cm_s)
    speedMax = np.nanmax(speed_cm_s)
    speedMin = np.nanmin(speed_cm_s)

    # Return as Dataframe
    result_df = pd.DataFrame({
        'deployment':deploymentString,
        'totalDist_km':total_distance_travelled,
        'speedMean_cm_s':speedMean,
        'speedSd':speedSd,
        'speedMax':speedMax,
        'speedMin':speedMin,
        'deployLat':lat_deploy,
        'deployLon':lon_deploy,
        'recoverLat':lat_recover,
        'recoverLon':lon_recover
    })
    return(result_df)

In [44]:
## T010 is special because we lost the beacon
t010 = pd.read_csv('../../data_directory/array_deployment_gps/T010_full_deployment_spotter.csv')
t010.rename(columns={'Epoch Time':'epochTime',
                            'Latitude (deg)':'latitude',
                            'Longitude (deg)':'longitude'},
                     inplace=True)

t010['deployment'] = 'T010'
t010['dateTimeUTC'] = t010['epochTime'].apply(lambda x: dt.fromtimestamp(x))
t010['dateTimeAKDT'] = t010['dateTimeUTC'] - datetime.timedelta(hours=8)

t010 = t010.loc[:, ['deployment', 'dateTimeAKDT', 'latitude', 'longitude']].sort_values(by='dateTimeAKDT').drop_duplicates().reset_index(drop=True)

t010_speed = compute_speed_metrics(t010)
t010_speed


Unnamed: 0,deployment,totalDist_km,speedMean_cm_s,speedSd,speedMax,speedMin,deployLat,deployLon,recoverLat,recoverLon
0,T010,220.883103,20.041601,10.438201,42.324341,3.188885,70.41515,-163.56137,70.57455,-161.58075


In [45]:
fileList

['../../data_directory/array_deployment_gps/T016_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T017_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T014_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T015_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T013_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T012_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T011_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T008_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T009_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T004_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T005_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T006_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T007_full_deployment.csv',
 '../../data_directory/array_deployment_gps/T018_full_deployment.csv',
 '../.

In [46]:
speed_metrics_all = pd.concat([compute_speed_metrics(parse_file(file)) for file in fileList])
speed_metrics_all = pd.concat([speed_metrics_all, t010_speed])

T016
T017
T014
T015
T013
T012
T011
T008
T009
T004
T005
T006
T007
T018
T001
T019
T003
T002
T029
T028
T020
T022
T023
T025
T024
T027
T026


In [47]:
speed_metrics_all = speed_metrics_all.sort_values(by='deployment').reset_index(drop=True)
speed_metrics_all

Unnamed: 0,deployment,totalDist_km,speedMean_cm_s,speedSd,speedMax,speedMin,deployLat,deployLon,recoverLat,recoverLon
0,T001,0.562567,31.059237,0.0,31.059237,31.059237,70.885031,-161.379777,70.884765,-161.374724
1,T002,24.977047,13.600728,4.984864,27.091254,2.068243,70.698394,-165.534161,70.617849,-165.744797
2,T003,54.162838,10.543892,4.772877,22.489893,0.661329,71.282081,-164.579399,71.198278,-164.099031
3,T004,73.049696,18.423708,4.925776,28.044179,4.142416,70.409049,-163.530099,70.376165,-162.873902
4,T005,312.071075,24.349386,9.440234,63.691781,0.823508,70.808548,-163.268809,70.779194,-160.462425
5,T006,147.727778,12.739912,5.552675,23.881294,1.388088,71.18297,-163.045772,71.080765,-161.7208
6,T007,164.147697,16.80108,8.730653,37.17163,0.749297,71.400898,-164.393234,71.226468,-162.926538
7,T008,213.825596,14.174455,8.670985,46.525197,1.226236,71.062242,-164.988256,70.914635,-163.070519
8,T009,269.170252,19.497726,9.792536,43.917596,1.512148,70.695359,-165.528302,70.67389,-163.107684
9,T010,220.883103,20.041601,10.438201,42.324341,3.188885,70.41515,-163.56137,70.57455,-161.58075


In [48]:
speed_metrics_all.to_csv('../../data_directory/metadata/speed_metrics_all_array_deployments.csv')