In [1]:
## This notebook is built for the Lisa Rose Etchells team
# the purpose is to speed up the combining of logs and importing photo events to SP RRP
# it accomplishes the following
# 1 - clean sailmon log file and add TWD and TWS
# 2 - calculate course and TWA **HOLD**
# 3 - align time stamps and add measured loads and positions from cyclops and visualStringpot
# 4 - obtain file path and time stamps from coach photos
# 5 - cluster coach photos using time and the yachts heading and heel
# 6 - obtain file path for onboard photos at the centre of each cluster
# 7 - move all onboard photos to the correct project directory
# 8 - create XML script for photo events to import photos to RRP
# 9 - create txt for comments for NS ASA ** HOLD **

In [2]:
## The default project directory structure is based on the RRP default directory
# the user specifies the project folder, i.e. 22Feb - REGATTA
# the user also specifies the specific date of the coach photo folder ie 21
# all coach photos are to be imported into the folder photos/coach_day
# the vSp, cyclops and sailmon logs should be named accordingly, with the date at the end

In [3]:
import numpy as np
import pandas as pd
from datetime import datetime
import os
import pandas as pd
from datetime import datetime
import os
import exifread as ex
from os import listdir
from os.path import isfile, join
from sklearn import preprocessing
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from datetime import timedelta

In [5]:
from tkinter import filedialog
# Ask the user to pick a folder through a tkinter dialog and keep the result.
# NOTE(review): the name `dir` shadows the Python builtin dir(). In the cells visible
# here the picked folder is never read (the project path is built from a hard-coded
# location and `dir` is later reused as a loop variable) - confirm whether this
# selection was meant to drive the project directory.
dir = filedialog.askdirectory()

In [6]:
## Collect the session details from the user (input() already returns text)
projectFolder = input("Please enter the RRP analysis folder")
day = input("Please enter the day as DD")
month = input("Please enter the month as MM")
year = input("Please enter the year as YY")
windDir = input("Please enter the average wind direction")
windSpeed = input("Please enter the average wind speed")

## Derive the log file names; every log ends with the date written as YYMMDD
cyclopsFile = f'lisaRose_loads_{year}{month}{day}.csv'  # cyclops load-cell log
vSpFile = f'lisaRose_vSp_{year}{month}{day}.csv'  # visual string-pot (mast gate) log
sailmonFile = f'lisaRose_SM_{year}{month}{day}.csv'  # sailmon log

In [121]:
# Build the absolute path to the project directory on the N: drive.
# The root separator is appended to the drive letter on purpose: on Windows
# os.path.join("N:", "x") returns "N:x", which is relative to the *current*
# directory of drive N: rather than to its root.
directory = os.path.join("N:" + os.sep, "Sean O'Rourke", "LisaRose E22",
                         "2.0 Performance Analysis", "2.2 KND",
                         "2022", projectFolder)

In [7]:
## Load the three raw logs from the project's "Logs" folder

# Sailmon: 'time' becomes 'timeStamp', parsed as a datetime and rounded to whole seconds
sailmon = pd.read_csv(os.path.join(directory, "Logs", sailmonFile), index_col=False)
sailmon = sailmon.rename(columns={'time': 'timeStamp'})
sailmon['timeStamp'] = pd.to_datetime(sailmon['timeStamp'], format="%Y-%m-%d %H:%M:%S").dt.round('1s')

# Mast gate (vSp): first csv column is used as the index; note the colon-separated date format
gate = pd.read_csv(os.path.join(directory, "Logs", vSpFile), index_col=0)
gate['timeStamp'] = pd.to_datetime(gate['timeStamp'], format="%Y:%m:%d %H:%M:%S")
gate = gate.drop(columns=['Dot', 'Count', 'file'])  # columns not needed downstream

# Load cells (cyclops): 'Timestamp' becomes 'timeStamp', parsed and rounded to whole seconds
loadcells = pd.read_csv(os.path.join(directory, "Logs", cyclopsFile), index_col=False)
loadcells = loadcells.rename(columns={'Timestamp': 'timeStamp'})
loadcells['timeStamp'] = pd.to_datetime(loadcells['timeStamp'], format="%Y-%m-%d %H:%M:%S").dt.round('1s')
loadcells['Mainsheet'] = loadcells['Mainsheet'].mul(2)  # mainsheet is read off a 2:1, so double it
loadcells['Backstay'] = loadcells['Backstay'].mul(2)  # backstay is read off a 2:1, so double it
loadcells = loadcells.drop(columns=[' Latitude', ' Longitude', ' Heading'])  # unused (names carry a leading space)

In [8]:
## Correct the time stamps of all 3 csv data frames
# NOTE: this cell shifts the frames in place, so running it twice applies the offset twice.

def _prompt_first_timestamp(logName, currentFirst):
    """Ask the user what the first time stamp of a log should read.

    logName      -- label inserted in the prompt (e.g. "Sailmon")
    currentFirst -- the first time stamp currently in that log, shown to the user
    Returns the answer parsed as a datetime ('%Y-%m-%d %H:%M:%S').
    Raises ValueError if the answer does not match that format.
    """
    answer = input("Please enter what first " + logName + " time stamp should read, "
                   "the current first time stamp is " + str(currentFirst))
    # strip() so a pasted value with surrounding whitespace still parses
    return datetime.strptime(answer.strip(), '%Y-%m-%d %H:%M:%S')

# .iloc[0] takes the first ROW. Plain [0] is a label lookup, which is fragile for
# `gate` because its index is read from the csv (index_col=0).
sm_timeStart = sailmon.timeStamp.iloc[0]  # first time stamp in the sailmon log
lc_timeStart = loadcells.timeStamp.iloc[0]  # first time stamp in the loadcell log
ga_timeStart = gate.timeStamp.iloc[0]  # first time stamp in the gate log

## get the time offsets and apply them
ts_correct = _prompt_first_timestamp("Sailmon", sm_timeStart)
delta = ts_correct - sm_timeStart  # offset between the wanted and the logged start
sailmon['timeStamp'] = sailmon.timeStamp + delta

ts_correct = _prompt_first_timestamp("Loadcells", lc_timeStart)
delta = ts_correct - lc_timeStart
loadcells['timeStamp'] = loadcells.timeStamp + delta

ts_correct = _prompt_first_timestamp("mastGate", ga_timeStart)
delta = ts_correct - ga_timeStart
gate['timeStamp'] = gate.timeStamp + delta


In [9]:
## Clean and merge all csv inputs into one frame keyed on the sailmon time stamps
allData = (
    sailmon[['timeStamp', 'latitude', 'longitude', 'speed',
             'HDT - Heading True', 'COG - Course over Ground', 'Heel']]  # sailmon columns used as the base
    .rename(columns={'HDT - Heading True': 'HDG', 'COG - Course over Ground': 'COG'})  # short names without hyphens
    .merge(gate, how='left', left_on='timeStamp', right_on='timeStamp')  # add mast gate readings
    .rename(columns={'measure': 'mastGate'})  # the gate reading arrives as 'measure'
    .merge(loadcells, how='left', left_on='timeStamp', right_on='timeStamp')  # add load cell readings
    .rename(columns={'timeStamp': 'time'})  # 'time' suits RRP better
)
allData['TWS'] = windSpeed  # true wind speed, exactly as typed by the user
allData['TWD'] = windDir  # true wind direction, exactly as typed by the user
# TWA (windDir - HDG) is on hold and not calculated yet
allData['speed'] = allData['speed'] * 1.944  # m/s to knots
allData.to_csv(os.path.join(directory, 'Logs', f'allData_{year}{month}{day}.csv'))  # export the merged log

In [200]:
# Folder holding the coach photos for the chosen day: <project>/Photos/coach_<DD>
directory_coach = os.path.join(directory, "Photos", f"coach_{day}")
def get_photo_path(directory, filter, criteria):
    """List the files in a folder and read each one's EXIF capture time.

    directory -- folder to inspect (sub-folders are ignored)
    filter    -- when truthy, keep only file names that contain `criteria`
    criteria  -- substring looked for in the file name (case-sensitive), e.g. ".jpg"

    Returns (file_list, timeStamps): the file names and, in the same order, the
    'EXIF DateTimeOriginal' value of each file parsed as a datetime.
    Raises KeyError if a file has no 'EXIF DateTimeOriginal' tag and ValueError if
    the tag is not formatted as '%Y:%m:%d %H:%M:%S'.
    """
    file_list = [f for f in listdir(directory) if isfile(join(directory, f))] # plain files only
    print(str(len(file_list))+' Files found in folder')
    if filter: # optional file-name filter
        file_list = [file for file in file_list if criteria in file]
        print(str(len(file_list))+str(criteria)+' images found in folder')

    timeStamps = [] # capture times, parallel to file_list
    for file in file_list:
        filepath = os.path.join(directory, file)
        # the context manager closes every photo; the handle was previously left open
        with open(filepath, 'rb') as photo:
            tags = ex.process_file(photo) # read the photo's exif tags
        ts = datetime.strptime(str(tags['EXIF DateTimeOriginal']), '%Y:%m:%d %H:%M:%S')
        timeStamps.append(ts)

    return file_list, timeStamps

# Coach photos: keep only the files whose name contains ".jpg"
file_list, timeStamps = get_photo_path(directory_coach, filter=True, criteria=".jpg")
## correct timestamps

def correct_timestamp(timeStamps, varName):
    """Shift a list of time stamps so that the earliest one reads what the user enters.

    timeStamps -- list of datetimes to correct
    varName    -- label used in the prompt (e.g. 'Coach Photos')

    The user is shown the earliest time stamp and asked what it should read
    ('%Y-%m-%d %H:%M:%S'); the difference is added to every time stamp.
    Returns a new list in the original order; the input list is not modified.
    Raises ValueError if `timeStamps` is empty or the answer has the wrong format.
    """
    if len(timeStamps) == 0:
        raise ValueError("No time stamps to correct for " + str(varName))
    first = min(timeStamps) # earliest stamp, computed once
    answer = input("Please enter what first "+str(varName)+" time stamp should read, "
                   "the current first time stamp is "+ str(first))
    # strip() so a pasted value with surrounding whitespace still parses
    ts_correct = datetime.strptime(answer.strip(), '%Y-%m-%d %H:%M:%S')
    delta = ts_correct - first # offset between the wanted and the recorded start
    corrected = [ts + delta for ts in timeStamps]
    print("The first corrected timeStamp is "+str(min(corrected)))
    return corrected

# Align the coach photo time stamps with the corrected log times
timeStamps = correct_timestamp(timeStamps, varName='Coach Photos')


21 Files found in folder
20.jpg images found in folder
The first corrected timeStamp is 2022-05-06 15:43:28


In [201]:
## Build the clustering features: one row per coach photo, in the same order as
## file_list / timeStamps so the cluster labels can be paired with the files later.
features = pd.DataFrame({'time': timeStamps})
# Left merge on de-duplicated log times keeps exactly one row per photo. An inner merge
# would drop photos with no matching log row and repeat photos whose time occurs twice
# in the log, leaving the labels out of step with file_list.
features = features.merge(
    allData[['time', 'HDG', 'Heel', 'speed']].drop_duplicates(subset='time'),
    how='left', on='time')
# seconds elapsed since the first photo in the list
timeStep = (features['time'] - features['time'].iloc[0]).dt.total_seconds().tolist()
features["timeStep"] = timeStep
features = features[['timeStep']] # only the elapsed time is used for clustering at present
features = features.to_numpy() # array of shape (number of photos, 1)

In [202]:
## Standardise the features ahead of clustering
scaler = preprocessing.StandardScaler() # scikit-learn standard scaler
features_scaled = scaler.fit_transform(features) # fit on the features and return the scaled copy

In [226]:
# input() returns text, while KMeans needs an integer number of clusters
noClusters = int(input("Enter the number of clusters"))
# a fixed random_state makes the cluster assignment repeatable between runs
kmeans = KMeans(n_clusters = noClusters, random_state = 0) # define cluster object
cluster = kmeans.fit(features_scaled) # find clusters (fit returns the fitted estimator)

In [204]:
# cluster label of every coach photo; `cluster` is the fitted estimator returned by kmeans.fit
labels = cluster.labels_

In [205]:
# Plan: label each coach photo with its cluster, group by cluster, take a representative
# time per group, find the jib and main pictures nearest that time, adjust the time
# stamps so there are no conflicts, then create the photo events.
cluster_df = pd.DataFrame(dict(cluster=labels, file=file_list, timeStamp=timeStamps)) # label, file name and time stamp per coach photo
photos_df = pd.DataFrame(dict(directory=directory_coach, file=file_list, timeStamp=timeStamps)) # running table of every photo to import

In [206]:
# mean time stamp of each cluster, indexed by cluster label
times_to_fetch = cluster_df.groupby(['cluster'])[['timeStamp']].mean()
## open question: does the order of the clusters need to be worked out?

In [207]:
# Round each cluster's mean time to the nearest whole second.
# .dt.round is the datetime rounding accessor (Series.round is meant for decimals), and
# the lowercase 's' alias matches the '1s' used when the logs were loaded.
# NOTE: this turns times_to_fetch from a DataFrame into a Series, so run this cell only once.
times_to_fetch = times_to_fetch['timeStamp'].dt.round('s')

In [208]:
## Folders holding the onboard photos for the chosen day
directory_jib = os.path.join(directory, "Photos", f"jib_{day}") # jib camera photos
directory_main = os.path.join(directory, "Photos", f"main_{day}") # main camera photos

In [209]:
# For each onboard camera, pick the photo taken nearest to every cluster time and
# append it to photos_df.
for photoDir in [directory_jib, directory_main]: # `photoDir` avoids shadowing the builtin dir()
    file_list, timeStamps = get_photo_path(photoDir, False, 'crit') # no filtering, so the criteria text is unused
    availablePhotos = pd.DataFrame({'timeStamp': timeStamps, 'file': file_list})
    availablePhotos = availablePhotos.set_index('timeStamp').sort_index(kind='stable')
    # get_indexer needs a unique index: keep the first photo of any shared time stamp
    availablePhotos = availablePhotos[~availablePhotos.index.duplicated(keep='first')]
    if availablePhotos.empty:
        continue # nothing to pick from in this folder
    # positions refer to the SORTED frame, so select from it rather than from the
    # unsorted lists returned by get_photo_path
    nearest = np.unique(availablePhotos.index.get_indexer(times_to_fetch, method='nearest'))
    selected = availablePhotos.iloc[nearest]
    file_list = list(selected['file'])
    timeStamps = list(selected.index)
    newPhotos = pd.DataFrame({'directory': photoDir,'file': file_list, 'timeStamp': timeStamps})
    photos_df = pd.concat([photos_df,newPhotos])

6 Files found in folder
6 Files found in folder


In [211]:
## change time stamps so that no two are the same

def _make_timestamps_unique(stamps, step=timedelta(seconds=1)):
    """Return a copy of `stamps` in which every value is unique.

    The first occurrence of a value is kept as it is; a later occurrence is pushed
    forward by `step` until it no longer collides with a value already used.
    Values that are not duplicated are left untouched. Missing values are passed
    through unchanged.
    """
    seen = set()
    unique = []
    for stamp in stamps:
        if pd.isna(stamp): # a missing time cannot be shifted
            unique.append(stamp)
            continue
        while stamp in seen:
            stamp = stamp + step
        seen.add(stamp)
        unique.append(stamp)
    return unique

timeStamps = _make_timestamps_unique(list(photos_df.timeStamp))

# the adjusted times go in a separate 'timeStamps' column; 'timeStamp' keeps the original values
photos_df['timeStamps'] = timeStamps