## Libraries

In [1]:
import os
import re
import pandas as pd
import datetime

## Metrics 

In [6]:
# Location of the merged metrics export.
# NOTE(review): despite its name, this variable holds the path of a single
# CSV file, not of a folder.
metricsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/MergedMetrics.csv"

# The first line of the file supplies the column names; the line directly
# below it is skipped (presumably a units row -- TODO confirm).
df_metrics = pd.read_csv(
    metricsFolder,
    header=[0],
    skiprows=[1],
)

# Parse the 'Date' column into pandas datetimes.
df_metrics["Date"] = pd.to_datetime(df_metrics["Date"])

In [7]:
# Preview the first three rows of the metrics table.
df_metrics.head(3)

Unnamed: 0.1,Unnamed: 0,Date,Player,Club Name,Club Type,Club Speed,Attack Angle,Club Path,Club Face,Face to Path,...,Carry Deviation Distance,Total Distance,Total Deviation Angle,Total Deviation Distance,Note,Tag,Air Density,Temperature,Air Pressure,Relative Humidity
0,1,2022-11-26 11:48:50,Rok,7I XXIO eleven,7 Iron,120.275997,0.39,2.47,2.5,0.03,...,5.67,142.380005,2.44,6.07,,,1.225398,4.44,97.95,97
1,2,2022-11-26 11:49:10,Rok,7I XXIO eleven,7 Iron,109.547997,1.72,3.89,-8.71,-12.6,...,-27.469999,138.589996,-13.35,-32.009998,,,1.225398,4.44,97.95,97
2,3,2022-11-26 11:57:38,Rok,7I XXIO eleven,7 Iron,103.247997,-2.69,6.23,5.91,-0.32,...,8.72,101.93,5.56,9.87,,BAD,1.225398,4.44,97.95,97


## Videos

In [9]:
videosFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Videos"

# Recursively gather the full path of every .mp4 file below the videos folder.
video_paths = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(videosFolder)
    for name in names
    if name.endswith(".mp4")
]

# One row per video; 'Date' is the file's modification time converted to a
# naive local datetime.
df_videos = pd.DataFrame({'videoPath': video_paths})
df_videos['Date'] = [
    datetime.datetime.fromtimestamp(os.path.getmtime(video_path))
    for video_path in video_paths
]

# Order the videos chronologically.
df_videos = df_videos.sort_values('Date')

In [None]:
# Show the video table: one row per video path with its file timestamp.
display(df_videos)

## Merging Metrics and Videos

There is a problem with matching the datetime from the metrics data to the creation datetime of the videos: occasionally the two differ by about 1 second. Since I don't hit 2 shots within 2 seconds, I can match each video to the nearest metrics timestamp within a tolerance of 2 seconds, so that shots with a 1 s difference are still matched correctly.

In [11]:
# Match every video to the metrics row whose timestamp is closest to the
# video's, accepting the match only when the two are at most 2 seconds apart.
# Videos without such a row keep NaN in all metrics columns.
#
# merge_asof requires both frames to be sorted on the key and rejects missing
# keys. The metrics are therefore sorted (stable, so rows with equal
# timestamps keep their relative order) and rows without a Date are dropped
# right before the merge. df_metrics itself is left untouched.
df_metrics_sorted = (
    df_metrics
    .dropna(subset=['Date'])
    .sort_values(by='Date', kind='stable')
)

merged_df_asof = pd.merge_asof(df_videos, df_metrics_sorted,
                               on='Date',
                               suffixes=('_videos', '_metrics'),
                               tolerance=pd.Timedelta('2s'),
                               direction='nearest')


In [12]:
# Show the merged table; rows with NaN metrics are videos that found no
# metrics row within the merge tolerance.
display(merged_df_asof)

Unnamed: 0.1,videoPath,Date,Unnamed: 0,Player,Club Name,Club Type,Club Speed,Attack Angle,Club Path,Club Face,...,Carry Deviation Distance,Total Distance,Total Deviation Angle,Total Deviation Distance,Note,Tag,Air Density,Temperature,Air Pressure,Relative Humidity
0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:40,0.0,Rok,SW XXIO eleven,Sand Wedge,0.000000,0.000000,0.000000,0.000000,...,0.630000,16.530001,3.390000,0.980000,,,1.220799,7.2200,98.600,91.0
1,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:52,1.0,Rok,SW XXIO eleven,Sand Wedge,0.000000,0.000000,0.000000,0.000000,...,-4.120000,24.320000,-12.310000,-5.180000,,,1.220799,7.2200,98.600,91.0
2,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:26:04,2.0,Rok,SW XXIO eleven,Sand Wedge,0.000000,0.000000,0.000000,0.000000,...,-0.070000,20.990000,-0.280000,-0.100000,,,1.220799,7.2200,98.600,91.0
3,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:13:58,,,,,,,,,...,,,,,,,,,,
4,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:15:04,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:39:10,5.0,Rok,7I XXIO eleven,7 Iron,134.319366,-1.550528,9.684385,-2.206765,...,-20.125858,150.571548,-8.713073,-22.809591,,,1.199572,21.1111,101.325,0.0
616,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:39:36,6.0,Rok,7I XXIO eleven,7 Iron,134.166642,3.702559,11.712544,4.219777,...,-0.539315,141.665237,-0.687280,-1.699433,,,1.199572,21.1111,101.325,0.0
617,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:39:55,7.0,Rok,7I XXIO eleven,7 Iron,131.185145,3.796341,12.602219,7.453573,...,-0.287651,152.553513,-0.668225,-1.779588,,,1.199572,21.1111,101.325,0.0
618,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:40:16,8.0,Rok,7I XXIO eleven,7 Iron,129.081442,0.160541,14.670392,2.346039,...,0.233299,151.252411,-0.141274,-0.372414,,,1.199572,21.1111,101.325,0.0


### Determining the shot type

In [81]:
def shot_direction(x, threshold=5):
    """Classify a launch-direction value as "pull", "push" or "straight".

    Parameters
    ----------
    x : number or None
        Launch direction of a single shot (presumably in degrees, with
        negative and positive values on opposite sides of the target
        line -- TODO confirm against the metrics export).
    threshold : number, default 5
        Values strictly below ``-threshold`` are "pull", values strictly
        above ``threshold`` are "push"; everything in between, including
        both boundaries, is "straight".

    Returns
    -------
    str or None
        The direction label, or ``None`` when ``x`` is missing
        (``None``/NaN), so that a missing measurement is not silently
        labelled "straight".
    """
    # Comparisons with NaN are always False, which would otherwise fall
    # through to "straight"; report missing input as missing instead.
    if pd.isna(x):
        return None
    if x < -threshold:
        return "pull"
    if x > threshold:
        return "push"
    return "straight"

# Label every shot by its launch direction, then tally the labels.
# NOTE(review): `df` is not defined in the visible cells -- confirm which
# frame it refers to before relying on a fresh-kernel run.
direction_labels = df["Launch Direction"].map(shot_direction)
df["Direction"] = direction_labels
df["Direction"].value_counts()

Direction
straight    59
pull        44
push         6
Name: count, dtype: int64

In [86]:
def shot_curvature(x, curve_threshold=300, severe_threshold=800):
    """Classify a sidespin value into a shot-shape label.

    Parameters
    ----------
    x : number or None
        Sidespin of a single shot (presumably in rpm, the sign giving the
        curve direction -- TODO confirm against the metrics export).
    curve_threshold : number, default 300
        Magnitude separating a neutral shot from a draw/fade.
    severe_threshold : number, default 800
        Magnitude separating a draw/fade from a hook/slice.

    Returns
    -------
    str or None
        * ``"hook"``  if ``x < -severe_threshold``
        * ``"draw"``  if ``-severe_threshold <= x < -curve_threshold``
        * ``""``      if ``-curve_threshold <= x < curve_threshold``
        * ``"fade"``  if ``curve_threshold <= x < severe_threshold``
        * ``"slice"`` if ``x >= severe_threshold``
        * ``None``    if ``x`` is missing (``None``/NaN)
    """
    # NaN fails every `<` comparison and would otherwise fall through to the
    # final "slice" branch; report missing input as missing instead.
    if pd.isna(x):
        return None
    if x < -severe_threshold:
        return "hook"
    if x < -curve_threshold:
        return "draw"
    if x < curve_threshold:
        return ""
    if x < severe_threshold:
        return "fade"
    return "slice"

# Label every shot by its sidespin-based shape, then tally the labels.
shape_labels = df["Sidespin"].map(shot_curvature)
df["Shape"] = shape_labels
df["Shape"].value_counts()

Shape
         42
draw     27
hook     17
fade     13
slice    10
Name: count, dtype: int64

In [87]:
# Combine both labels as "<direction>-<shape>". Shots with an empty shape
# label therefore end in a trailing dash (e.g. "straight-").
direction_with_dash = df["Direction"] + "-"
df["ShotType"] = direction_with_dash + df["Shape"]
df["ShotType"].value_counts()

ShotType
straight-draw    22
straight-        21
pull-            20
straight-hook    12
pull-slice       10
pull-fade         9
push-hook         5
pull-draw         5
straight-fade     4
push-             1
Name: count, dtype: int64

In [93]:
# Inspection only: selecting with a list of column names (double brackets)
# yields a DataFrame rather than a Series.
type(df[["ShotType"]])

pandas.core.frame.DataFrame

In [29]:
import os

# Folder with the raw per-session metrics CSVs, and the folder used to build
# the matching label file paths.
metricsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics"
labelsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels"

# Collect the CSV file names in the metrics folder. os.listdir returns names
# in arbitrary order, so sort them to keep the processing order (and the
# printed list) reproducible between runs.
files = sorted(file for file in os.listdir(metricsFolder) if file.endswith(".csv"))

# Print the list of files
print(files)


['DrivingRange-2024-03-03 13:35:37 +0000.csv', 'DrivingRange-2024-03-03 21:03:28 +0000.csv', 'DrivingRange-2024-03-03 21:06:50 +0000.csv', 'DrivingRange-2024-03-03 21:18:09 +0000.csv', 'DrivingRange-2024-03-03 21:20:14 +0000.csv']


In [30]:
# Pair each metrics CSV with the path of the same file name inside the
# labels folder: (metrics path, labels path).
filePaths = []
for csv_name in files:
    metrics_csv = os.path.join(metricsFolder, csv_name)
    labels_csv = os.path.join(labelsFolder, csv_name)
    filePaths.append((metrics_csv, labels_csv))

filePaths

[('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 13:35:37 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 13:35:37 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:03:28 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 21:03:28 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:06:50 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 21:06:50 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:18:09 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2

In [32]:
from labelExtraction import extractLabel

# Run the label extraction once per (metrics CSV, labels CSV) pair.
# NOTE(review): extractLabel comes from the project's labelExtraction module;
# presumably it reads import_path and writes to export_path -- confirm there.
for metrics_csv, labels_csv in filePaths:
    extractLabel(
        import_path=metrics_csv,
        export_path=labels_csv,
    )