## Libraries

In [6]:
import os
import re
import pandas as pd
import datetime

## Metrics 

In [8]:
# Location of the merged metrics CSV (the variable says "Folder", but it holds a file path).
metricsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/metrics/MergedMetrics.csv"

# Read the CSV using line 0 as the header and skipping line 1
# (presumably a units row — confirm), then parse the Date strings
# into datetimes as part of the same chain.
df_metrics = (
    pd.read_csv(metricsFolder, header=[0], skiprows=[1])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

In [9]:
# Preview the first three rows to check the columns and the parsed Date values.
df_metrics.head(3)

Unnamed: 0.1,Unnamed: 0,Date,Player,Club Name,Club Type,Club Speed,Attack Angle,Club Path,Club Face,Face to Path,...,Carry Deviation Distance,Total Distance,Total Deviation Angle,Total Deviation Distance,Note,Tag,Air Density,Temperature,Air Pressure,Relative Humidity
0,1,2022-11-26 11:48:50,Rok,7I XXIO eleven,7 Iron,120.275997,0.39,2.47,2.5,0.03,...,5.67,142.380005,2.44,6.07,,,1.225398,4.44,97.95,97
1,2,2022-11-26 11:49:10,Rok,7I XXIO eleven,7 Iron,109.547997,1.72,3.89,-8.71,-12.6,...,-27.469999,138.589996,-13.35,-32.009998,,,1.225398,4.44,97.95,97
2,3,2022-11-26 11:57:38,Rok,7I XXIO eleven,7 Iron,103.247997,-2.69,6.23,5.91,-0.32,...,8.72,101.93,5.56,9.87,,BAD,1.225398,4.44,97.95,97


In [21]:
# (rows, columns) of the metrics table.
df_metrics.shape

(1647, 32)

## Videos

In [10]:
videosFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Videos"

# Gather every .mp4 file below videosFolder, at any directory depth.
video_paths = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(videosFolder)
    for name in names
    if name.endswith(".mp4")
]

# One row per video, stamped with the file's last-modification time
# (os.path.getmtime) and ordered chronologically.
df_videos = pd.DataFrame({
    'videoPath': video_paths,
    'Date': [datetime.datetime.fromtimestamp(os.path.getmtime(path)) for path in video_paths],
}).sort_values(by='Date')

In [11]:
# Show the videos table, which was sorted by Date when it was built.
display(df_videos)

Unnamed: 0,videoPath,Date
256,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:40
258,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:52
257,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:26:04
604,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:13:58
606,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:15:04
...,...,...
403,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:49:01
402,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:49:37
400,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:50:38
401,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:51:23


In [22]:
# (rows, columns) of the videos table; one row per .mp4 file found.
df_videos.shape

(753, 2)

## Merging Metrics and Videos

There is a problem with matching the datetime from the metrics data to the creation datetime of the videos: occasionally there is a difference of about 1 second between the two. Since I don't hit 2 shots within 2 seconds, I can match each video to the nearest metrics timestamp within a 2-second tolerance, which correctly pairs the cases where the two timestamps differ by 1 s.

In [14]:
# Match each video to the metrics row with the nearest timestamp, accepting a
# match only when the two timestamps are at most 2 seconds apart. Videos with
# no metrics row inside that window keep NaN in every metrics column.
#
# merge_asof requires both frames to be sorted by the key and rejects null
# keys, so sort explicitly here (stable, so already-sorted data is unchanged)
# and drop metrics rows without a Date instead of relying on the CSV row order.
# NOTE(review): nothing here stops two videos from matching the same metrics row.
merged = pd.merge_asof(
    df_videos.sort_values('Date', kind='stable'),
    df_metrics.dropna(subset=['Date']).sort_values('Date', kind='stable'),
    on='Date',
    suffixes=('_videos', '_metrics'),
    tolerance=pd.Timedelta('2s'),
    direction='nearest',
)


In [15]:
# Inspect the merge result; rows whose metrics columns are NaN are videos
# for which no metrics row fell within the merge tolerance.
display(merged)

Unnamed: 0.1,videoPath,Date,Unnamed: 0,Player,Club Name,Club Type,Club Speed,Attack Angle,Club Path,Club Face,...,Carry Deviation Distance,Total Distance,Total Deviation Angle,Total Deviation Distance,Note,Tag,Air Density,Temperature,Air Pressure,Relative Humidity
0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:40,0.0,Rok,SW XXIO eleven,Sand Wedge,0.000000,0.000000,0.000000,0.000000,...,0.630000,16.530001,3.390000,0.980000,,,1.220799,7.220000,98.60000,91.0
1,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:52,1.0,Rok,SW XXIO eleven,Sand Wedge,0.000000,0.000000,0.000000,0.000000,...,-4.120000,24.320000,-12.310000,-5.180000,,,1.220799,7.220000,98.60000,91.0
2,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:26:04,2.0,Rok,SW XXIO eleven,Sand Wedge,0.000000,0.000000,0.000000,0.000000,...,-0.070000,20.990000,-0.280000,-0.100000,,,1.220799,7.220000,98.60000,91.0
3,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:13:58,,,,,,,,,...,,,,,,,,,,
4,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:15:04,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
748,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:49:01,3.0,Rok,7I XXIO eleven,7 Iron,126.860281,0.220221,-4.572323,-6.394173,...,-27.651789,137.314240,-13.098104,-31.118023,,,1.187058,11.666667,97.38699,65.0
749,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:49:37,4.0,Rok,7I XXIO eleven,7 Iron,129.611093,-2.145370,2.965698,2.487503,...,-4.815349,116.098564,-3.621420,-7.333233,,,1.187058,11.666667,97.38699,65.0
750,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:50:38,5.0,Rok,7I XXIO eleven,7 Iron,126.243179,1.511003,0.361320,2.601925,...,-1.366448,110.502029,-1.281306,-2.471094,,,1.187058,11.666667,97.38699,65.0
751,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-04-16 16:51:23,6.0,Rok,7I XXIO eleven,7 Iron,125.306320,2.557724,1.098738,3.818094,...,14.673410,136.882645,6.723858,16.026850,,,1.187058,11.666667,97.38699,65.0


Calculating the number of missing matches

In [25]:
# "Player" comes only from the metrics side of the merge, so a NaN there
# marks a video that found no metrics row.
merged["Player"].isna().sum()

74

Number of successful merges

In [32]:
# Keep only videos that received a metrics row (non-null "Player") and report (rows, columns).
merged.dropna(subset=["Player"]).shape

(679, 33)

Number of iron 7 shots

In [35]:
# Filter on the exact "Club Name" string of the 7 iron and report (rows, columns).
merged[merged['Club Name'] == "7I XXIO eleven"].shape

(410, 33)

### Determining the shot type

In [81]:
def shot_direction(x, threshold=5):
    """Classify a launch-direction value as "pull", "push" or "straight".

    Parameters
    ----------
    x : number
        Launch-direction value for one shot (this notebook passes values from
        the "Launch Direction" column).
    threshold : number, default 5
        Half-width of the "straight" band. Values in
        [-threshold, threshold] (both ends included) count as "straight".

    Returns
    -------
    str or None
        "pull" when x < -threshold, "push" when x > threshold, "straight"
        otherwise. None when x is missing (NaN / None / NA).
    """
    # NaN compares False against everything, so without this guard a missing
    # measurement would fall through both tests and be labelled "straight".
    if pd.isna(x):
        return None
    if x < -threshold:
        return "pull"
    if x > threshold:
        return "push"
    return "straight"

# NOTE(review): `df` is not defined in any cell shown in this notebook (the
# frames built so far are df_metrics, df_videos and merged) — confirm which
# frame this is meant to be before relying on Restart & Run All.
# Label every row by applying shot_direction to its "Launch Direction" value.
df["Direction"] = df["Launch Direction"].apply(shot_direction)
# Count how many shots fall into each direction class.
df["Direction"].value_counts()

Direction
straight    59
pull        44
push         6
Name: count, dtype: int64

In [86]:
def shot_curvature(x, draw_fade_threshold=300, hook_slice_threshold=800):
    """Classify a sidespin value as "hook", "draw", "", "fade" or "slice".

    Parameters
    ----------
    x : number
        Sidespin value for one shot (this notebook passes values from the
        "Sidespin" column).
    draw_fade_threshold : number, default 300
        Magnitude at which a shot stops being treated as having no shape.
    hook_slice_threshold : number, default 800
        Magnitude separating "draw" from "hook" and "fade" from "slice".

    Returns
    -------
    str or None
        "hook"  when x < -hook_slice_threshold
        "draw"  when -hook_slice_threshold <= x < -draw_fade_threshold
        ""      when -draw_fade_threshold <= x < draw_fade_threshold
        "fade"  when draw_fade_threshold <= x < hook_slice_threshold
        "slice" when x >= hook_slice_threshold
        None when x is missing (NaN / None / NA).
    """
    # NaN fails every "<" test, so without this guard a missing measurement
    # would reach the final branch and be labelled "slice".
    if pd.isna(x):
        return None
    if x < -hook_slice_threshold:
        return "hook"
    if x < -draw_fade_threshold:
        return "draw"
    if x < draw_fade_threshold:
        return ""
    if x < hook_slice_threshold:
        return "fade"
    return "slice"

# Label every row by applying shot_curvature to its "Sidespin" value.
# NOTE(review): `df` is not defined in any cell shown in this notebook — confirm its origin.
df["Shape"] = df["Sidespin"].apply(shot_curvature)
# Count how many shots fall into each shape class ("" means no shape label).
df["Shape"].value_counts()

Shape
         42
draw     27
hook     17
fade     13
slice    10
Name: count, dtype: int64

In [87]:
# Combine the two labels as "<Direction>-<Shape>". Because Shape is "" for
# shots without a shape label, those rows end in a bare dash (e.g. "straight-").
df["ShotType"] = df["Direction"] + "-" + df["Shape"]
# Count how many shots fall into each combined class.
df["ShotType"].value_counts()

ShotType
straight-draw    22
straight-        21
pull-            20
straight-hook    12
pull-slice       10
pull-fade         9
push-hook         5
pull-draw         5
straight-fade     4
push-             1
Name: count, dtype: int64

In [93]:
# Double brackets select a one-column DataFrame rather than a Series; this cell only checks that type.
type(df[["ShotType"]])

pandas.core.frame.DataFrame

In [29]:
import os

# Folder holding the raw metrics CSVs, and the folder used for the label files.
metricsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics"
labelsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels"

# Keep only the .csv entries of the metrics folder (names only, not full paths).
files = list(filter(lambda name: name.endswith(".csv"), os.listdir(metricsFolder)))

# Show which files were picked up.
print(files)


['DrivingRange-2024-03-03 13:35:37 +0000.csv', 'DrivingRange-2024-03-03 21:03:28 +0000.csv', 'DrivingRange-2024-03-03 21:06:50 +0000.csv', 'DrivingRange-2024-03-03 21:18:09 +0000.csv', 'DrivingRange-2024-03-03 21:20:14 +0000.csv']


In [30]:
# For every metrics file name, build the pair
# (path inside metricsFolder, path of the same file name inside labelsFolder).
filePaths = [
    tuple(os.path.join(folder, name) for folder in (metricsFolder, labelsFolder))
    for name in files
]

filePaths

[('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 13:35:37 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 13:35:37 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:03:28 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 21:03:28 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:06:50 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 21:06:50 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:18:09 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2

In [32]:
# NOTE(review): project-local import placed mid-notebook; what extractLabel
# does with the two paths is not visible here — see the labelExtraction module.
from labelExtraction import extractLabel

# Call extractLabel once per (metrics path, labels path) pair in filePaths.
for importPath, exportPath in filePaths:
    extractLabel(import_path=importPath, export_path=exportPath)