## Libraries

In [1]:
import os
import re
import pandas as pd
from labelExtraction import importFile
import datetime

## Metrics 

In [2]:
metricsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics"

# Full paths of every CSV export found directly in the metrics folder, in sorted order
file_paths = sorted(
    os.path.join(metricsFolder, entry)
    for entry in os.listdir(metricsFolder)
    if entry.endswith(".csv")
)

# Load each export into its own DataFrame: line 0 is the header and line 1 is
# skipped (presumably a units row -- confirm against a raw file). The source path
# is stored in `Metricsfile` so every row can be traced back to its file.
l_metrics = [
    pd.read_csv(csv_path, header=[0], skiprows=[1]).assign(Metricsfile=csv_path)
    for csv_path in file_paths
]


In [3]:
# Normalise the Date column of every metrics frame to datetime. The exports use
# two layouts (2-digit vs 4-digit year), so each format is tried in turn.
for i, df in enumerate(l_metrics):
    for date_format in ('%d.%m.%y %H:%M:%S', '%d.%m.%Y %H:%M:%S'):
        try:
            l_metrics[i]['Date'] = pd.to_datetime(df['Date'], format=date_format)
        except ValueError:
            continue  # this layout does not fit; try the next one
        break  # parsed successfully, no need to try further layouts
    else:
        # Neither layout matched: the column is left as-is and the frame is only reported
        print("Unable to parse date strings in DataFrame index:", i)

In [4]:
# Concatenate the per-file frames into one table
df_metrics = pd.concat(l_metrics)

# In case I have uploaded the same file twice (it will have a different file name), I have to delete duplicates
df_metrics = df_metrics.round(8)                 # Two files of the same session may differ in number format (presumably decimal precision)
df_metrics = df_metrics.map(lambda x: x.strip() if isinstance(x, str) else x) # Remove leading/trailing whitespace from all string cells
# `Metricsfile` holds the source path, which differs between two uploads of the
# same session. It must be left out of the comparison, otherwise no two rows
# coming from different files could ever be recognised as duplicates.
dedup_columns = [col for col in df_metrics.columns if col != "Metricsfile"]
df_metrics = df_metrics.drop_duplicates(subset=dedup_columns)

# Sort by Date
df_metrics = df_metrics.sort_values(by='Date')

In [234]:
# Preview the first three rows of the combined metrics table
df_metrics.head(3)

Unnamed: 0,Date,Player,Club Name,Club Type,Club Speed,Attack Angle,Club Path,Club Face,Face to Path,Ball Speed,...,Total Distance,Total Deviation Angle,Total Deviation Distance,Note,Tag,Air Density,Temperature,Air Pressure,Relative Humidity,Metricsfile
0,2023-03-21 15:15:41,Rok,9I XXIO eleven,9 Iron,96.296612,-1.783461,-8.317504,-7.338091,0.979413,123.789108,...,92.223274,-6.658262,-10.693084,,,1.165952,21.1111,98.48524,0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...
1,2023-03-21 15:16:52,Rok,9I XXIO eleven,9 Iron,110.074683,-0.002081,1.610606,-4.977206,-6.587812,140.224833,...,111.578529,-6.700834,-13.019547,,,1.165952,21.1111,98.48524,0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...
2,2023-03-21 15:17:50,Rok,9I XXIO eleven,9 Iron,105.755868,-0.93542,-0.225933,-16.238737,-16.012804,73.435794,...,25.50279,-15.023775,-6.610821,,,1.165952,21.1111,98.48524,0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...


In [242]:
# Check the column dtypes; Date should be datetime64 after the conversion step
df_metrics.dtypes

Date                        datetime64[ns]
Player                              object
Club Name                           object
Club Type                           object
Club Speed                         float64
Attack Angle                       float64
Club Path                          float64
Club Face                          float64
Face to Path                       float64
Ball Speed                         float64
Smash Factor                       float64
Launch Angle                       float64
Launch Direction                   float64
Backspin                           float64
Sidespin                           float64
Spin Rate                          float64
Spin Rate Type                      object
Spin Axis                          float64
Apex Height                        float64
Carry Distance                     float64
Carry Deviation Angle              float64
Carry Deviation Distance           float64
Total Distance                     float64
Total Devia

## Videos

In [5]:
videosFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Videos"

# Recursively gather the path of every .mp4 file below the videos folder
video_paths = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(videosFolder)
    for filename in filenames
    if filename.endswith(".mp4")
]

# Timestamp each video with its file modification time (naive local datetime)
modified_at = [
    datetime.datetime.fromtimestamp(os.path.getmtime(video)) for video in video_paths
]

# One row per video, ordered chronologically
df_videos = pd.DataFrame({'videoPath': video_paths, 'Date': modified_at}).sort_values(by='Date')

In [6]:
# Confirm that Date was stored as a datetime column
df_videos.dtypes

videoPath            object
Date         datetime64[ns]
dtype: object

In [None]:
# Render the videos table (one row per .mp4 file found)
display(df_videos)

## Merging Metrics and Videos

There is a problem with matching the datetime from the metrics data to the creation datetime of the videos. It occasionally occurs that there is about a 1-second difference between the two. Since I don't hit 2 shots within 2 seconds, I can match each video to the nearest metrics timestamp within a 2-second tolerance, so that the cases with a 1 s difference are still matched correctly.

In [10]:
# Attach to each video the metrics row whose timestamp is closest to the video's,
# accepting a match only when the two are at most 2 seconds apart; videos with no
# metrics row inside that window keep NaN in the metrics columns.
# Both frames are sorted by Date beforehand, which merge_asof requires.
merged_df_asof = pd.merge_asof(
    left=df_videos,
    right=df_metrics,
    on='Date',
    direction='nearest',
    tolerance=pd.Timedelta('2s'),
    suffixes=('_videos', '_metrics'),
)


In [11]:
# Inspect the merge result; videos without a metrics match show NaN in the metrics columns
display(merged_df_asof)

Unnamed: 0,videoPath,Date,Player,Club Name,Club Type,Club Speed,Attack Angle,Club Path,Club Face,Face to Path,...,Total Distance,Total Deviation Angle,Total Deviation Distance,Note,Tag,Air Density,Temperature,Air Pressure,Relative Humidity,Metricsfile
0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:40,,,,,,,,,...,,,,,,,,,,
1,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:24:52,,,,,,,,,...,,,,,,,,,,
2,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-11-29 17:26:04,,,,,,,,,...,,,,,,,,,,
3,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:13:58,,,,,,,,,...,,,,,,,,,,
4,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2022-12-24 11:15:04,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:39:10,Rok,7I XXIO eleven,7 Iron,134.319366,-1.550528,9.684385,-2.206765,-11.891150,...,150.571548,-8.713073,-22.809591,,,1.199572,21.1111,101.325,0.0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...
616,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:39:36,Rok,7I XXIO eleven,7 Iron,134.166642,3.702559,11.712544,4.219777,-7.492768,...,141.665237,-0.687280,-1.699433,,,1.199572,21.1111,101.325,0.0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...
617,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:39:55,Rok,7I XXIO eleven,7 Iron,131.185145,3.796341,12.602219,7.453573,-5.148645,...,152.553513,-0.668225,-1.779588,,,1.199572,21.1111,101.325,0.0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...
618,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...,2024-03-31 14:40:16,Rok,7I XXIO eleven,7 Iron,129.081442,0.160541,14.670392,2.346039,-12.324353,...,151.252411,-0.141274,-0.372414,,,1.199572,21.1111,101.325,0.0,/Users/rokbohinc/Documents/Work/Golf_AI/Golfsh...


### Determining the shot type

In [81]:
def shot_direction(x, threshold=5):
    """Classify a launch-direction value as "pull", "push" or "straight".

    Parameters
    ----------
    x : float
        Launch direction of one shot (presumably in degrees -- confirm against
        the launch monitor export).
    threshold : float, default 5
        Half-width of the "straight" band. Values strictly below ``-threshold``
        are pulls, values strictly above ``+threshold`` are pushes, and anything
        in between (bounds included) is straight.

    Returns
    -------
    str or None
        "pull", "push" or "straight"; ``None`` when ``x`` is missing
        (NaN / None / NA), so unmeasured shots are not given a label.
    """
    # Every comparison with NaN is False, so without this guard a missing
    # measurement would fall through to the last branch and be labelled "straight".
    if pd.isna(x):
        return None
    if x < -threshold:
        return "pull"
    if x > threshold:
        return "push"
    return "straight"

# NOTE(review): the only visible binding of `df` in this notebook is the loop
# variable of the date-parsing cell (i.e. the last metrics frame) -- confirm which
# frame is intended here so the cell survives Restart & Run All.
# Label every shot by its launch direction and count the shots per class
df["Direction"] = df["Launch Direction"].apply(shot_direction)
df["Direction"].value_counts()

Direction
straight    59
pull        44
push         6
Name: count, dtype: int64

In [86]:
def shot_curvature(x, draw_fade_limit=300, hook_slice_limit=800):
    """Classify a sidespin value into a shot shape.

    Parameters
    ----------
    x : float
        Sidespin of one shot (presumably in rpm -- confirm against the launch
        monitor export). Negative values map to draw/hook, positive values to
        fade/slice.
    draw_fade_limit : float, default 300
        Magnitude at which a shot stops counting as shapeless.
    hook_slice_limit : float, default 800
        Magnitude at which a draw becomes a hook and a fade becomes a slice.

    Returns
    -------
    str or None
        "hook"  for ``x < -hook_slice_limit``;
        "draw"  for ``-hook_slice_limit <= x < -draw_fade_limit``;
        ""      for ``-draw_fade_limit <= x < draw_fade_limit``;
        "fade"  for ``draw_fade_limit <= x < hook_slice_limit``;
        "slice" for ``x >= hook_slice_limit``;
        ``None`` when ``x`` is missing (NaN / None / NA).
    """
    # Every `<` comparison with NaN is False, so without this guard a missing
    # measurement would fall through all branches and be labelled "slice".
    if pd.isna(x):
        return None
    if x < -hook_slice_limit:
        return "hook"
    if x < -draw_fade_limit:
        return "draw"
    if x < draw_fade_limit:
        return ""
    if x < hook_slice_limit:
        return "fade"
    return "slice"

# Label every shot by its sidespin-based shape and count the shots per class
# (an empty-string label means the sidespin was inside the no-shape band)
df["Shape"] = df["Sidespin"].apply(shot_curvature)
df["Shape"].value_counts()

Shape
         42
draw     27
hook     17
fade     13
slice    10
Name: count, dtype: int64

In [87]:
# Combine the two labels into one "<direction>-<shape>" class per shot.
# NOTE(review): shot_curvature returns "" for low-sidespin shots, so those rows get
# a trailing hyphen (e.g. "straight-") -- confirm this is the intended label.
df["ShotType"] = df["Direction"] + "-" + df["Shape"]
df["ShotType"].value_counts()

ShotType
straight-draw    22
straight-        21
pull-            20
straight-hook    12
pull-slice       10
pull-fade         9
push-hook         5
pull-draw         5
straight-fade     4
push-             1
Name: count, dtype: int64

In [93]:
# Double brackets select a list of columns, so this yields a one-column DataFrame rather than a Series
type(df[["ShotType"]])

pandas.core.frame.DataFrame

In [29]:
import os

# Specify the directory paths: raw metrics exports (input) and extracted labels (output)
metricsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics"
labelsFolder = "/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels"

# List all CSV files in the metrics directory.
# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible processing order (the metrics-loading cell sorts its paths too).
files = sorted(file for file in os.listdir(metricsFolder) if file.endswith(".csv"))

# Print the list of files
print(files)


['DrivingRange-2024-03-03 13:35:37 +0000.csv', 'DrivingRange-2024-03-03 21:03:28 +0000.csv', 'DrivingRange-2024-03-03 21:06:50 +0000.csv', 'DrivingRange-2024-03-03 21:18:09 +0000.csv', 'DrivingRange-2024-03-03 21:20:14 +0000.csv']


In [30]:
# Pair every metrics CSV with a label file of the same name in the labels folder:
# each entry is (path inside metricsFolder, path inside labelsFolder)
filePaths = [
    tuple(os.path.join(folder, csv_name) for folder in (metricsFolder, labelsFolder))
    for csv_name in files
]

filePaths

[('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 13:35:37 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 13:35:37 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:03:28 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 21:03:28 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:06:50 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2024-03-03 21:06:50 +0000.csv'),
 ('/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/raw/Metrics/DrivingRange-2024-03-03 21:18:09 +0000.csv',
  '/Users/rokbohinc/Documents/Work/Golf_AI/Golfshot_Categoriser/data/extracted/labels/DrivingRange-2

In [32]:
from labelExtraction import extractLabel

# Run the project's extractLabel helper once per (metrics CSV, label CSV) pair.
# NOTE(review): presumably reads import_path and writes the labels to export_path --
# confirm in labelExtraction, and whether the labels folder must already exist.
for importPath, exportPath in filePaths:
    extractLabel(import_path=importPath, export_path=exportPath)