In [None]:
#following along the youtube tutorial: https://www.youtube.com/watch?v=BiS-uKoK5GY

#Start of Step 1 of tutorial, Setup: 

import os
import pandas as pd

#Root folder of the local openbiomechanics clone.
#Set the OPENBIOMECHANICS_REPO environment variable to point somewhere else without editing
#the notebook; when it is not set, the original location below is used.
#(macOS: in Finder, right click the openbiomechanics folder, hold Option, copy the path)
path_github_repo = os.environ.get(
    "OPENBIOMECHANICS_REPO",
    "/Users/leofeingold/Documents/GitHub/openbiomechanics",
)

#change the working directory to the repo root so the relative paths in later cells resolve
os.chdir(path_github_repo)

#list the contents of the repo root
os.listdir()

In [None]:
#list the contents of the baseball_pitching folder (path is relative to the current working directory)
os.listdir('baseball_pitching')


In [None]:
#build the relative path to the data folder inside baseball_pitching;
#os.path.join inserts the path separator for the current operating system
pitching_data_path = os.path.join('baseball_pitching', 'data')

#list what the data folder contains
os.listdir(pitching_data_path)

In [None]:
#load metadata.csv from the pitching data folder into a DataFrame
metadata_csv = os.path.join(pitching_data_path, 'metadata.csv')
with open(metadata_csv, mode='r') as metadata_file:
    metadata = pd.read_csv(metadata_file)

#end of Step 1 of tutorial, Setup


In [None]:
#start of Step 2 of tutorial, Explore Metadata CSV

#preview the first rows of the metadata table (column headers plus the leading rows)
metadata.head()


In [None]:
#print the column labels of the metadata table
print(metadata.columns)

In [None]:
#show the dimensions of the metadata table as (number of rows, number of columns)

metadata.shape

In [None]:
#count the distinct values in each identifier column of the metadata
unique_counts = {
    column: len(metadata[column].unique())
    for column in ('user', 'session', 'session_pitch')
}
num_users = unique_counts['user']
num_sessions = unique_counts['session']
num_trials = unique_counts['session_pitch']  #each distinct session_pitch value is counted as one trial
print(f"We have {num_trials} trials from {num_sessions} sessions and {num_users} users.")

In [None]:
#Exercise: find the average age, height and weight of a pitcher in the data

#Average within each pitcher (user) first, then across pitchers. Taking the mean over every
#metadata row directly would weight a pitcher by how many rows they have in the table
#(presumably one row per recorded trial - verify against metadata.head()).
pitcher_columns = ['age_yrs', 'session_height_m', 'session_mass_kg']
per_pitcher_means = metadata.groupby('user')[pitcher_columns].mean()

ave_age = per_pitcher_means['age_yrs'].mean()
ave_height = per_pitcher_means['session_height_m'].mean()
ave_weight = per_pitcher_means['session_mass_kg'].mean()

print(f"Average Age: {ave_age}, Average Height (meters): {ave_height}, Average Weight (kilograms): {ave_weight}")

#End of Step 2 of tutorial, Explore Metadata CSV

In [45]:
#Start of Step 3 of tutorial, Explore Point of Interest (POI) CSV

#folder inside the pitching data directory that holds the point-of-interest (POI) tables
path_poi = os.path.join(pitching_data_path, 'poi')

#load poi_metrics.csv into a DataFrame
poi_metrics_csv = os.path.join(path_poi, 'poi_metrics.csv')
with open(poi_metrics_csv, mode='r') as poi_file:
    poi = pd.read_csv(poi_file)

In [None]:
# Preview the first rows of the POI table (column headers plus the leading rows)
poi.head()

In [None]:
#print the column labels of the POI table
print(poi.columns)

In [None]:
#group poi data by session
#
#Why poi.groupby('session').mean() failed: since pandas 2.0, GroupBy.mean() no longer silently
#skips columns it cannot average and raises a TypeError instead. The POI table presumably
#contains text columns (identifiers such as session_pitch - confirm with poi.dtypes).
#Passing numeric_only=True restricts the mean to numeric columns.
#Background: https://www.reddit.com/r/learnpython/comments/16r1ded/groupby_not_working/
#
#poi_grouped is kept as the GroupBy object because the plotting and regression cells
#select columns from it and call .mean() themselves.
poi_grouped = poi.groupby('session')

#preview the per-session averages
#(GroupBy.head() would instead return the first raw rows of every session, not a summary)
poi_grouped.mean(numeric_only=True).head()

In [None]:
#make a scatterplot of max IR and pitch speed
import matplotlib.pyplot as plt

#per-session averages of the two metrics being compared
session_ir_velo = poi_grouped['max_shoulder_internal_rotational_velo'].mean()
session_pitch_speed = poi_grouped['pitch_speed_mph'].mean()

#one point per session, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(10, 7), facecolor='white')
ax.scatter(session_ir_velo, session_pitch_speed, s=50, c='#ffa300', alpha=0.75, edgecolors='k')
ax.set_xlabel("Shoulder Internal Rotation Velocity (degrees/second)")
ax.set_ylabel("Pitch Speed (mph)")
ax.set_title("Pitch Speed vs. Max Shoulder IR Velocity")

plt.show()

In [None]:
#alternative data visualization with correlation matrix from poi data

import seaborn as sns

#metrics to correlate with each other
columns_of_interest = [
    'pitch_speed_mph',
    'max_shoulder_internal_rotational_velo',
    'max_elbow_extension_velo',
    'rear_grf_mag_max',
]

#per-session means, restricted to the metrics listed above
subset_data = poi_grouped[columns_of_interest].mean()

#pairwise correlations between the session-level metrics
correlation_matrix = subset_data.corr()

#draw the matrix as an annotated heatmap on its own square figure
fig_corr, ax_corr = plt.subplots(figsize=(10, 10))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax_corr)
plt.show()






In [None]:
#build a basic regression model
import statsmodels.api as sm

#select the columns for the regression model
columns_of_interest = ['pitch_speed_mph', 'max_torso_rotational_velo', 'rear_grf_mag_max', 'lead_grf_mag_max']

#average each metric per session, then drop sessions with a missing value in any selected column
data = poi_grouped[columns_of_interest].mean()
data = data.dropna()

#split the data into predictor variables (x) and the target variable (y)
#other candidate predictors already available in `data`: 'rear_grf_mag_max', 'lead_grf_mag_max'
x = data[['max_torso_rotational_velo']]
y = data['pitch_speed_mph']

#add a constant column to the predictor variables (x) for the intercept term
x = sm.add_constant(x)

#fit the linear regression model
model = sm.OLS(y, x).fit()

#print the summary of the model
print(model.summary())

#visualize the residuals: plot residuals against the fitted values.
#(model.predict(x) and model.fittedvalues are the same numbers for the training data, so
#plotting one against the other only ever draws a straight line and shows nothing about fit.)
residuals = model.resid
fig, ax = plt.subplots()
sns.scatterplot(x=model.fittedvalues, y=residuals, ax=ax)
ax.axhline(0, color='k', linestyle='--', linewidth=1)  #reference line at zero residual
ax.set_xlabel("Fitted Pitch Speed (mph)")
ax.set_ylabel("Residual (mph)")
ax.set_title("Residuals vs. Fitted Values")
plt.show()

#End of Step 3 of tutorial, Explore Point of Interest (POI) CSV


In [84]:
#Start of Step 4 of tutorial, Explore Full Signal Timeseries Data

# initialize python packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tkinter import filedialog
from scipy import integrate
from scipy import signal

In [4]:
#identify the file paths where the full-signal data is stored

#The repo ships these tables as zip archives, not plain CSV files.
#How they were extracted here: right click the zip file in VSCode and choose "show in finder",
#then double click it in Finder; it unzips itself and the CSV appears next to it.
#(Python's standard-library zipfile module can extract an archive too.)
#To get the file path, right click the CSV in VSCode and choose "copy path".
#The r prefix makes a raw string literal: backslashes are kept as ordinary characters instead of
#starting escape sequences. It does not convert slashes, and because these paths contain no
#backslashes the prefix has no effect on them.
force_file = r'/Users/leofeingold/Documents/GitHub/openbiomechanics/baseball_pitching/data/full_sig/force_plate.csv'
joint_angles_file = r'/Users/leofeingold/Documents/GitHub/openbiomechanics/baseball_pitching/data/full_sig/joint_angles.csv'

#alternative: choose a file from your file explorer
#NOTE(review): not working - the snippet below uses the name `tk`, but no `import tkinter as tk`
#is visible in this notebook (only `from tkinter import filedialog`), which would explain a NameError
#root = tk.Tk()
#filename = tk.filedialog.askopenfilename()
#root.destroy()



In [None]:
# read the force plate csv file into a pandas DataFrame
master_force_data = pd.read_csv(force_file)

# session_pitch value of the single trial to analyze (change this to look at another trial)
trial_id = '1031_2'

# Keep only the rows of that trial.
# The index is reset to 0..n-1 so that a row's label equals its frame number within the trial.
# Later code looks rows up as trial_force_data[column][frame] with frame numbers obtained from
# np.where; without the reset those lookups would use the row labels of the master table and
# hit the wrong row (or raise a KeyError) for any trial that is not first in the file.
trial_force_data = (
    master_force_data
    .query("session_pitch == @trial_id")
    .reset_index(drop=True)
)

# fail early with a clear message instead of producing empty plots / index errors later
if trial_force_data.empty:
    raise ValueError(f"No force plate rows found for session_pitch == {trial_id!r}")



In [None]:
#plot the vertical (z) force trace of each leg against time, one figure per leg
for force_column, plot_title in (
    ('rear_force_z', "Rear Leg Vertical Force Over Time"),
    ('lead_force_z', "Front Leg Vertical Force Over Time"),
):
    plt.plot(trial_force_data['time'], trial_force_data[force_column])
    plt.ylabel('Force (N)')
    plt.xlabel('Time (s)')
    plt.title(plot_title)
    plt.show()



#timeseries indexing
#Work on plain NumPy arrays: a "frame" is then always a position within this trial.
#Indexing the pandas columns as series[frame] looks rows up by index LABEL, which only matches
#the frame position when the trial's row labels happen to start at 0.
time_values = trial_force_data['time'].to_numpy()
rear_force_z = trial_force_data['rear_force_z'].to_numpy()
lead_force_z = trial_force_data['lead_force_z'].to_numpy()

#max push-off force
frame_max_rear_force_z = int(np.argmax(rear_force_z))  #first frame at which the maximum occurs
max_rear_force_z = rear_force_z[frame_max_rear_force_z]
print("Max Rear Leg Vertical Force:" , max_rear_force_z)
print("Max rear leg vertical force occurs at frame:" , frame_max_rear_force_z)

time_max_rear_force_z = time_values[frame_max_rear_force_z]
print("Max rear leg vertical force occures at time:" , time_max_rear_force_z)

#max touch down force
frame_max_lead_force_z = int(np.argmax(lead_force_z))  #first frame at which the maximum occurs
max_lead_force_z = lead_force_z[frame_max_lead_force_z]
print("Max Lead Leg Vertical Force:" , max_lead_force_z)
print("Max lead leg vertical force occurs at frame:", frame_max_lead_force_z)

time_max_lead_force_z = time_values[frame_max_lead_force_z]
print("Max lead leg vertical force occurs at time:", time_max_lead_force_z)

#frame of initial lead foot contact:
#the first frame, up to and including the lead-leg peak, where the vertical force exceeds the threshold
FOOT_CONTACT_THRESHOLD = 20  #same units as lead_force_z (plotted as N in this notebook)
frames_above_threshold = np.flatnonzero(
    lead_force_z[:frame_max_lead_force_z + 1] > FOOT_CONTACT_THRESHOLD
)
if frames_above_threshold.size == 0:
    raise ValueError(
        f"lead_force_z never exceeds {FOOT_CONTACT_THRESHOLD}; cannot locate initial foot contact"
    )
frame_initial_lead_foot_contact = int(frames_above_threshold[0])
print("Frame of foot plant:" , frame_initial_lead_foot_contact)

time_intiial_lead_foot_contact = time_values[frame_initial_lead_foot_contact]
print("Time at foot plant:" , time_intiial_lead_foot_contact)

#filtering data (4th order lowpass butterworth)
#NumPy arrays are used so that "first" and "last" sample mean positions, not DataFrame row labels
time_values = trial_force_data['time'].to_numpy()
raw_lead_force = trial_force_data['lead_force_z'].to_numpy()

total_frames = len(time_values)
#time elapsed between the first and the last sample (correct even if the clock does not start at 0)
total_time = time_values[-1] - time_values[0]
#sampling frequency: n samples span n-1 sampling intervals (assumes evenly spaced samples)
fs = (total_frames - 1) / total_time
cutoff = 50      #low-pass cutoff frequency, in the same units as fs (Hz when time is in seconds)
nyq = 0.5 * fs   #Nyquist frequency
N = 4            #filter order
fc = cutoff/nyq  #cutoff normalized to the Nyquist frequency, as signal.butter expects without fs=
b, a = signal.butter(N, fc)

#apply the filter forwards and backwards (filtfilt), which avoids a phase shift
lead_force = signal.filtfilt(b, a, raw_lead_force) #this is the array holding the filtered data

#derivation and integration
#cumulative integral of the filtered force over time
#(cumulative_trapezoid replaces the deprecated integrate.cumtrapz)
lead_impulse = integrate.cumulative_trapezoid(lead_force, time_values)

#rate of force development: time derivative of the filtered force
lead_rfd = np.gradient(lead_force, time_values)
max_lead_rfd = lead_rfd.max()

print("Max Lead Leg Rate of Force Development:" , max_lead_rfd)

#store a single timeseries to csv: time and the unfiltered lead-leg vertical force
save_csv = pd.DataFrame({
    'Time': trial_force_data['time'].tolist(),
    'Force': trial_force_data['lead_force_z'].tolist(),
})
#NOTE: the string below is a placeholder - replace it with the destination path before running;
#as written, the file is created under that literal name in the current working directory
save_csv.to_csv("ENTER THE PATH TO THE LOCATION YOU WANT THE CSV HERE")
