In [50]:
import pandas as pd
import numpy as np

In [62]:
sim_model = pd.read_csv('Duffey_Tyler_2022_FF.csv')
sim_model = sim_model[['pitch_type', 'release_speed', 'release_spin_rate', 'release_pos_z', 'release_pos_x', 'spin_axis',
                                               'plate_z', 'plate_x', 'pfx_x', 'pfx_z', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az']]

In [63]:
######################################
## CALCULATE SPIN X, SPIN Y, SPIN Z ##
######################################

def calculate_spin_components(data):
    
    # Extract necessary columns from the data
    vx0 = data["vx0"]
    vy0 = data["vy0"]
    vz0 = data["vz0"]
    ax = data["ax"]
    ay = data["ay"]
    az = data["az"]
    
    # Calculate velocity and acceleration magnitudes
    velocity_magnitude = np.sqrt(vx0**2 + vy0**2 + vz0**2)
    acceleration_magnitude = np.sqrt(ax**2 + ay**2 + az**2)
    
    # Calculate velocity and acceleration unit vectors
    velocity_unit_x = vx0 / velocity_magnitude
    velocity_unit_y = vy0 / velocity_magnitude
    velocity_unit_z = vz0 / velocity_magnitude
    acceleration_unit_x = ax / acceleration_magnitude
    acceleration_unit_y = ay / acceleration_magnitude
    acceleration_unit_z = az / acceleration_magnitude

    # Calculate spin axis components
    SpinX = velocity_unit_y * acceleration_unit_z - velocity_unit_z * acceleration_unit_y
    SpinY = velocity_unit_z * acceleration_unit_x - velocity_unit_x * acceleration_unit_z
    SpinZ = velocity_unit_x * acceleration_unit_y - velocity_unit_y * acceleration_unit_x
    
    # Normalize spin axis components
    spin_magnitude = np.sqrt(SpinX**2 + SpinY**2 + SpinZ**2)
    SpinX = SpinX / spin_magnitude
    SpinY = SpinY / spin_magnitude
    SpinZ = SpinZ / spin_magnitude
    
    return SpinX, SpinY, SpinZ

spin_x, spin_y, spin_z = calculate_spin_components(sim_model)
    
# Add spin axis components to the DataFrame
sim_model["SpinX"] = spin_x
sim_model["SpinY"] = spin_y
sim_model["SpinZ"] = spin_z


In [64]:
def calculate_spin_axis(data):
    SpinX = data["SpinX"]
    SpinY = data["SpinY"]
    SpinZ = data["SpinZ"]
    
    spin = np.column_stack((SpinX, SpinY, SpinZ))
    
    return spin

spin = calculate_spin_axis(sim_model)

# Add spin axis to the DataFrame
sim_model["spin"] = list(spin)

In [65]:
def getSimpleLatLon(spin):
    spinAxis = spin / (np.linalg.norm(spin) + 1e-9)
    
    # Assume an identity matrix for hawkeyeRotMat
    hawkeyeRotMat = np.eye(3)
    inverseHawkeyeRotMat = hawkeyeRotMat  # Identity matrix is its own inverse
    
    # Rotate the spin axis from global coordinates to local coordinates
    rotatedSpinAxis = np.dot(inverseHawkeyeRotMat, spinAxis)
    
    # Convert between Trajekt and Hawkeye reference frames (x = -z, y = x, z = -y)
    x, y, z = -rotatedSpinAxis[2], rotatedSpinAxis[0], -rotatedSpinAxis[1]
    
    return {
        "lon": np.degrees(np.arctan2(y, x)),
        "lat": np.degrees(np.pi/2 - np.arccos(z))
    }
    
sim_model["SeamLat"] = sim_model["spin"].apply(lambda spin: getSimpleLatLon(np.array(spin))["lat"])
sim_model["SeamLon"] = sim_model["spin"].apply(lambda spin: getSimpleLatLon(np.array(spin))["lon"])

In [66]:
print(sim_model.head())

  pitch_type  release_speed  release_spin_rate  release_pos_z  release_pos_x  \
0         FF           90.0               2168           6.23          -2.39   
1         FF           90.7               2230           6.10          -2.50   
2         FF           90.9               2269           6.16          -2.60   
3         FF           90.9               2138           6.16          -2.52   
4         FF           90.4               2208           6.08          -2.65   

   spin_axis  plate_z  plate_x  pfx_x  pfx_z  ...       vz0        ax  \
0        201     3.02     0.61   0.09   1.11  ... -4.341588 -0.548144   
1        210     2.78    -0.36  -0.19   1.45  ... -5.554115 -3.374833   
2        202     3.28     0.01  -0.18   1.28  ... -4.161636 -3.300812   
3        206     3.72     0.43  -0.16   1.25  ... -2.981592 -3.234983   
4        201     3.51    -0.52  -0.16   1.17  ... -3.000728 -3.062860   

          ay         az     SpinX     SpinY     SpinZ  \
0  27.587603 -18.734218

In [67]:
sim_model = sim_model[['pitch_type', 'release_speed', 'release_spin_rate', 'release_pos_z', 'release_pos_x', 'spin_axis',
                                               'SeamLat', 'SeamLon', 'pfx_x', 'pfx_z']]
print(sim_model.head())

  pitch_type  release_speed  release_spin_rate  release_pos_z  release_pos_x  \
0         FF           90.0               2168           6.23          -2.39   
1         FF           90.7               2230           6.10          -2.50   
2         FF           90.9               2269           6.16          -2.60   
3         FF           90.9               2138           6.16          -2.52   
4         FF           90.4               2208           6.08          -2.65   

   spin_axis   SeamLat    SeamLon  pfx_x  pfx_z  
0        201 -3.074987  92.859371   0.09   1.11  
1        210 -2.830244  81.591248  -0.19   1.45  
2        202 -3.199894  82.964508  -0.18   1.28  
3        206 -3.459775  83.865628  -0.16   1.25  
4        201 -2.586741  84.137244  -0.16   1.17  


In [68]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

features = ['release_pos_x', 'release_pos_z', 'release_speed', 'release_spin_rate', 'SeamLat', 'SeamLon']
target = ['pfx_x', 'pfx_z']

scaler = MinMaxScaler()
sim_model[features] = scaler.fit_transform(sim_model[features])

label_encoder = LabelEncoder()
sim_model['pitch_type'] = label_encoder.fit_transform(sim_model['pitch_type'])

X_train, X_test, y_train, y_test = train_test_split(sim_model[features], sim_model[target], test_size=0.2, random_state=42)

In [69]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [155]:
# Define the input shape based on the number of features
input_shape = (len(features),)

# Create a sequential model
model = Sequential()

# Add the first hidden layer with 64 units and ReLU activation
model.add(Dense(64, activation='relu', input_shape=input_shape))

# Add additional hidden layers as needed
model.add(Dense(64, activation='relu')) #########
model.add(Dense(32, activation='relu'))

# Add the output layer with 2 units (for pfx_x and pfx_z) and linear activation
model.add(Dense(2, activation='linear'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [156]:
from tensorflow.keras.callbacks import EarlyStopping

# Create an early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor the validation loss
    min_delta=0.001,     # Minimum change in the monitored quantity to qualify as an improvement
    patience=10,         # Number of epochs with no improvement after which training will be stopped
    verbose=1,           # Verbosity mode (0 = silent, 1 = progress bar)
    mode='min',          # In 'min' mode, training will stop when the quantity monitored has stopped decreasing
    restore_best_weights=True  # Restore the weights from the epoch with the best value of the monitored quantity
)

In [157]:
from tensorflow.keras.callbacks import CSVLogger

csv_logger = CSVLogger('training_log.csv', append=True, separator=',')

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=200, batch_size=16, validation_split=0.2, callbacks=[early_stopping, csv_logger])

Epoch 1/200


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.7470 - val_loss: 0.2430
Epoch 2/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1442 - val_loss: 0.0665
Epoch 3/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0728 - val_loss: 0.0524
Epoch 4/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0523 - val_loss: 0.0588
Epoch 5/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0484 - val_loss: 0.0443
Epoch 6/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0375 - val_loss: 0.0403
Epoch 7/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0332 - val_loss: 0.0395
Epoch 8/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0322 - val_loss: 0.0336
Epoch 9/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7fe4844b3110>

In [167]:
# Save the entire model in a single file
model.save('model.h5')



In [158]:
# Evaluate the model on the testing set
loss = model.evaluate(X_test, y_test)
print("Test loss:", loss)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0122
Test loss: 0.012236538343131542


In [159]:
Duffey_Tyler_2021_FF = pd.read_csv('Duffey_Tyler_2021_FF.csv')
print(Duffey_Tyler_2021_FF.head())

  pitch_type  release_speed  release_spin_rate  release_pos_z  release_pos_x  \
0         FF           93.1               2265           6.06          -2.65   
1         FF           92.7               2259           6.06          -2.66   
2         FF           92.7               2314           6.21          -2.60   
3         FF           94.4               2269           6.20          -2.65   
4         FF           92.7               2162           6.28          -2.69   

   spin_axis   SeamLat    SeamLon  pfx_x  pfx_z  
0        214 -4.654379  70.613116  -0.43   1.42  
1        214 -4.191090  71.599232  -0.40   1.45  
2        212 -4.517261  68.650477  -0.43   1.54  
3        213 -3.009771  59.217537  -0.58   1.59  
4        222 -4.348771  65.168437  -0.59   1.37  


In [160]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

new_features = ['release_pos_x', 'release_pos_z', 'release_speed', 'release_spin_rate', 'SeamLat', 'SeamLon']
new_target = ['pfx_x', 'pfx_z']

scaler = MinMaxScaler()
Duffey_Tyler_2021_FF[new_features] = scaler.fit_transform(Duffey_Tyler_2021_FF[new_features])

label_encoder = LabelEncoder()
Duffey_Tyler_2021_FF['pitch_type'] = label_encoder.fit_transform(Duffey_Tyler_2021_FF['pitch_type'])

In [161]:
predictions = model.predict(Duffey_Tyler_2021_FF[new_features])

Duffey_Tyler_2021_FF['predicted_pfx_x'] = predictions[:, 0]
Duffey_Tyler_2021_FF['predicted_pfx_z'] = predictions[:, 1]

print(Duffey_Tyler_2021_FF[['pfx_x', 'pfx_z', 'predicted_pfx_x', 'predicted_pfx_z']])

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 40ms/step

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
     pfx_x  pfx_z  predicted_pfx_x  predicted_pfx_z
0    -0.43   1.42        -0.350564         1.222968
1    -0.40   1.45        -0.320483         1.227438
2    -0.43   1.54        -0.420627         1.356235
3    -0.58   1.59        -0.698609         1.508150
4    -0.59   1.37        -0.524806         1.394786
..     ...    ...              ...              ...
464  -0.62   1.39        -0.581396         1.444116
465  -0.79   1.25        -0.634746         1.504727
466  -0.53   1.32        -0.425030         1.414663
467  -0.65   1.47        -0.692578         1.512535
468  -0.69   1.45        -0.695304         1.517622

[469 rows x 4 columns]


In [162]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse_x = mean_squared_error(Duffey_Tyler_2021_FF['pfx_x'], Duffey_Tyler_2021_FF['predicted_pfx_x'])
mse_z = mean_squared_error(Duffey_Tyler_2021_FF['pfx_z'], Duffey_Tyler_2021_FF['predicted_pfx_z'])

mae_x = mean_absolute_error(Duffey_Tyler_2021_FF['pfx_x'], Duffey_Tyler_2021_FF['predicted_pfx_x'])
mae_z = mean_absolute_error(Duffey_Tyler_2021_FF['pfx_z'], Duffey_Tyler_2021_FF['predicted_pfx_z'])

print("MSE for pfx_x:", mse_x)
print("MSE for pfx_z:", mse_z)
print("MAE for pfx_x:", mae_x)
print("MAE for pfx_z:", mae_z)

MSE for pfx_x: 0.011365540424360575
MSE for pfx_z: 0.01845781937663172
MAE for pfx_x: 0.09473035625239679
MAE for pfx_z: 0.10811287869776744


In [163]:
Duffey_Tyler_2019_FF = pd.read_csv('Duffey_Tyler_2019_FF.csv')
print(Duffey_Tyler_2019_FF.head())

  pitch_type  release_speed  release_spin_rate  release_pos_z  release_pos_x  \
0         FF           95.7               2247           6.10          -2.62   
1         FF           94.7               2254           6.23          -2.59   
2         FF           94.0               2177           6.30          -2.31   
3         FF           95.4               2303           6.34          -2.45   
4         FF           95.6               2484           6.18          -2.60   

   spin_axis   SeamLat    SeamLon  pfx_x  pfx_z  
0        207 -4.254315  64.704574  -0.61   1.18  
1        208 -3.400609  71.076774  -0.53   1.00  
2        195 -4.789967  76.465145  -0.33   1.18  
3        200 -4.623492  67.067328  -0.49   1.37  
4        207 -4.482592  62.622096  -0.64   1.27  


In [164]:
newer_features = ['release_pos_x', 'release_pos_z', 'release_speed', 'release_spin_rate', 'SeamLat', 'SeamLon']
newer_target = ['pfx_x', 'pfx_z']

scaler = MinMaxScaler()
Duffey_Tyler_2019_FF[newer_features] = scaler.fit_transform(Duffey_Tyler_2019_FF[newer_features])

label_encoder = LabelEncoder()
Duffey_Tyler_2019_FF['pitch_type'] = label_encoder.fit_transform(Duffey_Tyler_2019_FF['pitch_type'])

In [165]:
predictions = model.predict(Duffey_Tyler_2019_FF[new_features])

Duffey_Tyler_2019_FF['predicted_pfx_x'] = predictions[:, 0]
Duffey_Tyler_2019_FF['predicted_pfx_z'] = predictions[:, 1]

print(Duffey_Tyler_2019_FF[['pfx_x', 'pfx_z', 'predicted_pfx_x', 'predicted_pfx_z']])

[1m 1/16[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448us/step
     pfx_x  pfx_z  predicted_pfx_x  predicted_pfx_z
0    -0.61   1.18        -0.461634         1.381389
1    -0.53   1.00        -0.325304         1.385399
2    -0.33   1.18        -0.205078         1.316329
3    -0.49   1.37        -0.447188         1.481485
4    -0.64   1.27        -0.523192         1.463440
..     ...    ...              ...              ...
498  -0.47   1.23        -0.336223         1.294175
499  -0.27   1.26        -0.190672         1.127833
500  -0.67   1.33        -0.559978         1.477866
501  -0.29   1.27        -0.195267         1.337080
502  -0.65   1.15        -0.490081         1.493265

[503 rows x 4 columns]


In [166]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse_x = mean_squared_error(Duffey_Tyler_2019_FF['pfx_x'], Duffey_Tyler_2019_FF['predicted_pfx_x'])
mse_z = mean_squared_error(Duffey_Tyler_2019_FF['pfx_z'], Duffey_Tyler_2019_FF['predicted_pfx_z'])

mae_x = mean_absolute_error(Duffey_Tyler_2019_FF['pfx_x'], Duffey_Tyler_2019_FF['predicted_pfx_x'])
mae_z = mean_absolute_error(Duffey_Tyler_2019_FF['pfx_z'], Duffey_Tyler_2019_FF['predicted_pfx_z'])

print("MSE for pfx_x:", mse_x)
print("MSE for pfx_z:", mse_z)
print("MAE for pfx_x:", mae_x)
print("MAE for pfx_z:", mae_z)

MSE for pfx_x: 0.01619278953603682
MSE for pfx_z: 0.03131136755686647
MAE for pfx_x: 0.116747777851865
MAE for pfx_z: 0.14233711901526327
