In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import math

In [22]:
data = pd.read_csv('Kershaw_FF_2020.csv')

In [None]:
def calculate_spin_components(row):
    # Convert spin rate to radians per second
    data['spin_rate_rps'] = data['release_spin_rate'] * 2 * math.pi / 60

    # Convert spin axis to radians
    data['spin_axis_radians'] = data['spin_axis'] * math.pi / 180

    # Calculate theoretical SpinX and SpinY in radians per second
    data['SpinX_rps'] = data['spin_rate_rps'] * data['spin_axis_radians'].apply(math.cos)
    data['SpinY_rps'] = data['spin_rate_rps'] * data['spin_axis_radians'].apply(math.sin)

    # Calculate SpinZ in radians per second with a check to avoid negative values
    def calculate_spinz(row):
        spin_squared = row['spin_rate_rps']**2
        spin_x_squared = row['SpinX_rps']**2
        spin_y_squared = row['SpinY_rps']**2
        spin_z_squared = spin_squared - spin_x_squared - spin_y_squared
        if spin_z_squared < 0:
            return -math.sqrt(-spin_z_squared) if row['spin_axis'] > 90 else math.sqrt(-spin_z_squared)
        return math.sqrt(spin_z_squared)

    data['SpinZ_rps'] = data.apply(calculate_spinz, axis=1)

    # Convert spin components back to RPM
    data['SpinX_rpm'] = data['SpinX_rps'] * 60 / (2 * math.pi)
    data['SpinY_rpm'] = data['SpinY_rps'] * 60 / (2 * math.pi)
    data['SpinZ_rpm'] = data['SpinZ_rps'] * 60 / (2 * math.pi)

In [27]:
# Features and target variables
X = data[['release_speed', 'pfx_x', 'pfx_z']]
y = data[['SpinX_rpm', 'SpinY_rpm', 'SpinZ_rpm']]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model
mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
print(f'Mean Squared Error for SpinX: {mse[0]}')
print(f'Mean Squared Error for SpinY: {mse[1]}')
print(f'Mean Squared Error for SpinZ: {mse[2]}')

# Compare actual vs predicted values
comparison = pd.DataFrame({
    'Actual_SpinX': y_test['SpinX_rpm'], 'Predicted_SpinX': y_pred[:, 0],
    'Actual_SpinY': y_test['SpinY_rpm'], 'Predicted_SpinY': y_pred[:, 1],
    'Actual_SpinZ': y_test['SpinZ_rpm'], 'Predicted_SpinZ': y_pred[:, 2]
})
print(comparison)

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.