In [2]:
import pandas as pd

file_path = r"C:\Users\lukeh\Downloads\total_inj_df_1.csv"
data = pd.read_csv(file_path)

In [4]:
data['injured?'] = data['Injury / Surgery'].apply(
    lambda x: 0 if pd.isna(x) or x.strip().lower() == 'na' else 1
)



In [6]:
elbow_injuries = [
    "Tommy John surgery",
    "Elbow surgery (UCL)",
    "Elbow surgery",
    "Elbow surgery (internal brace)",
    "Elbow surgery (UCL revision)",
    "Arthroscopic elbow surgery",
    "Elbow surgery (UCL/flexor tendon repair)",
    "Elbow surgery (ulnar neuritis)"
]

data['elbow?'] = data['Injury / Surgery'].apply(
    lambda x: 1 if str(x).strip() in elbow_injuries else 0
)



In [8]:
elbow_count = data['elbow?'].sum()

print(f"Number of elbow-related injuries: {elbow_count}")


Number of elbow-related injuries: 134


In [10]:
data_cleaned = data.dropna(subset=['p_game'])

print(data_cleaned.head())

rows_removed = len(data) - len(data_cleaned)
print(f"Number of rows removed: {rows_removed}")


               Name Team Pos Injury / Surgery Date       Injury / Surgery  \
0     Chase Silseth  LAA  SP   2024-04-07 00:00:00     Elbow inflammation   
1      José Cisnero  LAA  RP   2024-04-27 00:00:00  Shoulder inflammation   
3       Adam Cimber  LAA  RP   2024-06-14 00:00:00  Shoulder inflammation   
4  Patrick Sandoval  LAA  SP   2024-06-26 00:00:00    Elbow surgery (UCL)   
6     Carson Fulmer  LAA  SP   2024-08-24 00:00:00     Elbow inflammation   

      Status IL Retro Date Eligible to Return Return Date Latest Update  ...  \
0  Activated      04/08/24           06/07/24    06/25/24     Activated  ...   
1  Activated      04/28/24           06/27/24    08/17/24     Activated  ...   
3  Activated      06/15/24           06/30/24    07/22/24     Activated  ...   
4  Activated      06/22/24           08/21/24    09/30/24     Activated  ...   
6  Activated      08/27/24           09/11/24    09/30/24     Activated  ...   

  n_offspeed_formatted  offspeed_avg_speed  offspeed_avg

In [12]:
elbow_count = data_cleaned['elbow?'].sum()

print(f"Number of elbow-related injuries: {elbow_count}")


Number of elbow-related injuries: 73


# Explanation of Below Code - Injury Risk Calculator Code

### Data Preparatioa.
- **Feature Selection**: Extracts relevant features (e.g., fastball speed, breaking speed, arm angle).
- **Handle Missing Values**: Replaces missing values with column means.
- **Train-Test Split**: Splits data into training (70%) and test (30%) sets.

### Model Training
- **Scaling Features**: Standardizes the data using `StandardScaler` to improve model performance.
- **Logistic Regression**: Trains a logistic regression model (`LogisticRegression`) to predict elbow injury risk.
- **Save Model**: Saves the trained model and scaler to `.pkl` files for future use.

### Risk Calculation
- **Reload Model**: Loads the saved model and scaler to ensure the app uses a trained model.
- **Prediction**: Predicts the probability of injury based on user inputs scaled by the saved scaler.

### Dash App Layout
- **Input Fields**: Includes numeric inputs for variables like:
  - Fastball spin rate
  - Offspeed break
  - Arm angle, etc.
  - Input ranges are displayed for user guidance.
- **Calculate Button**: Triggers the injury risk calculation.

### Callback Function
- **User Inputs**: Takes user-provided values for the features.
- **Feature Scaling**: Scales the input values using the trained scaler.
- **Injury Risk Prediction**: Calculates injury risk using the logistic regression model.
- **Output**: Displays the predicted injury risk as a percentage.

### Deployment
- **Run Server**: Launches the app locally with `app.run_server(debug=True)` for testing.
- **Interactive Visualizations**: Provides a user-friendly interface to explore injury risks dynamically.


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pickle

file_path = r"C:\Users\lukeh\Downloads\refined_data.csv"
data = pd.read_csv(file_path)

features = [
    'player_age', 'p_game', 'fastball_avg_speed', 'breaking_avg_speed',
    'pitch_count', 'arm_angle', 'offspeed_avg_speed'
]
X = data[features]
y = data['elbow?']  

X = X.fillna(X.mean())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

log_reg = LogisticRegression(max_iter=500)
log_reg.fit(X_train_scaled, y_train)

with open("logistic_model.pkl", "wb") as f:
    pickle.dump(log_reg, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("Model and scaler saved successfully.")


Model and scaler saved successfully.


In [22]:
with open("logistic_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)


In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pickle

file_path = r"C:\Users\lukeh\Downloads\refined_data.csv"
data = pd.read_csv(file_path)

features = [
    'p_formatted_ip', 'fastball_avg_spin', 'offspeed_avg_break', 'fastball_avg_break_z_induced',
    'fastball_avg_speed', 'breaking_avg_speed', 'arm_angle', 'offspeed_avg_speed'
]
X = data[features]
y = data['elbow?']  

X = X.fillna(X.mean())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

log_reg = LogisticRegression(max_iter=500)
log_reg.fit(X_train_scaled, y_train)

with open("logistic_model.pkl", "wb") as f:
    pickle.dump(log_reg, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("Model and scaler saved successfully.")

# Reload for verification
with open("logistic_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

dummy_input = np.array([[100, 2200, 12, 15, 95, 85, 50, 80]])
dummy_input_scaled = scaler.transform(dummy_input)
risk = model.predict_proba(dummy_input_scaled)[0][1] * 100

print(f"Test Prediction: Injury Risk = {risk:.2f}%")


Model and scaler saved successfully.
Test Prediction: Injury Risk = 3.88%



X does not have valid feature names, but StandardScaler was fitted with feature names



In [36]:
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import numpy as np
import pickle

with open("logistic_model.pkl", "rb") as f:
    model = pickle.load(f)
with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

input_ranges = {
    "p_formatted_ip": "0 - 300",
    "fastball_avg_spin": "1500 - 3000 rpm",
    "offspeed_avg_break": "0 - 20 inches",
    "fastball_avg_break_z_induced": "0 - 30 inches",
    "fastball_avg_speed": "80 - 105 mph",
    "breaking_avg_speed": "70 - 95 mph",
    "arm_angle": "30 - 90 degrees",
    "offspeed_avg_speed": "60 - 85 mph",
}

app.layout = dbc.Container([
    dbc.Row([
        dbc.Col(html.H1("Injury Risk Calculator", className="text-center text-primary mb-4"), width=12)
    ]),
    dbc.Row([
        dbc.Col([
            dbc.Label(f"Formatted Innings Pitched (Range: {input_ranges['p_formatted_ip']})"),
            dbc.Input(id="p_formatted_ip", type="number", placeholder="Enter formatted innings pitched", value=100),
            dbc.Label(f"Fastball Avg Spin Rate (Range: {input_ranges['fastball_avg_spin']})"),
            dbc.Input(id="fastball_avg_spin", type="number", placeholder="Enter fastball spin rate (rpm)", value=2200),
            dbc.Label(f"Offspeed Avg Break (Range: {input_ranges['offspeed_avg_break']})"),
            dbc.Input(id="offspeed_avg_break", type="number", placeholder="Enter offspeed break (inches)", value=12),
            dbc.Label(f"Fastball Avg Break Z-Induced (Range: {input_ranges['fastball_avg_break_z_induced']})"),
            dbc.Input(id="fastball_avg_break_z_induced", type="number", placeholder="Enter z-induced fastball break", value=15),
        ], width=6),
        dbc.Col([
            dbc.Label(f"Fastball Avg Speed (Range: {input_ranges['fastball_avg_speed']})"),
            dbc.Input(id="fastball_avg_speed", type="number", placeholder="Enter fastball speed (mph)", value=95),
            dbc.Label(f"Breaking Avg Speed (Range: {input_ranges['breaking_avg_speed']})"),
            dbc.Input(id="breaking_avg_speed", type="number", placeholder="Enter breaking speed (mph)", value=85),
            dbc.Label(f"Arm Angle (Range: {input_ranges['arm_angle']})"),
            dbc.Input(id="arm_angle", type="number", placeholder="Enter arm angle (degrees)", value=50),
            dbc.Label(f"Offspeed Avg Speed (Range: {input_ranges['offspeed_avg_speed']})"),
            dbc.Input(id="offspeed_avg_speed", type="number", placeholder="Enter offspeed speed (mph)", value=80),
            html.Br(),
            dbc.Button("Calculate Risk", id="calculate-btn", color="primary", className="mt-2"),
        ], width=6),
    ]),
    dbc.Row([
        dbc.Col(html.H3(id="risk-output", className="text-center mt-4"))
    ])
])

@app.callback(
    Output("risk-output", "children"),
    Input("calculate-btn", "n_clicks"),
    [
        Input("p_formatted_ip", "value"),
        Input("fastball_avg_spin", "value"),
        Input("offspeed_avg_break", "value"),
        Input("fastball_avg_break_z_induced", "value"),
        Input("fastball_avg_speed", "value"),
        Input("breaking_avg_speed", "value"),
        Input("arm_angle", "value"),
        Input("offspeed_avg_speed", "value"),
    ]
)
def calculate_risk(n_clicks, p_ip, spin, offspeed_break, break_z, fastball_speed, breaking_speed, arm_angle, offspeed_speed):
    if n_clicks is None:
        return "Enter values and click Calculate Risk."
    
    features = np.array([[p_ip, spin, offspeed_break, break_z, fastball_speed, breaking_speed, arm_angle, offspeed_speed]])
    
    try:
        features_scaled = scaler.transform(features)
        risk = model.predict_proba(features_scaled)[0][1] * 100
        return f"Injury Risk: {risk:.2f}%"
    except Exception as e:
        return f"Error: {str(e)}"

if __name__ == "__main__":
    app.run_server(debug=True)
