In [13]:
import pandas as pd
import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess

# --------------------
# Load training data
# --------------------
# The two identifier columns are removed immediately after loading; none of the
# code in this cell reads them.
df_train = pd.read_csv("cfb_nfl_age_curve_train.csv").drop(
    columns=["player_display_name", "year"]
)

# Column roles. `features` is defined here but not referenced by the code
# visible in this cell.
features = ['true_age']
weight_col = 'snaps_weight'

# A target is any column whose name ends in '_delta'; names ending in
# '_ln_delta' satisfy that test too, so one suffix check covers both.
delta_cols = [name for name in df_train.columns if name.endswith('_delta')]
print(f"Found delta columns: {delta_cols}")

# Pre-smoothing settings used by the training loop:
#   alpha_smooth - weight given to the local mean when blending it with a row's own value
#   age_window   - half-width (in units of true_age) of the neighbourhood for that local mean
alpha_smooth = 10
age_window = 1.0

# --------------------
# Train LOESS per position
# --------------------
# Fraction of the points used for each local regression in lowess. A LARGER
# value uses more neighbours per fit and therefore gives a SMOOTHER curve;
# a smaller value follows the data more closely (wavier).
LOESS_FRAC = 0.25

# trained_curves[position][target_col] -> {"ages": ..., "values": ...}: the two
# columns returned by lowess, kept so predictions can be interpolated later.
trained_curves = {}

for pos in df_train['position_group'].unique():
    print(f"\n=== LOESS curves for position: {pos} ===")
    trained_curves[pos] = {}

    df_pos = df_train[df_train['position_group'] == pos]

    for target_col in delta_cols:
        print(f"Fitting LOESS for {target_col}")

        # A row is usable only if age, target AND weight are all present.
        # The weight has to be part of the mask: a single NaN weight would make
        # np.average return NaN for every row whose window contains it, and
        # lowess would then silently drop all of those rows.
        valid_idx = df_pos[['true_age', target_col, weight_col]].notnull().all(axis=1)
        if not valid_idx.any():
            print(f"No valid data for {target_col} in {pos}, skipping...")
            continue

        # Plain NumPy arrays: same values and order as the filtered frame, but
        # much cheaper to mask repeatedly than pandas Series.
        age_train = df_pos.loc[valid_idx, 'true_age'].to_numpy()
        y_raw = df_pos.loc[valid_idx, target_col].to_numpy()
        weights_train = df_pos.loc[valid_idx, weight_col].to_numpy()

        # Fallback used when a window's weights do not sum to a positive number;
        # it does not depend on the row, so compute it once.
        overall_mean = y_raw.mean()

        # Local smoothing: shrink each observation toward the weighted mean of
        # the rows within +/- age_window of its age. alpha_smooth acts as the
        # weight of that local mean, so low-weight rows are pulled harder.
        y_local_smooth = np.empty(len(y_raw), dtype=float)
        for i, (age, wt, y) in enumerate(zip(age_train, weights_train, y_raw)):
            nearby = (age_train >= age - age_window) & (age_train <= age + age_window)
            local_weights = weights_train[nearby]
            if local_weights.sum() > 0:
                local_mean = np.average(y_raw[nearby], weights=local_weights)
            else:
                local_mean = overall_mean
            y_local_smooth[i] = (wt * y + alpha_smooth * local_mean) / (wt + alpha_smooth)

        # Fit LOESS on the pre-smoothed targets. it=0 disables the
        # residual-based robustness reweighting passes.
        loess_fit = lowess(endog=y_local_smooth,
                           exog=age_train,
                           frac=LOESS_FRAC,
                           it=0)

        # Store as (ages, fitted_values) for later interpolation
        trained_curves[pos][target_col] = {
            "ages": loess_fit[:, 0],
            "values": loess_fit[:, 1]
        }

# --------------------
# Load inference data
# --------------------
df_infer = pd.read_csv("cfb_nfl_age_curve_inference.csv")
# Output frame starts with the two key columns; one prediction column per
# trained target is added below.
all_predictions = df_infer[['true_age', 'position_group']].copy()

# --------------------
# Predict per position via interpolation
# --------------------
for pos in trained_curves:
    pos_rows = df_infer.loc[df_infer['position_group'] == pos]
    infer_ages = pos_rows['true_age']

    for target_col in trained_curves[pos]:
        curve = trained_curves[pos][target_col]

        # Build the output column name by stripping the suffixes in this order.
        # NOTE(review): '_delta' is removed first, so for an '..._ln_delta'
        # column the second pass normally finds nothing and '_ln' stays in the
        # output name (e.g. '..._ln_projected_change'). Confirm that is the
        # intended schema before changing the order.
        base_name = target_col
        for suffix in ('_delta', '_ln_delta'):
            base_name = base_name.replace(suffix, '')
        pred_col = base_name + '_projected_change'

        # Linear interpolation on the stored curve.
        interpolated = np.interp(infer_ages, curve['ages'], curve['values'])

        # Divide by 10: same scaling factor
        all_predictions.loc[pos_rows.index, pred_col] = interpolated / 10

# --------------------
# Save predictions
# --------------------
all_predictions.to_csv("cfb_nfl_age_curve_predictions_loess.csv", index=False)
n_saved = len(all_predictions)
print(f"\nSaved {n_saved} rows to cfb_nfl_age_curve_predictions_loess.csv")

# Preview: the two key columns plus at most the first three prediction columns.
prediction_cols = [name for name in all_predictions.columns
                   if name.endswith('_projected_change')]
if len(prediction_cols) > 0:
    preview_cols = ['true_age', 'position_group', *prediction_cols[:3]]
    print("\nSample predictions:")
    print(all_predictions[preview_cols].head())


Found delta columns: ['snaps_per_non_snap_ln_delta', 'targets_per_non_target_ln_delta', 'receptions_per_target_ln_delta', 'receptions_per_non_reception_ln_delta', 'yards_per_reception_ln_delta', 'rec_touchdowns_per_non_rec_touchdown_ln_delta', 'rushs_per_non_rush_ln_delta', 'passes_per_non_pass_ln_delta', 'completions_per_non_completion_ln_delta', 'yards_per_rush_ln_delta', 'yards_per_completion_ln_delta', 'rush_touchdowns_per_non_rush_touchdown_ln_delta', 'pass_touchdowns_per_non_pass_touchdown_ln_delta', 'epa_per_snap_delta']

=== LOESS curves for position: RB ===
Fitting LOESS for snaps_per_non_snap_ln_delta
Fitting LOESS for targets_per_non_target_ln_delta
Fitting LOESS for receptions_per_target_ln_delta
Fitting LOESS for receptions_per_non_reception_ln_delta
Fitting LOESS for yards_per_reception_ln_delta
Fitting LOESS for rec_touchdowns_per_non_rec_touchdown_ln_delta
Fitting LOESS for rushs_per_non_rush_ln_delta
Fitting LOESS for passes_per_non_pass_ln_delta
Fitting LOESS for comp

In [16]:
import pandas as pd
import re
from datetime import date, timedelta

# Read the predictions CSV and derive a `create table` + `copy` statement pair
# from its header and inferred dtypes.
df = pd.read_csv('cfb_nfl_age_curve_predictions_loess.csv')

# Get the column names and data types
column_names = df.columns.tolist()
column_types = df.dtypes.tolist()

# Map pandas data types to SQL data types; any dtype not listed here falls
# back to varchar(255) when the statement is built.
type_mapping = {
    'object': 'varchar(255)',
    'int64': 'int',
    'float64': 'double precision',
    'bool': 'boolean'
}

# Make the names usable as unquoted SQL identifiers: spaces become underscores
# and a name starting with a digit gets a leading underscore.
column_names = [re.sub(r'^(\d)', r'_\1', name.replace(' ', '_')) for name in column_names]

# Generate the SQL code
table_name = 'cfb_nfl_age_curve_predictions'
column_defs = [
    f"    {column_name} {type_mapping.get(str(column_type), 'varchar(255)')}"
    for column_name, column_type in zip(column_names, column_types)
]
# Every column goes on its own line, including the first one (a newline
# follows the opening parenthesis), separated by commas with no trailing comma.
create_table_sql = f"create table {table_name}\n(\n" + ",\n".join(column_defs) + "\n);"

# NOTE(review): this is a machine-specific absolute path. The syntax looks like
# PostgreSQL `copy ... from`, which reads the file on the database server -
# confirm the path before running this anywhere else.
copy_sql = f"copy {table_name}({', '.join(column_names)})\n    from '/Users/riley.gisseman/Downloads/cfb_nfl_dev_env/Data & Modeling/modeling/python/cfb_nfl_age_curve_predictions_loess.csv'\n    delimiter ','\n    csv header;"

# Print the SQL code
print(create_table_sql)
print(copy_sql)


create table cfb_nfl_age_curve_predictions
(    true_age double precision,
    position_group varchar(255),
    snaps_per_non_snap_ln_projected_change double precision,
    targets_per_non_target_ln_projected_change double precision,
    receptions_per_target_ln_projected_change double precision,
    receptions_per_non_reception_ln_projected_change double precision,
    yards_per_reception_ln_projected_change double precision,
    rec_touchdowns_per_non_rec_touchdown_ln_projected_change double precision,
    rushs_per_non_rush_ln_projected_change double precision,
    passes_per_non_pass_ln_projected_change double precision,
    completions_per_non_completion_ln_projected_change double precision,
    yards_per_rush_ln_projected_change double precision,
    yards_per_completion_ln_projected_change double precision,
    rush_touchdowns_per_non_rush_touchdown_ln_projected_change double precision,
    pass_touchdowns_per_non_pass_touchdown_ln_projected_change double precision,
    epa_per_