<h1>Importing Libraries</h1>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

<h1>Loading data</h1>

In [3]:
# Load data
train = pd.read_parquet('../train.parquet')
test = pd.read_parquet('../test.parquet')
sample_sub = pd.read_csv('../sample_submission.csv')

<h1>Precomputations</h1>

In [4]:
# Get all IV columns from TEST data
iv_columns = [col for col in test.columns if col.startswith(('call_iv_', 'put_iv_'))]

# Create strike dictionary from TEST columns
strike_dict = {}
for col in iv_columns:
    strike = col.split('_')[-1]
    if strike not in strike_dict:
        strike_dict[strike] = {'call': None, 'put': None}
    
    if col.startswith('call_iv_'):
        strike_dict[strike]['call'] = col
    else:
        strike_dict[strike]['put'] = col

# Pre-calculate global means from training data
global_means = {}
for col in iv_columns:
    if col in train.columns:
        global_means[col] = train[col].mean()
overall_mean = np.mean(list(global_means.values())) if global_means else 0.2

<h1>Prediction function</h1>

In [5]:
def predict_iv(data):
    data = data.copy()
    
    # Phase 1: Strict put-call parity
    for strike, cols in strike_dict.items():
        call_col = cols['call']
        put_col = cols['put']
        
        if call_col in data.columns and put_col in data.columns:
            # Fill calls using puts where possible
            call_mask = data[call_col].isna() & data[put_col].notna()
            data.loc[call_mask, call_col] = data.loc[call_mask, put_col]
            
            # Fill puts using calls where possible
            put_mask = data[put_col].isna() & data[call_col].notna()
            data.loc[put_mask, put_col] = data.loc[put_mask, call_col]
    
    # Phase 2: Row-wise mean imputation
    for idx, row in data.iterrows():
        # Collect all available IV values in this row
        available_ivs = []
        for col in iv_columns:
            if col in row and not pd.isna(row[col]):
                available_ivs.append(row[col])
        
        if available_ivs:
            row_mean = np.mean(available_ivs)
        else:
            row_mean = overall_mean
            
        # Apply row mean to all missing IVs in this row
        for col in iv_columns:
            if col in data.columns and pd.isna(data.at[idx, col]):
                data.at[idx, col] = row_mean
    
    return data

<h1>Validation Score</h1>

In [6]:
# Create validation split
train_df, val_df = train_test_split(train, test_size=0.2, random_state=42)

# Apply to validation set
val_pred = predict_iv(val_df)

# Calculate MSE only on originally masked validation points
mse_vals = []
for col in iv_columns:
    if col in val_df.columns and col in val_pred.columns:
        # Focus only on points that were originally missing
        mask = val_df[col].isna() & val_pred[col].notna()
        if mask.any():
            se = (val_df.loc[mask, col] - val_pred.loc[mask, col]) ** 2
            mse_vals.append(se.mean())

validation_mse = np.mean(mse_vals) if mse_vals else 0
print(f"Validation MSE (masked points only): {validation_mse:.12f}")

Validation MSE (masked points only): 0.000000000000


<h1>Test Data Prediction</h1>

In [8]:
# Apply to test set
test_pred = predict_iv(test)

<h1>Submission</h1>

In [9]:
# Prepare submission
submission = test_pred[['timestamp'] + iv_columns].copy()
submission.columns = sample_sub.columns

# Verify no missing values
assert submission.isna().sum().sum() == 0, "Missing values detected"
submission.to_csv('submission.csv', index=False)

print("\nFinal Submission Preview:")
print(submission.head())
print(f"\nSubmission shape: {submission.shape}")
print(f"Validation MSE: {validation_mse:.12f}")


Final Submission Preview:
   timestamp  call_iv_24000  call_iv_24100  call_iv_24200  call_iv_24300  \
0          0       0.280939       0.280528       0.280528       0.280528   
1          1       0.270276       0.293367       0.258893       0.293367   
2          2       0.225231       0.251731       0.225231       0.225231   
3          3       0.241888       0.227331       0.220505       0.210733   
4          4       0.235328       0.208597       0.222983       0.214126   

   call_iv_24400  call_iv_24500  call_iv_24600  call_iv_24700  call_iv_24800  \
0       0.242149       0.280528       0.232439       0.280528       0.222997   
1       0.244875       0.293367       0.233548       0.228209       0.293367   
2       0.214869       0.204580       0.194604       0.188052       0.225231   
3       0.198602       0.186190       0.227331       0.166394       0.161561   
4       0.208597       0.199485       0.192603       0.208597       0.208597   

   ...  put_iv_24600  put_iv_24700 