In [2]:
import pandas as pd
import re
import joblib
import sklearn

# Read the unprocessed test set from disk.
test = pd.read_csv("test.csv")

# Derive the engineered columns in one pass:
#   FamilySize = SibSp + Parch + 1
#   IsAlone    = True exactly when FamilySize equals 1
#   Sex        = numeric code (male -> 0, female -> 1); any other value becomes NaN
test = test.assign(
    FamilySize=lambda df: df['SibSp'] + df['Parch'] + 1,
    IsAlone=lambda df: df['FamilySize'] == 1,
    Sex=lambda df: df['Sex'].map({'male': 0, 'female': 1}),
)

# Extract Title and bucket
def extract_title(name):
    """Return the title embedded in a passenger name, or 'Rare' if none is found.

    The title is taken to be the text between the first comma and the next
    period (e.g. "Kelly, Mr. James" -> "Mr"), with surrounding whitespace
    removed. The input is converted with ``str()`` first, so non-string
    values (None, NaN, ...) simply fail to match and yield 'Rare'.
    """
    found = re.search(r',\s*([^\.]+)\.', str(name))
    if found is None:
        return 'Rare'
    return found.group(1).strip()

def map_title_to_buckets(title):
    """Collapse a raw title into one of 'Mr', 'Mrs', 'Miss' or 'Rare'.

    Matching is exact and case-sensitive; any title not listed in one of
    the three named buckets falls through to 'Rare'.
    """
    # (bucket label, raw titles that belong to it), checked in order.
    bucket_table = (
        ('Mr', ('Mr',)),
        ('Mrs', ('Mrs', 'Mme', 'Lady', 'Countess')),
        ('Miss', ('Miss', 'Mlle')),
    )
    for bucket, members in bucket_table:
        if title in members:
            return bucket
    return 'Rare'

# Title column: pull the raw title out of each Name, then bucket it.
test['Title'] = test['Name'].apply(
    lambda full_name: map_title_to_buckets(extract_title(full_name))
)

# The model is fed exactly these 7 columns, in this order.
feature_cols = ['Sex', 'Pclass', 'Age', 'Fare', 'Title', 'FamilySize', 'IsAlone']

# Restore the retrained model from disk.
model = joblib.load('titanic_model_7features.pkl')

# Score the test set. Per the original note, the loaded pipeline is expected
# to take care of imputation, scaling, and one-hot encoding itself.
# Column 1 of predict_proba is kept as the survival probability; a passenger
# is predicted to survive when that probability is at least 0.5.
proba = model.predict_proba(test[feature_cols])[:, 1]
pred = (proba >= 0.5).astype(int)

# Assemble the report: passenger id, the 7 input features, then the
# predicted label and its probability.
results = test[['PassengerId', *feature_cols]].assign(
    Survived_Pred=pred,
    Survival_Prob=proba,
)

# Persist without the index column and show the first rows.
results.to_csv('selected7_feature_predictions.csv', index=False)
print(results.head(5))


   PassengerId  Sex  Pclass   Age     Fare Title  FamilySize  IsAlone  \
0          892    0       3  34.5   7.8292    Mr           1     True   
1          893    1       3  47.0   7.0000   Mrs           2    False   
2          894    0       2  62.0   9.6875    Mr           1     True   
3          895    0       3  27.0   8.6625    Mr           1     True   
4          896    1       3  22.0  12.2875   Mrs           3    False   

   Survived_Pred  Survival_Prob  
0              0       0.104444  
1              0       0.480473  
2              0       0.105810  
3              0       0.108847  
4              1       0.585283  
