# Final Submission Generator

This notebook loads the trained model, validates the columns of the test ultimate dataset against the train schema (excluding `TARGET`), and writes the final submission CSV to `final_solution/submission_v2/`, using `senior_ds_test/data/final_submission/sample_submission.csv` as the format template.

- Output name: `final_submission_firstname_lastname.csv` (set `FIRST_NAME` and `LAST_NAME` in the paths cell)
- Model: `final_solution/artifacts/json/model_lgbm.pkl`
- Train schema: `final_solution/artifacts/csv/ultimate_dataset.csv`
- Test data: `final_solution/artifacts/csv/ultimate_dataset_test.csv`



In [8]:
# Paths and parameters
import os
from pathlib import Path

import joblib
import pandas as pd

# Project root. Defaults to the original checkout location; set the
# MONSOON_PROJECT_ROOT environment variable to run the notebook from another
# machine or directory without editing this cell.
PROJECT_ROOT = Path(
    os.environ.get('MONSOON_PROJECT_ROOT', '/home/miso/Documents/WINDOWS/monsoon')
).expanduser()
FINAL_SOLUTION = PROJECT_ROOT / 'final_solution'
ART_JSON = FINAL_SOLUTION / 'artifacts' / 'json'
ART_CSV = FINAL_SOLUTION / 'artifacts' / 'csv'

# Directory the submission CSV is written to; created up front if it is missing
SUBMIT_DIR = FINAL_SOLUTION / 'submission_v2'
SUBMIT_DIR.mkdir(parents=True, exist_ok=True)

# Edit your name here (used to build the output file name)
FIRST_NAME = 'Mitul'
LAST_NAME = 'Solanki'

# Inputs consumed by the cells below
MODEL_PATH = ART_JSON / 'model_lgbm.pkl'
SCHEMA_SAMPLE = ART_CSV / 'ultimate_dataset.csv'  # train schema
ULTI_TEST = ART_CSV / 'ultimate_dataset_test.csv'  # test features

print('Paths set.')


Paths set.


In [2]:
# Load the persisted model and a 100-row sample of the train data for its schema
model = joblib.load(MODEL_PATH)
schema_df = pd.read_csv(SCHEMA_SAMPLE, nrows=100)

# Every train column except these three is treated as a model feature
non_feature_cols = {'uid', 'NAME_CONTRACT_TYPE', 'TARGET'}
feature_cols = [col for col in schema_df.columns if col not in non_feature_cols]
print('Loaded model. Feature count:', len(feature_cols))


Loaded model. Feature count: 52


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# Load the test features and check their columns against the train schema
# (TARGET is excluded from the comparison).
if not ULTI_TEST.exists():
    raise FileNotFoundError(f'Missing {ULTI_TEST}. Build it with submission_v2 scripts.')

test_df = pd.read_csv(ULTI_TEST)

# Compare the two column sets in both directions
expected_train = set(schema_df.columns).difference({'TARGET'})
actual_test = set(test_df.columns)
missing_in_test = expected_train.difference(actual_test)
extra_in_test = actual_test.difference(expected_train)

if missing_in_test:
    # A train column absent from test is fatal; show at most 20 names
    missing_preview = sorted(missing_in_test)[:20]
    raise ValueError(f"Columns missing in test (excluding TARGET): {missing_preview}")
if extra_in_test:
    # Extra test columns are only reported (at most 10 names); they are not selected below
    extra_preview = sorted(extra_in_test)[:10]
    print(f"Note: Extra columns in test ignored by model: {extra_preview}")

# Select the feature columns in train order and replace missing values with 0
available_features = [col for col in feature_cols if col in test_df.columns]
X_test = test_df[available_features].fillna(0)

# Remember whether the test data carries a 'uid' column for the submission ids
if 'uid' in test_df.columns:
    uid_col = 'uid'
else:
    uid_col = None
print('Test shape:', X_test.shape)


Test shape: (46127, 52)


In [9]:
# Predict and write submission
import numpy as np

# Score the test set; column 1 of predict_proba is the class-1 probability.
# NOTE(review): the decision_function fallback returns raw scores, not
# probabilities — confirm the evaluator accepts them before relying on it.
if hasattr(model, 'predict_proba'):
    proba = model.predict_proba(X_test)[:, 1]
else:
    proba = model.decision_function(X_test)
proba = np.asarray(proba)

# Fail loudly rather than writing an unusable submission file
if not np.isfinite(proba).all():
    raise ValueError('Model produced NaN or infinite scores; refusing to write the submission.')
n_pred = len(proba)

# Build submission frame matching sample
SAMPLE_PATH = PROJECT_ROOT / 'senior_ds_test' / 'data' / 'final_submission' / 'sample_submission.csv'
sample = pd.read_csv(SAMPLE_PATH)
if len(sample) == n_pred:
    # Same number of rows: reuse the sample rows as-is
    sub = sample.copy()
elif uid_col is not None:
    # Row counts differ: use the sample only as a header template and size the
    # frame to the predictions; ids are filled from the test data below.
    sub = pd.DataFrame(index=pd.RangeIndex(n_pred), columns=sample.columns)
else:
    raise ValueError(
        f"sample_submission has {len(sample)} rows but there are {n_pred} predictions, "
        "and the test data has no 'uid' column to rebuild the ids from."
    )

# Identify id and probability columns
id_col = next((c for c in ['uid','id','customer_id','case_id'] if c in sub.columns), None)
if id_col is None and uid_col is not None:
    # Positional insert so the ids do not depend on index alignment
    sub.insert(0, 'uid', test_df[uid_col].to_numpy())
    id_col = 'uid'

prob_col = next((c for c in ['TARGET','probability','target','score'] if c in sub.columns), None)
if prob_col is None:
    # Fall back to the second column, but never pick the id column itself,
    # otherwise the ids would be overwritten by the scores.
    if len(sub.columns) >= 2 and sub.columns[1] != id_col:
        prob_col = sub.columns[1]
    else:
        sub['TARGET'] = 0.0
        prob_col = 'TARGET'

# Assign values
# NOTE(review): when the test data has no 'uid' column, the ids already in the
# sample are kept and scores are matched to them by row position.
if id_col and uid_col:
    sub[id_col] = test_df[uid_col].values
sub[prob_col] = proba

# Write
out_path = SUBMIT_DIR / f"final_submission_{FIRST_NAME}_{LAST_NAME}.csv"
sub.to_csv(out_path, index=False)
print('Wrote submission to:', out_path)


Wrote submission to: /home/miso/Documents/WINDOWS/monsoon/final_solution/submission_v2/final_submission_Mitul_Solanki.csv


## Notes on predictions and submission format

- The model outputs the probability of class 1 (`TARGET = 1`, i.e. default) for each row. We submit these probabilities, not hard labels (0/1).
- The second column in the submission CSV mirrors the header of `sample_submission.csv` (e.g., `TARGET` or `probability`).
- The evaluator computes ROC-AUC from these probabilities on the hidden test labels.
- If you need hard labels, apply a threshold to the probabilities separately (not part of this submission).
