In [1]:
# ----------------------------------------------------
# Financial Risk Profiling - Starter ML Pipeline
# ----------------------------------------------------
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

In [2]:
# ----------------------------------------------------
# 1. Load data
# ----------------------------------------------------
# Both CSV files are read relative to the notebook's working directory.
train_df = pd.read_csv("train_updated.csv")
test_df = pd.read_csv("test_updated.csv")

# Target is the RiskFlag column; the feature frame is everything else in the
# training file apart from ProfileID.
y = train_df["RiskFlag"]
X = train_df.drop(columns=["RiskFlag", "ProfileID"])

# The test file only has ProfileID removed before prediction.
X_test_final = test_df.drop(columns=["ProfileID"])

# NOTE(review): the saved output of this cell shows a leading "Unnamed: 0"
# header - most likely just the row index from the export, but confirm that no
# index column leaked into the features.
X

Unnamed: 0,ApplicantYears,AnnualEarnings,RequestedSum,TrustMetric,WorkDuration,ActiveAccounts,OfferRate,RepayPeriod,DebtFactor,QualificationLevel,WorkCategory,RelationshipStatus,OwnsProperty,FamilyObligation,FundUseCase,JointApplicant
0,18,137576,209136,846,26,2,10.47,60,0.81,High School,Self-employed,Single,Yes,No,Business,No
1,47,57194,5970,748,30,2,19.72,36,0.73,High School,Unemployed,Divorced,No,Yes,Education,No
2,26,84328,95065,453,7,2,24.25,12,0.45,Master's,Self-employed,Married,No,No,Other,Yes
3,53,49795,229582,533,107,3,14.44,60,0.17,Bachelor's,Self-employed,Single,Yes,No,Auto,Yes
4,49,115450,22072,840,0,4,24.48,12,0.11,Bachelor's,Part-time,Single,No,Yes,Education,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204272,40,116623,161673,651,79,2,23.44,12,0.87,Bachelor's,Part-time,Divorced,No,No,Home,Yes
204273,67,62958,189499,460,77,3,9.29,36,0.11,Bachelor's,Self-employed,Single,No,No,Business,Yes
204274,62,34372,59645,524,94,3,9.72,60,0.24,PhD,Full-time,Single,Yes,No,Auto,No
204275,44,146262,198454,489,7,4,4.31,48,0.30,High School,Self-employed,Married,Yes,No,Home,No


In [3]:

# ----------------------------------------------------
# 2. Separate numerical + categorical columns
# ----------------------------------------------------
# Numerical = every numeric dtype (not only int64/float64); categorical = every
# remaining column. Selecting by exact dtype names left a gap: a column stored
# as int32/float32, category, string or bool matched neither list, and a
# ColumnTransformer with the default `remainder` drops unlisted columns
# without any warning. Using complementary selectors guarantees that each
# column of X lands in exactly one group.
numerical_cols = X.select_dtypes(include="number").columns
categorical_cols = X.select_dtypes(exclude="number").columns

In [4]:

# ----------------------------------------------------
# 3. Preprocessing pipeline
# ----------------------------------------------------
# Numerical columns are standardised.
numeric_step = ("num", StandardScaler(), numerical_cols)

# Categorical columns are one-hot encoded; handle_unknown="ignore" keeps
# transform from failing on a category that was not present during fit.
categorical_step = (
    "cat",
    OneHotEncoder(handle_unknown="ignore"),
    categorical_cols,
)

preprocess = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [5]:
# ----------------------------------------------------
# 4. Split training + validation
# ----------------------------------------------------
# Hold out 20% of the rows for validation. The split is stratified on the
# target and seeded so the same partition is produced on every run.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

In [6]:
# ----------------------------------------------------
# 5. Train SVM Model
# ----------------------------------------------------
# clone() gives this pipeline its own unfitted copy of the preprocessing step.
# Pipeline does not copy the estimators it is given, so passing `preprocess`
# directly would make every pipeline built from it share (and refit) the same
# transformer object.
#
# random_state is fixed so the linear SVM is reproducible across runs, in line
# with the seeded train/validation split.
svm_clf = Pipeline(steps=[
    ("preprocess", clone(preprocess)),
    ("model", LinearSVC(C=1.0, max_iter=5000, random_state=42))
])

svm_clf.fit(X_train, y_train)
svm_pred = svm_clf.predict(X_val)

# NOTE(review): accuracy alone can hide a majority-class predictor if RiskFlag
# is imbalanced - confirm the class balance before relying on this number.
print("Linear SVM Validation Accuracy:", accuracy_score(y_val, svm_pred))


Linear SVM Validation Accuracy: 0.8837135304484042


In [7]:
# ----------------------------------------------------
# 6. Train Neural Network Model (MLP)
# ----------------------------------------------------
# clone() gives this pipeline its own unfitted copy of the preprocessing step.
# Pipeline does not copy the estimators it is given, so passing `preprocess`
# directly would mean fitting this pipeline also refits the transformer held
# by any other pipeline built from the same object.
mlp_clf = Pipeline(steps=[
    ("preprocess", clone(preprocess)),
    # Two hidden layers (32 then 16 units) with ReLU; seeded for repeatability.
    ("model", MLPClassifier(hidden_layer_sizes=(32, 16),
                            activation="relu",
                            max_iter=300,
                            random_state=42))
])

mlp_clf.fit(X_train, y_train)
mlp_pred = mlp_clf.predict(X_val)

# NOTE(review): accuracy alone can hide a majority-class predictor if RiskFlag
# is imbalanced - confirm the class balance before relying on this number.
print("MLP Validation Accuracy:", accuracy_score(y_val, mlp_pred))


MLP Validation Accuracy: 0.8847170550225181


In [8]:
def _as_submission(predictions):
    """Pair the test set's ProfileID column with the given RiskFlag predictions."""
    return pd.DataFrame({
        "ProfileID": test_df["ProfileID"],
        "RiskFlag": predictions,
    })


# -----------------------------
# Save SVM predictions
# -----------------------------
svm_preds = svm_clf.predict(X_test_final)
svm_submission = _as_submission(svm_preds)

# index=False keeps the row index out of the written file.
svm_submission.to_csv("svm_submission.csv", index=False)
print("Saved svm_submission.csv")

# -----------------------------
# Save Neural Network predictions
# -----------------------------
nn_preds = mlp_clf.predict(X_test_final)
nn_submission = _as_submission(nn_preds)

nn_submission.to_csv("nn_submission.csv", index=False)
print("Saved nn_submission.csv")


Saved svm_submission.csv
Saved nn_submission.csv
