In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score


In [2]:
train_path = "/content/drive/MyDrive/ML_Project/BinomialClassification/Dataset/train_updated.csv"
test_path  = "/content/drive/MyDrive/ML_Project/BinomialClassification/Dataset/test_updated.csv"

train_df = pd.read_csv(train_path)
test_df  = pd.read_csv(test_path)

TARGET = "RiskFlag"
ID_COL = "ProfileID"

numeric_features = [
    "ApplicantYears","AnnualEarnings","RequestedSum","TrustMetric","WorkDuration",
    "ActiveAccounts","OfferRate","RepayPeriod","DebtFactor"
]

categorical_features = [
    "QualificationLevel","WorkCategory","RelationshipStatus","FamilyObligation",
    "OwnsProperty","FundUseCase","JointApplicant"
]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ]
)


In [3]:
# ============================================
# Helper function   (same as your fast version)
# ============================================
def run_and_save(model_name, model,
                 X_train, y_train,
                 X_val, y_val,
                 X_test_internal, y_test_internal):

    OUTPUT_DIR = model_name
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    pipeline = Pipeline([
        ("preprocessor", preprocessor),
        ("clf", model)
    ])

    # Train
    print("Training:", model_name)
    pipeline.fit(X_train, y_train)

    # Validation
    val_pred = pipeline.predict(X_val)
    val_acc = accuracy_score(y_val, val_pred)

    with open(os.path.join(OUTPUT_DIR, "validation_report.txt"), "w") as f:
        f.write(classification_report(y_val, val_pred))
        f.write(f"\nValidation Accuracy: {val_acc}\n")

    # Internal Test
    test_pred = pipeline.predict(X_test_internal)
    test_acc = accuracy_score(y_test_internal, test_pred)

    with open(os.path.join(OUTPUT_DIR, "test_report.txt"), "w") as f:
        f.write(classification_report(y_test_internal, test_pred))
        f.write(f"\nTest Accuracy: {test_acc}\n")

    # Summary
    with open(os.path.join(OUTPUT_DIR, "accuracy_summary.txt"), "w") as f:
        f.write(f"Validation Accuracy: {val_acc}\n")
        f.write(f"Test Accuracy: {test_acc}\n")

    # Kaggle Submission
    X_final_test = test_df.drop(ID_COL, axis=1)
    final_pred = pipeline.predict(X_final_test)

    submission = pd.DataFrame({
        "ProfileID": test_df[ID_COL],
        "RiskFlag": final_pred.astype(int)
    })
    submission.to_csv(os.path.join(OUTPUT_DIR, "submission.csv"), index=False)

    print(model_name, "Completed!\n")

In [None]:
def run_all_svm_kernels():

    BASE_DIR = "/content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning"
    os.makedirs(BASE_DIR, exist_ok=True)

    # ==========================================================
    # 80% SPLIT
    # ==========================================================
    train_80, test_10_internal = train_test_split(
        train_df, test_size=0.10, random_state=42, stratify=train_df[TARGET]
    )

    train_80_main, val_10 = train_test_split(
        train_80, test_size=0.1111, random_state=42, stratify=train_80[TARGET]
    )

    X_train_80 = train_80_main.drop([TARGET, ID_COL], axis=1)
    y_train_80 = train_80_main[TARGET]

    X_val_80 = val_10.drop([TARGET, ID_COL], axis=1)
    y_val_80 = val_10[TARGET]

    X_test_int_80 = test_10_internal.drop([TARGET, ID_COL], axis=1)
    y_test_int_80 = test_10_internal[TARGET]

    X_final_test = test_df.drop(ID_COL, axis=1)

    # ==========================================================
    # 20% SPLIT
    # ==========================================================
    train_20, _ = train_test_split(
        train_df, train_size=0.20, random_state=42, stratify=train_df[TARGET]
    )

    train_20_main, val_20 = train_test_split(
        train_20, test_size=0.1111, random_state=42, stratify=train_20[TARGET]
    )

    X_train_20 = train_20_main.drop([TARGET, ID_COL], axis=1)
    y_train_20 = train_20_main[TARGET]

    X_val_20 = val_20.drop([TARGET, ID_COL], axis=1)
    y_val_20 = val_20[TARGET]

    X_test_int_20 = test_10_internal.drop([TARGET, ID_COL], axis=1)
    y_test_int_20 = test_10_internal[TARGET]

    # ==========================================================
    # RUNNING ALL SVM KERNELS
    # ==========================================================

    kernels = [ "poly", "sigmoid"]

    modes = [
        ("80_skewed",    X_train_80, y_train_80, X_val_80, y_val_80, X_test_int_80, y_test_int_80, None),
        ("80_nonskewed", X_train_80, y_train_80, X_val_80, y_val_80, X_test_int_80, y_test_int_80, "balanced"),
        ("20_skewed",    X_train_20, y_train_20, X_val_20, y_val_20, X_test_int_20, y_test_int_20, None),
        ("20_nonskewed", X_train_20, y_train_20, X_val_20, y_val_20, X_test_int_20, y_test_int_20, "balanced")
    ]


    # ----- TRAIN ALL -----
    for kernel in kernels:
        for mode_name, Xtr, Ytr, Xv, Yv, Xt, Yt, cw in modes:

            model_name = os.path.join(BASE_DIR, f"{mode_name}_{kernel}")

            svm_model = SVC(
                kernel=kernel,
                C=1.0,
                gamma="scale",
                degree=3 if kernel == "poly" else 0,
                class_weight=cw,
                cache_size=500,       # improves speed
                probability=False
            )

            run_and_save(model_name, svm_model,
                         Xtr, Ytr, Xv, Yv, Xt, Yt)

    print("All Fast SVM Models Completed Successfully!")


# RUN EVERYTHING
run_all_svm_kernels()
print("All SVM Kernel Models Completed Successfully!")


Training: /content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_skewed_poly


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


/content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_skewed_poly Completed!

Training: /content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_nonskewed_poly
/content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_nonskewed_poly Completed!

Training: /content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/20_skewed_poly


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


/content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/20_skewed_poly Completed!

Training: /content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/20_nonskewed_poly
/content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/20_nonskewed_poly Completed!

Training: /content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_skewed_sigmoid
/content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_skewed_sigmoid Completed!

Training: /content/drive/MyDrive/ML_Project/BinomialClassification/SVM/SVM-NoTUning/80_nonskewed_sigmoid
