In [None]:
# PCA → NEURAL NETWORK
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier

# Column roles: the label to predict and the row identifier.
TARGET = "retention_status"
ID_COL = "founder_id"

# Load data from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Encode the label as an integer: "Stayed" -> 1, "Left" -> 0.
# Any other label value maps to NaN and makes the int cast raise.
label_codes = {"Stayed": 1, "Left": 0}
train[TARGET] = train[TARGET].map(label_codes).astype(int)

# Features are everything except the label, the identifier and
# "founder_visibility".
y = train[TARGET]
X = train.drop(columns=[TARGET, ID_COL, "founder_visibility"])
# The test table drops the same non-feature columns; ones it lacks are skipped.
X_test = test.drop(columns=[ID_COL, "founder_visibility"], errors="ignore")

# Split feature names by dtype; the two groups are preprocessed differently.
num_cols = X.select_dtypes(include=["int64", "float64"]).columns
cat_cols = X.select_dtypes(include=["object", "bool", "category"]).columns

# Per-group preprocessing for the feature columns:
#   num_cols -> median imputation, then standardization
#   cat_cols -> most-frequent imputation, then one-hot encoding
#               (handle_unknown="ignore": categories not seen during fit
#               do not raise at transform time)
preprocessor = ColumnTransformer(
    [
        ("num", Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
            ("scale", StandardScaler()),
        ]), num_cols),
        ("cat", Pipeline([
            ("imputer", SimpleImputer(strategy="most_frequent")),
            ("onehot", OneHotEncoder(handle_unknown="ignore")),
        ]), cat_cols),
    ],
    # OneHotEncoder emits a sparse matrix, and by default ColumnTransformer
    # keeps the stacked result sparse whenever its density is below 0.3.
    # This output feeds PCA, which rejects sparse input on older scikit-learn
    # releases and switches solver on newer ones, so whether the script ran
    # depended on the data. Force a dense array instead.
    # NOTE: this materializes every one-hot column; revisit if a categorical
    # feature has very high cardinality.
    sparse_threshold=0,
)

# PCA + NN pipeline: preprocess the features, project them onto 40 principal
# components, then classify with a perceptron that has two hidden layers.
# NOTE(review): n_components=40 presumably requires at least 40 preprocessed
# features and training rows; confirm against the data.
pca_step = PCA(n_components=40, random_state=42)

mlp_step = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation="relu",
    solver="adam",
    learning_rate_init=0.001,
    max_iter=300,
    random_state=42,  # fixed seed, same as the PCA step
)

model = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("pca", pca_step),
        ("nn", mlp_step),
    ]
)

# Train on the full training set, then predict the test rows.
model.fit(X, y)
preds = model.predict(X_test)

# Translate the integer predictions back to the original label strings.
labels = np.where(preds == 1, "Stayed", "Left")

# Two-column submission: the row identifier plus the predicted label.
submission = test[["founder_id"]].copy()
submission["retention_status"] = labels

# Write without the index so the file holds only the two columns above.
submission.to_csv("submission_pca_nn.csv", index=False)
print("Saved submission_pca_nn.csv")
