# Project 
## Kaggle Competition: Predicting Introverts vs. Extroverts

In [1]:
# 📦 IMPORT LIBRARIES
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# 📂 LOAD DATA
# The three competition CSVs sit in one local folder; the location is
# machine-specific, so it is kept in a single place.
DATA_DIR = r"C:\Users\user\Desktop\Kaggle Competitions\playground-series-s5e7"

train = pd.read_csv(DATA_DIR + r"\train.csv")
test = pd.read_csv(DATA_DIR + r"\test.csv")
submission = pd.read_csv(DATA_DIR + r"\sample_submission.csv")

# 🎯 TARGET AND FEATURES
# "Personality" is the label to predict; "id" is dropped from both feature
# frames, and the test ids are kept aside in `test_ids`.
ID_COL = "id"
TARGET_COL = "Personality"

y = train[TARGET_COL]
X = train.drop([ID_COL, TARGET_COL], axis=1)

test_ids = test[ID_COL]
X_test = test.drop(ID_COL, axis=1)

# 🧠 ENCODE TARGET
# Map the string labels to integers; `le_y` is kept so predictions can be
# converted back to the original label strings later.
le_y = LabelEncoder()
y_enc = le_y.fit_transform(y)

# 🧠 ENCODE CATEGORICAL FEATURES
# StandardScaler cannot parse string columns, so every non-numeric feature is
# replaced by integer category codes. The category levels are learned from the
# training frame only; missing values and levels unseen in training become NaN
# and are handled by the imputation step below.
X_num = X.copy()
X_test_num = X_test.copy()
for col in X_num.select_dtypes(exclude="number").columns:
    levels = pd.Categorical(X_num[col]).categories
    for frame in (X_num, X_test_num):
        codes = pd.Categorical(frame[col], categories=levels).codes.astype(float)
        codes[codes < 0] = np.nan  # pandas uses -1 for "not one of the levels"
        frame[col] = codes

# 🩹 IMPUTE MISSING VALUES
# Fill gaps with the per-column training median, since not every downstream
# estimator accepts NaN input. Using training statistics for both frames
# avoids peeking at the test distribution. Complete, all-numeric data passes
# through unchanged.
train_medians = X_num.median()
X_num = X_num.fillna(train_medians)
X_test_num = X_test_num.fillna(train_medians)

# 🔍 SCALING
# Fit the scaler on the training features and reuse the same statistics for
# the test features. Both results are NumPy arrays.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_num)
X_test_scaled = scaler.transform(X_test_num)

# 🧱 BASE MODELS
# Four tree-based learners, each seeded with 42 for reproducibility.

# NOTE(review): `use_label_encoder` is deprecated in recent xgboost releases,
# and 'mlogloss' is the multi-class log-loss metric — confirm both are still
# wanted for the installed version and this target.
xgb_params = {
    "n_estimators": 500,
    "max_depth": 6,
    "learning_rate": 0.03,
    "subsample": 0.8,
    "colsample_bytree": 0.9,
    "random_state": 42,
    "use_label_encoder": False,
    "eval_metric": 'mlogloss',
}
xgb = XGBClassifier(**xgb_params)

# NOTE(review): LightGBM may ignore `subsample` unless a bagging frequency is
# also set — verify against the LightGBM parameter docs.
lgbm_params = {
    "n_estimators": 600,
    "learning_rate": 0.03,
    "max_depth": 7,
    "subsample": 0.9,
    "colsample_bytree": 0.9,
    "random_state": 42,
}
lgbm = LGBMClassifier(**lgbm_params)

cat_params = {
    "iterations": 400,
    "depth": 6,
    "learning_rate": 0.03,
    "verbose": 0,
    "random_seed": 42,
}
cat = CatBoostClassifier(**cat_params)

rf_params = {
    "n_estimators": 400,
    "max_depth": 10,
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)

# 🧠 STACKING MODEL
# The four base learners feed a gradient-boosting meta-learner. With
# passthrough=True the meta-learner also receives the original features in
# addition to the base learners' predictions; cv=5 sets the internal folds
# used to generate those predictions.
base_estimators = [
    ('xgb', xgb),
    ('lgbm', lgbm),
    ('cat', cat),
    ('rf', rf),
]
meta_model = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=5,
)
stack = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta_model,
    cv=5,
    n_jobs=-1,
    passthrough=True,
)

# 🎓 CROSS VALIDATION
# 5-fold stratified CV: the stack is refit on each training split and scored
# on the held-out split. After the loop, `stack` holds the fit from the last
# fold only.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_splits = skf.split(X_scaled, y_enc)

for fold_no, (fit_rows, holdout_rows) in enumerate(fold_splits, start=1):
    stack.fit(X_scaled[fit_rows], y_enc[fit_rows])

    holdout_preds = stack.predict(X_scaled[holdout_rows])
    fold_acc = accuracy_score(y_enc[holdout_rows], holdout_preds)
    print(f"Fold {fold_no} Accuracy: {round(fold_acc, 4)}")

# 🧪 FINAL PREDICTIONS
# Refit on the complete training set before predicting. The cross-validation
# loop leaves `stack` fitted on the last fold's training split only (4/5 of
# the rows), so predicting straight away would discard a fifth of the labelled
# data.
stack.fit(X_scaled, y_enc)

final_preds = stack.predict(X_test_scaled)

# Convert the integer class ids back to the original label strings and write
# the submission file to the current working directory.
submission["Personality"] = le_y.inverse_transform(final_preds)
submission.to_csv("submission.csv", index=False)


ModuleNotFoundError: No module named 'lightgbm'