In [2]:
import os
import json
import yaml
import joblib
import warnings
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import save_model

# ============================================================
# PATHS
# ============================================================

# Project root; the input CSV and every output folder are resolved against it.
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Predictive Model for Competitive Exam Success"
DATA_PATH = os.path.join(BASE_DIR, "RS_Session_256_AU_2981_A_to_F.csv")

# One sub-folder per kind of output written further down the script.
MODEL_DIR, ARTIFACT_DIR, OUTPUT_DIR, CONFIG_DIR = (
    os.path.join(BASE_DIR, sub_folder)
    for sub_folder in ("models", "artifacts", "outputs", "configs")
)

# Create any folder that is missing; folders that already exist are left alone.
for folder in (MODEL_DIR, ARTIFACT_DIR, OUTPUT_DIR, CONFIG_DIR):
    os.makedirs(folder, exist_ok=True)

# ============================================================
# LOAD DATA
# ============================================================

# Read the raw CSV and shuffle its rows once with a fixed seed.
df = pd.read_csv(DATA_PATH)
df = shuffle(df, random_state=42)

# ============================================================
# SELECT NUMERIC DATA ONLY
# ============================================================

# Only numeric columns are kept; every other column is dropped here.
df_numeric = df.select_dtypes(include=[np.number])

if df_numeric.shape[1] < 2:
    # At least one feature column plus the target column are required.
    # ValueError is still an Exception subclass, so broad handlers keep working.
    raise ValueError("❌ Dataset must have features + target column")

# Script convention: the LAST numeric column is the target, all preceding
# numeric columns are features.
# NOTE(review): later stages fit classifiers on `y`; confirm that the last
# numeric column of this CSV really holds class labels.
X = df_numeric.iloc[:, :-1]
y = df_numeric.iloc[:, -1]

# Show how many rows fall into each target value.
print("Target distribution:")
print(y.value_counts())

# ============================================================
# SCALING
# ============================================================

# Fit the standardiser on the full feature table, then transform that same
# table with the fitted statistics.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Persist the fitted scaler next to the other artifacts.
scaler_path = os.path.join(ARTIFACT_DIR, "scaler.pkl")
joblib.dump(scaler, scaler_path)

# ============================================================
# AIS FEATURE WEIGHTING (FIXED)
# ============================================================

def ais_feature_weighting(X, y, iterations=25, random_state=42):
    """Pick the best-scoring of ``iterations`` random feature-weight vectors.

    Each candidate ("antibody") is a vector of per-feature weights drawn
    uniformly from [0, 1). The features are multiplied by the candidate, a
    default ``GradientBoostingClassifier`` is fitted on an 80/20 split, and
    the candidate with the highest hold-out accuracy is returned.

    Args:
        X: 2-D feature matrix (array or DataFrame) of shape
            (n_samples, n_features).
        y: Target labels aligned with the rows of ``X``.
        iterations: Number of random candidates to evaluate.
        random_state: Seed for both the candidate generator and the
            classifier, so repeated runs select the same weights. Pass
            ``None`` for non-deterministic behaviour.

    Returns:
        A 1-D ``numpy.ndarray`` of length ``n_features``. When ``iterations``
        is 0 no candidate is evaluated and a vector of ones is returned.
    """
    n_features = X.shape[1]

    # A local generator keeps the search reproducible without touching the
    # global NumPy random state.
    rng = np.random.default_rng(random_state)
    antibodies = rng.random((iterations, n_features))

    best_weights = np.ones(n_features)
    best_score = -1.0  # below any possible accuracy, so the first candidate wins

    # NOTE(review): this split uses the same test_size/random_state as the
    # final evaluation split later in the script, so weights are chosen on
    # rows that are also used to report accuracy; consider searching on the
    # training portion only.
    # NOTE(review): tree ensembles split on thresholds, so a positive
    # per-feature rescaling is expected to change scores very little; confirm
    # the search actually discriminates between candidates.
    for ab in antibodies:
        X_weighted = X * ab

        X_train, X_test, y_train, y_test = train_test_split(
            X_weighted, y, test_size=0.2, random_state=42
        )

        model = GradientBoostingClassifier(random_state=random_state)
        model.fit(X_train, y_train)

        score = accuracy_score(y_test, model.predict(X_test))

        if score > best_score:
            best_score = score
            # Copy so the result is not a view into the candidate matrix.
            best_weights = ab.copy()

    return best_weights

# Search for per-feature weights on the scaled data, then apply the selected
# weights column-wise to obtain the matrix used for the final model.
ais_weights = ais_feature_weighting(X=X_scaled, y=y)
X_ais = np.multiply(X_scaled, ais_weights)

# ============================================================
# TRAIN FINAL MODEL
# ============================================================

# Hold out 20% of the weighted rows for evaluation, using a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_ais,
    y,
    random_state=42,
    test_size=0.2,
)

# Hyper-parameters of the final gradient boosting classifier.
gb_params = {"n_estimators": 150, "learning_rate": 0.05, "max_depth": 3}
gb_model = GradientBoostingClassifier(**gb_params)

gb_model.fit(X_train, y_train)

# ============================================================
# EVALUATION
# ============================================================

# Hard class predictions for the held-out rows.
y_pred = gb_model.predict(X_test)

# predict_proba columns follow gb_model.classes_; column 1 is kept as the
# "selection" probability.
# NOTE(review): this assumes a binary target whose second class is the
# positive one - TODO confirm against the dataset.
y_prob = gb_model.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)

# The emoji was stored as mojibake (UTF-8 bytes read as Mac Roman); restored.
print(f"\n✅ Model Accuracy: {accuracy:.4f}")

# ============================================================
# SAVE GRADIENT BOOSTING MODEL (.pkl)
# ============================================================

# Persist the fitted gradient boosting classifier in the models folder.
gb_model_path = os.path.join(MODEL_DIR, "gb_ais_model.pkl")
joblib.dump(gb_model, gb_model_path)

# ============================================================
# SAVE NEURAL NETWORK MODEL (.h5)
# ============================================================

# Small fully connected network: 32 -> 16 -> 1 units, with a sigmoid output.
n_inputs = X_train.shape[1]

nn_model = Sequential()
nn_model.add(Dense(32, activation="relu", input_shape=(n_inputs,)))
nn_model.add(Dense(16, activation="relu"))
nn_model.add(Dense(1, activation="sigmoid"))

nn_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train silently on the same training split used for the boosting model.
fit_options = {"epochs": 15, "batch_size": 16, "verbose": 0}
nn_model.fit(X_train, y_train, **fit_options)

# Store the trained network as an HDF5 file in the models folder.
nn_model_path = os.path.join(MODEL_DIR, "selection_model.h5")
save_model(nn_model, nn_model_path)

# ============================================================
# SAVE CONFIG (.yaml)
# ============================================================

# Run summary: project labels, the feature column names and the hold-out
# accuracy of the gradient boosting model.
config = dict(
    project="Predictive Model for Competitive Exam Success",
    model="Gradient Boosting + AIS",
    features=list(X.columns),
    accuracy=float(accuracy),
    use_case="CAT / GATE / UPSC Readiness Prediction",
)

config_path = os.path.join(CONFIG_DIR, "model_config.yaml")
with open(config_path, "w") as config_file:
    yaml.dump(config, config_file)

# ============================================================
# SAVE PREDICTIONS (.json)
# ============================================================

# Hold-out probabilities as a plain list under a single key.
prediction_json = dict(selection_probability=y_prob.tolist())

predictions_path = os.path.join(OUTPUT_DIR, "selection_predictions.json")
with open(predictions_path, "w") as json_file:
    json_file.write(json.dumps(prediction_json, indent=4))

# ============================================================
# SAVE FINAL RESULTS CSV
# ============================================================

# One row per held-out sample: the model's input features under the original
# column names, followed by the true label and the predicted probability.
results_df = pd.DataFrame(X_test, columns=X.columns).assign(
    Actual=y_test.values,
    Predicted_Probability=y_prob,
)

results_path = os.path.join(OUTPUT_DIR, "final_results.csv")
results_df.to_csv(results_path, index=False)

# Final summary of every file written by this script. The emoji were stored
# as mojibake (UTF-8 bytes read as Mac Roman) and are restored here.
print("\n📁 Files Generated Successfully:")
for generated_file in (
    "gb_ais_model.pkl",
    "selection_model.h5",
    "scaler.pkl",
    "model_config.yaml",
    "selection_predictions.json",
    "final_results.csv",
):
    print(f"✔ {generated_file}")


Target distribution:
2017
1138890    1
611539     1
67523      1
Name: count, dtype: int64

✅ Model Accuracy: 0.0000





📁 Files Generated Successfully:
✔ gb_ais_model.pkl
✔ selection_model.h5
✔ scaler.pkl
✔ model_config.yaml
✔ selection_predictions.json
✔ final_results.csv
