In [2]:
# === predict_ecohabit.py ===
# Jupyter/Terminal safe predictor for EcoHabit model
# Uses artifacts from: C:\Users\sagni\Downloads\Eco Habit

import os
import sys
import argparse
import numpy as np
import pandas as pd
import joblib
from sklearn.base import BaseEstimator, TransformerMixin
from tensorflow.keras.models import load_model

# -----------------------------
# Paths
# -----------------------------
OUT_DIR = r"C:\Users\sagni\Downloads\Eco Habit"
PKL_PATH = os.path.join(OUT_DIR, "mindpal_preprocess.pkl")
H5_PATH  = os.path.join(OUT_DIR, "mindpal_model.h5")
DEFAULT_OUT = os.path.join(OUT_DIR, "predictions.csv")

# -----------------------------
# Helper classes (for joblib)
# -----------------------------
class ColumnSelector(BaseEstimator, TransformerMixin):
    def __init__(self, column): self.column = column
    def fit(self, X, y=None): return self
    def transform(self, X): return X[[self.column]]

class To1DString(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X):
        if isinstance(X, pd.DataFrame):
            return X.iloc[:, 0].astype(str).values
        return np.asarray(X).astype(str).ravel()

class DateTimeExpand(BaseEstimator, TransformerMixin):
    def __init__(self, columns): self.columns = columns
    def fit(self, X, y=None): return self
    def transform(self, X):
        outs = []
        for c in self.columns:
            s = pd.to_datetime(X[c], errors="coerce")
            outs.append(pd.DataFrame({
                f"{c}_year":  s.dt.year.fillna(0).astype(int),
                f"{c}_month": s.dt.month.fillna(0).astype(int),
                f"{c}_day":   s.dt.day.fillna(0).astype(int),
                f"{c}_dow":   s.dt.dayofweek.fillna(0).astype(int),
            }))
        return pd.concat(outs, axis=1) if outs else np.empty((len(X), 0))

# -----------------------------
# Utility
# -----------------------------
def ensure_dense_if_small(X, max_feats=50000):
    if hasattr(X, "toarray") and X.shape[1] <= max_feats:
        return X.toarray()
    return X

def compile_loaded_model(model, n_classes: int):
    if n_classes <= 2:
        model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    else:
        model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

def build_df_for_text(text: str, bundle: dict):
    """Build a 1-row DataFrame for single-text prediction"""
    df_dict = {}
    # Fill numeric with 0, categorical with "unknown", datetime with today, text with input
    for c in bundle["numeric_cols"]: df_dict[c] = [0]
    for c in bundle["cat_cols"]: df_dict[c] = ["unknown"]
    for c in bundle["datetime_cols"]: df_dict[c] = [pd.Timestamp.today()]
    for c in bundle["text_cols"]: df_dict[c] = [text]
    return pd.DataFrame(df_dict)

# -----------------------------
# Predict function
# -----------------------------
def predict_dataframe(df_in, bundle, model, out_csv=None, print_out=False):
    preprocess = bundle["preprocess"]
    X_proc = preprocess.transform(df_in)
    X_proc = ensure_dense_if_small(X_proc)

    probs = model.predict(X_proc, verbose=0)
    n_classes = len(bundle["label_encoder"].classes_)
    if n_classes <= 2:
        y_pred = (probs.ravel() >= 0.5).astype(int)
        preds = bundle["label_encoder"].inverse_transform(y_pred)
        df_out = df_in.copy()
        df_out["pred_label"] = preds
        df_out["prob"] = probs.ravel()
    else:
        y_pred = np.argmax(probs, axis=1)
        preds = bundle["label_encoder"].inverse_transform(y_pred)
        df_out = df_in.copy()
        df_out["pred_label"] = preds
        for i, cls in enumerate(bundle["label_encoder"].classes_):
            df_out[f"prob_{cls}"] = probs[:, i]

    if out_csv:
        df_out.to_csv(out_csv, index=False, encoding="utf-8")
    if print_out:
        print(df_out[["pred_label"] + [c for c in df_out.columns if c.startswith("prob")]].head())
    return df_out

# -----------------------------
# Main
# -----------------------------
def main():
    parser = argparse.ArgumentParser(description="EcoHabit Prediction Script")
    parser.add_argument("--in", dest="in_csv", help="Path to input CSV (same columns as training)")
    parser.add_argument("--out", dest="out_csv", help="Where to save predictions CSV")
    parser.add_argument("--text", dest="single_text", help="Single text string for quick prediction")
    parser.add_argument("--print", action="store_true", help="Print predictions to console")
    args = parser.parse_args()

    # Jupyter/IPython arg fix
    if "ipykernel_launcher" in sys.argv[0]:
        sys.argv = [sys.argv[0]]

    # Load bundle + model
    bundle = joblib.load(PKL_PATH)
    model = load_model(H5_PATH)
    n_classes = len(bundle["label_encoder"].classes_)
    compile_loaded_model(model, n_classes)

    if args.single_text:
        df_in = build_df_for_text(args.single_text, bundle)
    elif args.in_csv:
        df_in = pd.read_csv(args.in_csv)
    else:
        raise ValueError("Provide either --text 'some sentence' or --in path\\to\\file.csv")

    out_csv = args.out_csv or DEFAULT_OUT
    predict_dataframe(df_in, bundle, model, out_csv, print_out=args.print)

if __name__ == "__main__":
    main()


usage: ipykernel_launcher.py [-h] [--in IN_CSV] [--out OUT_CSV] [--text SINGLE_TEXT] [--print]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\sagni\AppData\Roaming\jupyter\runtime\kernel-c8cc4447-445e-4194-a3ca-7019d0ade62a.json


SystemExit: 2