In [3]:
# === predict_ecohabit.py (Jupyter-safe with interactive fallback) ===
# Loads artifacts from: C:\Users\sagni\Downloads\Eco Habit
# Predict on: --text "..."  OR  --in <csv>
# If no args, prompts interactively.

import os, sys, argparse
import numpy as np
import pandas as pd
import joblib
from sklearn.base import BaseEstimator, TransformerMixin
from tensorflow.keras.models import load_model

# -----------------------------
# Paths
# -----------------------------
# Folder that holds the saved artifacts and receives the default output file.
# NOTE: hard-coded to one Windows user profile; edit before running elsewhere.
OUT_DIR   = r"C:\Users\sagni\Downloads\Eco Habit"
# Preprocessing bundle read with joblib.load(); the code in this file looks up
# "preprocess" and "label_encoder" in it, plus optional "*_cols" lists.
PKL_PATH  = os.path.join(OUT_DIR, "mindpal_preprocess.pkl")
# Keras model file (.h5) opened with load_model().
H5_PATH   = os.path.join(OUT_DIR, "mindpal_model.h5")
# Destination main() writes to when --out is not supplied.
DEFAULT_OUT = os.path.join(OUT_DIR, "predictions.csv")

# -----------------------------
# Helper classes (for joblib unpickle)
# -----------------------------
class ColumnSelector(BaseEstimator, TransformerMixin):
    """Stateless transformer that narrows its input to one named column.

    The class name and the ``column`` attribute must stay as they are:
    the pickled preprocessing bundle refers to them when it is loaded.
    """

    def __init__(self, column):
        self.column = column

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer unchanged."""
        return self

    def transform(self, X):
        """Return ``X`` indexed with a one-element list holding ``column``.

        For a pandas DataFrame that list-style selection produces a
        one-column DataFrame rather than a Series.
        """
        wanted = [self.column]
        return X[wanted]

class To1DString(BaseEstimator, TransformerMixin):
    """Stateless transformer that turns its input into a flat array of strings."""

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer unchanged."""
        return self

    def transform(self, X):
        """Stringify ``X`` and return it as a one-dimensional array.

        A DataFrame contributes only its first column; any other input is
        converted with ``np.asarray``, cast to ``str`` and flattened.
        """
        if not isinstance(X, pd.DataFrame):
            as_text = np.asarray(X).astype(str)
            return as_text.ravel()
        first_column = X.iloc[:, 0]
        return first_column.astype(str).values

class DateTimeExpand(BaseEstimator, TransformerMixin):
    """Stateless transformer that splits datetime columns into integer parts.

    Every configured column ``c`` yields ``c_year``, ``c_month``, ``c_day``
    and ``c_dow`` (day of week). Values that cannot be parsed as datetimes
    are coerced to missing and then written out as ``0``.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer unchanged."""
        return self

    def transform(self, X):
        """Return the expanded parts of all configured columns side by side.

        With no configured columns the result is an empty ``(len(X), 0)``
        NumPy array instead of a DataFrame.
        """
        frames = []
        for name in self.columns:
            stamps = pd.to_datetime(X[name], errors="coerce")
            # Keys are listed in the order the output columns must appear.
            parts = {
                "year": stamps.dt.year,
                "month": stamps.dt.month,
                "day": stamps.dt.day,
                "dow": stamps.dt.dayofweek,
            }
            frames.append(pd.DataFrame({
                f"{name}_{suffix}": values.fillna(0).astype(int)
                for suffix, values in parts.items()
            }))
        if not frames:
            return np.empty((len(X), 0))
        return pd.concat(frames, axis=1)

# -----------------------------
# Utils
# -----------------------------
def ensure_dense_if_small(X, max_feats=50000):
    """Densify ``X`` when it offers ``toarray`` and is narrow enough.

    Args:
        X: Feature matrix; anything without a ``toarray`` method is returned
            untouched and its shape is never inspected.
        max_feats: Largest column count (``X.shape[1]``) that is still
            converted.

    Returns:
        ``X.toarray()`` when the conversion applies, otherwise ``X`` itself.
    """
    can_densify = hasattr(X, "toarray")
    if not can_densify or X.shape[1] > max_feats:
        return X
    return X.toarray()

def compile_loaded_model(model, n_classes: int):
    """Compile a freshly loaded model with a loss chosen from the class count.

    Compiling is not needed for ``predict()``; it is done only to silence
    the absl warning emitted for uncompiled loaded models.

    Args:
        model: Object exposing a Keras-style ``compile`` method.
        n_classes: Number of target classes; two or fewer selects the binary
            loss, anything larger the sparse categorical loss.
    """
    if n_classes <= 2:
        loss_name = "binary_crossentropy"
    else:
        loss_name = "sparse_categorical_crossentropy"
    model.compile(optimizer="adam", loss=loss_name, metrics=["accuracy"])

def build_df_for_text(text: str, bundle: dict) -> pd.DataFrame:
    """Wrap a single sentence in a one-row frame shaped like the training data.

    Column lists come from the optional bundle keys ``numeric_cols``,
    ``cat_cols``, ``datetime_cols`` and ``text_cols``. Placeholders are:
    ``0`` for numeric, ``'unknown'`` for categorical and the current
    timestamp for datetime columns. The sentence goes into the first text
    column and the remaining text columns are left empty. When the bundle
    lists no text columns, a lone ``text`` column carries the sentence.

    Args:
        text: Sentence to score.
        bundle: Loaded preprocessing bundle.

    Returns:
        A one-row DataFrame with numeric, categorical, datetime and text
        columns in that order.
    """
    row = {}
    row.update({name: [0] for name in bundle.get("numeric_cols", [])})
    row.update({name: ["unknown"] for name in bundle.get("cat_cols", [])})
    row.update(
        {name: [pd.Timestamp.today()] for name in bundle.get("datetime_cols", [])}
    )

    text_names = bundle.get("text_cols", [])
    if not text_names:
        # No text column was recorded at training time: use a generic one.
        row["text"] = [text]
        return pd.DataFrame(row)

    primary, *secondary = text_names
    row[primary] = [text]
    for name in secondary:
        row[name] = [""]
    return pd.DataFrame(row)

def predict_dataframe(df_in: pd.DataFrame, bundle: dict, model, out_csv=None, print_out=False) -> pd.DataFrame:
    """Score ``df_in`` and return it with the predictions attached.

    Args:
        df_in: Rows to score; handed as-is to ``bundle["preprocess"].transform``.
            It is never modified.
        bundle: Mapping holding a fitted ``"preprocess"`` transformer and a
            ``"label_encoder"`` that exposes ``classes_`` and
            ``inverse_transform``.
        model: Object whose ``predict(X, verbose=0)`` returns per-row scores.
        out_csv: When truthy, the result is written there as UTF-8 CSV
            without the index.
        print_out: When true, print at most the first 20 rows of the
            prediction columns.

    Returns:
        A new DataFrame: ``pred_label`` first, then the input columns, then
        probability columns (``prob_neg``/``prob_pos`` for problems with at
        most two classes, otherwise one ``prob_<class>`` per class). An
        existing ``pred_label`` input column is replaced, and input columns
        that share a probability column's name are overwritten.
    """
    preprocess    = bundle["preprocess"]
    label_encoder = bundle["label_encoder"]
    X_proc = ensure_dense_if_small(preprocess.transform(df_in))

    probs = model.predict(X_proc, verbose=0)
    classes = [str(c) for c in label_encoder.classes_]
    n_classes = len(classes)

    if probs.ndim == 1 or probs.shape[1] == 1:
        # Single score per row: treat it as the positive-class probability
        # and expand to a two-column [neg, pos] matrix.
        pos = probs.ravel()
        prob_mat = np.column_stack([1.0 - pos, pos])
        pred_idx = (pos >= 0.5).astype(int)
    else:
        prob_mat = probs
        pred_idx = np.argmax(prob_mat, axis=1)

    pred_labels = label_encoder.inverse_transform(pred_idx)

    # drop() returns a new frame, so df_in stays untouched. Removing a stale
    # "pred_label" first allows an earlier predictions file to be re-scored;
    # DataFrame.insert would otherwise raise ValueError on the duplicate name.
    out = df_in.drop(columns=["pred_label"], errors="ignore")
    out.insert(0, "pred_label", pred_labels)

    # attach probabilities
    if n_classes <= 2 and prob_mat.shape[1] == 2:
        out["prob_neg"] = prob_mat[:, 0]
        out["prob_pos"] = prob_mat[:, 1]
    else:
        for j, cls in enumerate(classes):
            out[f"prob_{cls}"] = prob_mat[:, j]

    if out_csv:
        out.to_csv(out_csv, index=False, encoding="utf-8")
        print(f"[SAVE] Predictions -> {out_csv}")

    if print_out:
        # Column labels are not guaranteed to be strings (e.g. integer
        # headers), so check the type before calling startswith.
        prob_cols = [
            c for c in out.columns
            if isinstance(c, str) and c.startswith("prob_")
        ]
        print(out[["pred_label"] + prob_cols].head(20).to_string(index=False))

    return out

# -----------------------------
# CLI
# -----------------------------
def parse_args(argv=None):
    """Parse the predictor's command-line options.

    Args:
        argv: Argument list to parse; ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``in_csv``, ``out_csv``, ``single_text`` (each a
        string or ``None``) and the boolean ``do_print``.
    """
    parser = argparse.ArgumentParser(description="EcoHabit predictor")

    # (flag, destination, help) for the optional string-valued options.
    string_options = (
        ("--in", "in_csv", "Input CSV (same columns as training)"),
        ("--out", "out_csv", "Output predictions CSV path"),
        ("--text", "single_text", "Single text to predict"),
    )
    for flag, dest, help_text in string_options:
        parser.add_argument(flag, dest=dest, type=str, default=None, help=help_text)

    parser.add_argument(
        "--print",
        dest="do_print",
        action="store_true",
        help="Print predictions to console",
    )
    return parser.parse_args(argv)

def _prompt_or_blank(message: str) -> str:
    """Ask for one line of input; a closed stdin counts as a blank answer."""
    try:
        return input(message).strip()
    except EOFError:
        return ""


def main(argv=None):
    """Command-line entry point: gather input, load artifacts, write predictions.

    With neither ``--text`` nor ``--in`` given, the user is asked for a
    sentence first and, if that is left blank, for a CSV path.

    Args:
        argv: Argument list; ``None`` means "use ``sys.argv``", except inside
            a Jupyter kernel where the kernel's own arguments are discarded.

    Raises:
        ValueError: No sentence and no CSV path were provided.
        FileNotFoundError: The preprocessing bundle, the model file or the
            input CSV does not exist.
    """
    # If running inside Jupyter, strip its injected args.
    if argv is None and ("ipykernel_launcher" in sys.argv[0] or "-f" in sys.argv):
        argv = []

    args = parse_args(argv)

    # Interactive fallback if no args given
    if args.single_text is None and args.in_csv is None:
        sentence = _prompt_or_blank("Enter a sentence to predict (or leave blank to use a CSV): ")
        if sentence:
            args.single_text = sentence
        else:
            # Both prompts tolerate EOF, so a non-interactive run ends with
            # the usage error below rather than a bare EOFError.
            csv_path = _prompt_or_blank("Enter path to input CSV (or press Enter to cancel): ")
            if not csv_path:
                raise ValueError("Provide either --text 'some sentence' or --in path\\to\\file.csv")
            args.in_csv = csv_path

    # Load artifacts
    if not os.path.exists(PKL_PATH):
        raise FileNotFoundError(f"Missing preprocess bundle: {PKL_PATH}")
    if not os.path.exists(H5_PATH):
        raise FileNotFoundError(f"Missing model file: {H5_PATH}")

    bundle = joblib.load(PKL_PATH)
    model  = load_model(H5_PATH)
    compile_loaded_model(model, len(bundle["label_encoder"].classes_))

    # Build input
    if args.single_text is not None:
        df_in = build_df_for_text(args.single_text, bundle)
    else:
        if not os.path.exists(args.in_csv):
            raise FileNotFoundError(f"Input CSV not found: {args.in_csv}")
        df_in = pd.read_csv(args.in_csv)

    out_csv = args.out_csv or DEFAULT_OUT
    predict_dataframe(df_in, bundle, model, out_csv=out_csv, print_out=args.do_print)

# -----------------------------
# Optional: importable helpers for notebooks
# -----------------------------
def predict_text(text: str, save_to: str = None, show=True):
    """Notebook shortcut: score one sentence and return the result frame.

    The bundle and the model are loaded from ``PKL_PATH`` and ``H5_PATH``
    on every call.

    Args:
        text: Sentence to score.
        save_to: Optional CSV path for the predictions; nothing is written
            when it is left as ``None``.
        show: Print the prediction columns when true.

    Returns:
        The DataFrame produced by ``predict_dataframe``.
    """
    artifacts = joblib.load(PKL_PATH)
    net = load_model(H5_PATH)
    n_labels = len(artifacts["label_encoder"].classes_)
    compile_loaded_model(net, n_labels)

    one_row = build_df_for_text(text, artifacts)
    return predict_dataframe(one_row, artifacts, net, out_csv=save_to, print_out=show)

def predict_csv(csv_path: str, save_to: str = None, show=True):
    """Notebook shortcut: score every row of a CSV file and return the result.

    The bundle and the model are loaded from ``PKL_PATH`` and ``H5_PATH``
    on every call; the CSV is read only after both have loaded.

    Args:
        csv_path: Path of the CSV to read with ``pd.read_csv``.
        save_to: Optional CSV path for the predictions; nothing is written
            when it is left as ``None``.
        show: Print the prediction columns when true.

    Returns:
        The DataFrame produced by ``predict_dataframe``.
    """
    artifacts = joblib.load(PKL_PATH)
    net = load_model(H5_PATH)
    n_labels = len(artifacts["label_encoder"].classes_)
    compile_loaded_model(net, n_labels)

    rows = pd.read_csv(csv_path)
    return predict_dataframe(rows, artifacts, net, out_csv=save_to, print_out=show)

# -----------------------------
# Entrypoint
# -----------------------------
# Run the CLI only when this code executes as the main program (a script run,
# or a notebook cell, where __name__ is also "__main__"); a plain import does
# not call main().
if __name__ == "__main__":
    main()


Enter a sentence to predict (or leave blank to use a CSV):  
Enter path to input CSV (or press Enter to cancel):  C:\Users\sagni\Downloads\archive\sustainable_fashion_trends_2024.csv




[SAVE] Predictions -> C:\Users\sagni\Downloads\Eco Habit\predictions.csv
