In [1]:
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Tuple, Iterable, Optional
import numpy as np
import pandas as pd

# Optional dependencies are imported lazily

def _load_model(model_path: Optional[Path]):
    import joblib
    if model_path is not None and model_path.exists():
        return joblib.load(model_path)
    # Fallback: look in current directory
    here = Path(__file__).resolve().parent
    mp = here / 'model_tfidf_lr.joblib'
    if mp.exists():
        return joblib.load(mp)
    return None


def _rule_score(text: str) -> float:
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    vs = SentimentIntensityAnalyzer()
    return float(vs.polarity_scores(text).get('compound', 0.0))


def _score_to_label(score: float, pos: float = 0.2, neg: float = -0.2) -> str:
    return 'positive' if score >= pos else ('negative' if score <= neg else 'neutral')


def predict_one(text: str, model=None) -> Tuple[str, float]:
    """Predict for a single review. Returns (label, confidence/score).
    If `model` is None, uses rule-based fallback.
    """
    text = (text or '').strip()
    if not text:
        return '', 0.0
    if model is None:
        comp = _rule_score(text)
        return _score_to_label(comp), float(abs(comp))
    # Model path: pipeline with vectorizer -> classifier
    lab = model.predict([text])[0]
    try:
        proba = model.predict_proba([text])
        conf = float(np.max(proba))
    except Exception:
        conf = 0.0
    return str(lab), conf


def predict_many(texts: Iterable[str], model=None) -> pd.DataFrame:
    rows = []
    for t in texts:
        lab, conf = predict_one(t, model)
        rows.append({'text': t, 'sentiment': lab, 'confidence': conf})
    return pd.DataFrame(rows)


def predict_file(input_path: Path, text_col: str, out_csv: Path, model=None) -> Path:
    if input_path.suffix.lower() == '.json':
        df_in = pd.read_json(input_path, lines=True)
    else:
        df_in = pd.read_csv(input_path)

    if text_col not in df_in.columns:
        # Try common fallbacks
        for cand in ['reviewText', 'Text', 'text', 'review', 'content']:
            if cand in df_in.columns:
                text_col = cand
                break
        else:
            raise SystemExit(f"Text column '{text_col}' not found. Available: {df_in.columns.tolist()}")

    preds = predict_many(df_in[text_col].astype(str).fillna(''), model)
    out = pd.concat([df_in.reset_index(drop=True), preds[['sentiment', 'confidence']]], axis=1)
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(out_csv, index=False)
    print(f"[OK] Wrote predictions → {out_csv}")
    return out_csv


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument('--model', type=str, default=None, help='Path to model_tfidf_lr.joblib')
    ap.add_argument('--text', type=str, help='Predict a single review text')
    ap.add_argument('--file', type=str, help='CSV/JSON path for batch prediction')
    ap.add_argument('--text-col', type=str, default='reviewText', help='Text column name in the file')
    ap.add_argument('--out', type=str, default='predictions.csv', help='Output CSV path for batch mode')
    args = ap.parse_args()

    model_path = Path(args.model) if args.model else None
    model = _load_model(model_path)

    if args.text:
        lab, conf = predict_one(args.text, model)
        print({'text': args.text, 'sentiment': lab, 'confidence': conf})
        return

    if args.file:
        predict_file(Path(args.file), args.text_col, Path(args.out), model)
        return

    ap.print_help()

if __name__ == '__main__':
    main()


usage: ipykernel_launcher.py [-h] [--model MODEL] [--text TEXT] [--file FILE] [--text-col TEXT_COL] [--out OUT]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\NXTWAVE\AppData\Roaming\jupyter\runtime\kernel-3be158de-37fa-4a76-bac9-c9812f0088f3.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
