In [6]:
# ===== URL -> auto-scrape -> suspicious score (single concise output) =====
# Paste & run in Colab / Jupyter. Requires /content/ig_auth_pipeline.pkl from your training step.

!pip -q install instagrapi joblib requests opencv-python-headless rapidfuzz >/dev/null

import os, re, getpass, logging, warnings
from pathlib import Path
from datetime import datetime, timezone
import joblib, requests, numpy as np, pandas as pd
from instagrapi import Client
from instagrapi.exceptions import ChallengeRequired, TwoFactorRequired
from rapidfuzz.fuzz import partial_ratio, ratio

# Silence ALL Python warnings for this cell so the printed result stays short.
# Note this is a blanket filter: deprecation/runtime warnings from numpy,
# pandas and the model libraries are hidden as well.
warnings.filterwarnings("ignore")
# Only CRITICAL records from the scraping/HTTP libraries are let through.
logging.getLogger("instagrapi").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").setLevel(logging.CRITICAL)

# Saved model bundle loaded further down with joblib.load.
MODEL_PATH = "/content/ig_auth_pipeline.pkl"
# instagrapi settings file used by get_client() to reuse a previous login.
SESSION_PATH = "/content/ig_session.json"
# NOTE(review): MAX_POSTS and TIMEOUT_S are not referenced anywhere in the
# visible part of this cell -- presumably leftovers from a post-scraping
# variant; confirm before relying on them.
MAX_POSTS = 8
TIMEOUT_S = 8

# ---------------- helpers ----------------
# First path segments that Instagram uses for non-profile pages; a URL whose
# first segment is one of these does not identify a user.
_NON_PROFILE_SEGMENTS = frozenset(
    {"p", "reel", "reels", "tv", "explore", "stories", "accounts", "direct"}
)


def _norm_input(s: str) -> str:
    """Extract a bare Instagram username from a profile URL or ``@handle``.

    Accepted forms are a profile URL (``https://www.instagram.com/name/?x=1``),
    a handle with a leading ``@`` or a plain username.  The result only ever
    contains the characters ``A-Z a-z 0-9 . _``.

    Args:
        s: Raw user input; ``None`` and empty strings are tolerated.

    Returns:
        The username, or ``""`` when the input cannot name a profile: empty
        input, a URL without a username segment, or a URL that points at a
        non-profile page such as ``/p/<id>`` or ``/explore``.
    """
    s = (s or "").strip()
    m = re.search(r"instagram\.com/([^/?#]+)", s, flags=re.I)
    if m:
        candidate = m.group(1)
        if candidate.lower() in _NON_PROFILE_SEGMENTS:
            return ""
    elif "/" in s:
        # URL-like text without a usable username segment.  Usernames never
        # contain "/", so do not let the character filter below glue the URL
        # pieces together into a bogus handle.
        return ""
    else:
        candidate = s
    # Same sanitising for both branches, so "instagram.com/@name" -> "name".
    return re.sub(r"[^A-Za-z0-9._]", "", candidate.lstrip("@"))

def get_client():
    """Return a logged-in instagrapi ``Client``.

    A session saved at ``SESSION_PATH`` is tried first and validated with a
    timeline request.  If there is no saved session, or it no longer works,
    the user is prompted for credentials (and a 2FA / challenge code when
    Instagram asks for one).  After the login step the settings are written
    back to ``SESSION_PATH`` on a best-effort basis.

    Returns:
        The authenticated client.

    Raises:
        RuntimeError: If the login challenge could not be completed.
        Exception: Any other instagrapi login error propagates unchanged.
    """
    cl = Client()
    if os.path.exists(SESSION_PATH):
        try:
            cl.load_settings(SESSION_PATH)
            cl.set_locale("en_US")
            cl.set_timezone_offset(19800)
            # Cheap authenticated call: raises if the stored session is stale.
            cl.get_timeline_feed()
            return cl
        except Exception:
            # Deliberate best-effort: an unusable session simply falls
            # through to the interactive login below.
            pass
    # prompt once for credentials
    print("Instagram login required (used only to fetch public profile info).")
    ig_user = input("IG username: ").strip()
    ig_pass = getpass.getpass("IG password (hidden): ")
    cl.set_locale("en_US")
    cl.set_timezone_offset(19800)
    try:
        cl.login(ig_user, ig_pass)
    except TwoFactorRequired:
        code = input("Enter 2FA code: ").strip()
        # instagrapi completes a 2FA login by calling login() again with the
        # code passed as verification_code.
        cl.login(ig_user, ig_pass, verification_code=code)
    except ChallengeRequired:
        # NOTE(review): confirm that challenge_choose_method /
        # challenge_send_code exist on the installed instagrapi Client; if
        # they do not, this branch always ends in the RuntimeError below.
        try:
            try:
                cl.challenge_choose_method(1)
            except Exception:
                cl.challenge_choose_method(0)
            code = input("Enter verification code (email/SMS): ").strip()
            cl.challenge_send_code(code)
        except Exception as exc:
            # Keep the underlying cause attached for debugging.
            raise RuntimeError(
                "Login challenge failed — approve login in IG app and retry."
            ) from exc
    try:
        cl.dump_settings(SESSION_PATH)
    except Exception:
        # Failing to persist the session only costs a re-login next time.
        pass
    return cl

def fetch_user_info_robust(cl: Client, username: str):
    """Fetch the profile object for ``username``, with one fallback route.

    The direct lookup by username is attempted first.  If it raises anything,
    the username is resolved to a user id and the profile is requested by id.

    Args:
        cl: Client used for the requests.
        username: Account name without the leading ``@``.

    Returns:
        Whatever the client returns for the profile lookup.

    Raises:
        RuntimeError: When the fallback route fails as well; the message
            carries the text of the fallback error.
    """
    try:
        return cl.user_info_by_username(username)
    except Exception:
        # The error of the direct lookup is intentionally not reported;
        # only a failure of the id-based route below reaches the caller.
        try:
            user_id = cl.user_id_from_username(username)
            profile = cl.user_info(user_id)
        except Exception as err:
            message = f"Fetching user info failed for @{username}: {err}"
            raise RuntimeError(message)
        return profile

# ---------------- align to model features ----------------
# Load the trained bundle once at import/cell-run time and fail fast if it is
# missing or incomplete, instead of failing with a bare KeyError in the
# middle of a prediction.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model not found at {MODEL_PATH}. Run the training cell first.")

# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code while doing so -- only load a bundle you produced yourself.
save_obj = joblib.load(MODEL_PATH)
if "feature_order" not in save_obj:
    raise RuntimeError("Saved model missing 'feature_order'. Retrain/save pipeline with that attribute.")

# predict_from_url() reads both of these entries unconditionally
# ("calibrator" is optional), so check for them here as well.
_missing_parts = [k for k in ("preprocessor", "classifier") if k not in save_obj]
if _missing_parts:
    raise RuntimeError(
        "Saved model missing " + ", ".join(repr(k) for k in _missing_parts)
        + ". Retrain/save pipeline with these entries."
    )

# Column order the preprocessor was fitted with.
FEATURE_ORDER = save_obj["feature_order"]

# ---------------- extraction tailored to your dataset columns ----------------
def build_features_from_instainfo(ui):
    """Turn a scraped profile object into the flat feature dict for the model.

    Every field is read with ``getattr`` and a default, so an attribute that
    is absent and one that is falsy (``None``, ``""``, ``0``) are treated the
    same way.

    Args:
        ui: Profile object exposing (some of) ``follower_count``,
            ``following_count``, ``media_count``, ``profile_pic_url``,
            ``full_name``, ``username``, ``biography``, ``external_url`` and
            ``is_private``.

    Returns:
        dict mapping feature name to an ``int`` (counts, lengths, 0/1 flags)
        or ``float`` (the two derived features).
    """
    # NOTE(review): the feature names are presumably the columns used at
    # training time -- verify against the saved feature_order.

    def _read(name, fallback):
        # Missing attribute and falsy value both collapse to the fallback.
        return getattr(ui, name, fallback) or fallback

    n_followers = int(_read("follower_count", 0))
    n_following = int(_read("following_count", 0))
    n_posts = int(_read("media_count", 0))
    picture_url = _read("profile_pic_url", "")
    display_name = _read("full_name", "")
    handle = _read("username", "")
    biography = _read("biography", "")
    link = _read("external_url", "")
    private_flag = bool(_read("is_private", False))

    display_clean = display_name.strip()
    # A blank display name never counts as "equal to the username".
    same_name = bool(display_clean) and display_clean.lower() == handle.strip().lower()

    return {
        # canonical features
        "has_profile_picture": int(bool(picture_url)),
        "username_length": len(handle),
        # str.split() already yields [] for empty / whitespace-only names
        "fullname_words": len(display_name.split()),
        "fullname_length": len(display_name),
        "name_equals_username": int(same_name),
        "description_length": len(biography),
        "external_url_present": int(bool(link)),
        "is_private": int(private_flag),
        "num_posts": n_posts,
        "num_followers": n_followers,
        "num_following": n_following,
        # derived features; the denominator is clamped to 1 to avoid
        # dividing by zero for accounts that follow nobody
        "followers_to_following_ratio": float(n_followers / max(n_following, 1)),
        "log_followers": float(np.log1p(n_followers)),
    }

# ---------------- main function ----------------
def _interpret_score(prob):
    """Map a probability to one of the three suspicion labels."""
    if prob < 0.30:
        return "LOW SUSPICION (Likely Real)"
    if prob < 0.60:
        return "MEDIUM SUSPICION (Review)"
    return "HIGH SUSPICION (Likely Fake)"


def _short_reasons(feats, limit=2):
    """Return at most ``limit`` heuristic, human-readable reasons."""
    reasons = []
    if feats.get("has_profile_picture", 0) == 0:
        reasons.append("No profile picture")
    if feats.get("external_url_present", 0) == 1:
        reasons.append("Has external link")
    if feats.get("followers_to_following_ratio", 0) < 1:
        reasons.append("Low follower/following ratio")
    return reasons[:limit]


def predict_from_url(url_or_username: str):
    """Scrape one public profile and print its suspicion score.

    Steps: normalise the input to a username, log in, fetch the profile,
    build the feature row in ``FEATURE_ORDER``, transform it with the saved
    preprocessor and score it with the calibrator when the bundle has one,
    otherwise with the classifier.

    Args:
        url_or_username: Profile URL, ``@handle`` or plain username.

    Returns:
        None.  The outcome is printed: a score line, optionally a line with
        up to two heuristic reasons, and optionally a note listing model
        features that could not be scraped.  Invalid input, fetch errors and
        private accounts print a ``Prediction failed`` message instead.
    """
    username = _norm_input(url_or_username)
    if not username:
        print("Prediction failed: invalid input.")
        return

    cl = get_client()
    try:
        ui = fetch_user_info_robust(cl, username)
    except Exception as e:
        print("Prediction failed:", str(e))
        return

    # reject private accounts (not enough public data)
    if getattr(ui, "is_private", False):
        print("Prediction failed: account is PRIVATE — cannot auto-scrape required features.")
        return

    feats = build_features_from_instainfo(ui)

    # Align to the model's expected features.  Anything the model was
    # trained on but the scraper does not produce is filled with 0; the
    # names are kept so the substitution is reported rather than silent.
    zero_filled = [c for c in FEATURE_ORDER if c not in feats]
    for c in zero_filled:
        feats[c] = 0

    X_one = pd.DataFrame([feats])[FEATURE_ORDER]

    # transform & predict
    preproc = save_obj["preprocessor"]
    clf = save_obj["classifier"]
    calib = save_obj.get("calibrator", None)

    X_tr = preproc.transform(X_one)
    scorer = calib if calib is not None else clf
    # NOTE(review): column 1 is taken as the "fake/suspicious" class --
    # confirm against the label encoding used in training.
    prob = float(scorer.predict_proba(X_tr)[:, 1][0])

    interp = _interpret_score(prob)
    reasons = _short_reasons(feats)

    # print concise output only
    print(f"Suspicious Score: {prob:.4f} ({prob*100:.1f}%) — {interp}")
    if reasons:
        print("- " + "; ".join(reasons))
    if zero_filled:
        # A score built on substituted zeros may not be meaningful.
        print(
            "- Note: model features not scraped (filled with 0): "
            + ", ".join(str(c) for c in zero_filled)
        )

# ---------------- run prompt ----------------
# Cell entry point: read one profile reference from the user and score it.
# Any exception not already handled inside predict_from_url is reduced to a
# single short line so the cell never ends in a traceback.
try:
    predict_from_url(
        input("Paste public Instagram profile URL or @username: ").strip()
    )
except Exception as err:
    print("Prediction failed:", err)


Paste public Instagram profile URL or @username: https://www.instagram.com/gunav__s/?igsh=MXF5Yzl4bmtjYzRwNg%3D%3D#
Suspicious Score: 0.0000 (0.0%) — LOW SUSPICION (Likely Real)
- Low follower/following ratio
