# INIT

In [None]:
import pandas as pd
import yfinance as yf

In [None]:
!pip install yfinance

In [3]:
import pandas as pd
import yfinance as yf

In [None]:
pip install --upgrade yfinance pandas

# BANDARMOLOGY

## Filter saham yang masih hidup

In [27]:
# --- PATCH: Filter keluar saham "mati suri" menjadi roster aktif yang ringkas ---
# deps: pip install yfinance pandas numpy (sekali saja)

import re, os, time, zipfile, warnings
import numpy as np, pandas as pd, yfinance as yf
from IPython.display import display

warnings.filterwarnings("ignore")

# ====== CONFIG (ubah sesuai selera) ======
INPUT_PATH         = "candidates_from_excel.csv"  # CSV/Excel berisi daftar ticker
PREFER_EXCEL       = False                        # True jika INPUT_PATH adalah Excel
OUTPUT_ACTIVE_CSV  = "candidates_active_filtered.csv"
OUTPUT_FULL_CSV    = "candidates_full_with_flags.csv"

# Lookback & threshold “hidup”
LOOKBACK_DAYS      = 60         # cek aktivitas 60 hari bursa terakhir
MIN_NONZERO_DAYS   = 15         # minimal hari dengan volume > 0
MIN_PCT_NONZERO    = 0.50       # minimal % hari aktif (mis. 50%)
MAX_CONSEC_ZERO    = 10         # maksimal run berturut-turut volume=0
MIN_MED_VALUE_90D  = 7.5e9      # Rp 7.5 miliar median value traded 90D (Close*Vol)
MIN_PRICE_FLOOR    = 200        # harga minimum
MIN_TURNOVER       = 0.0005     # 0.05% median turnover (opsional, di-skip jika sharesOut tidak ada)

# YF batching
DL_PERIOD_FOR_ACTIVITY = 120    # tarik 120 hari, evaluasi 60 hari terakhir
DL_PERIOD_FOR_LIQ      = 200    # untuk median value 90D
CHUNK_SIZE             = 50
PAUSE_SEC              = 0.8

# ====== Helpers ======
def extract_tickers_from_df(df):
    tickers = set()
    for c in df.columns:
        s = df[c].astype(str).str.upper().str.strip()
        extracted = s.str.extract(r'\b([A-Z]{2,5}(?:\.JK)?)\b')[0].dropna()
        for sym in extracted:
            base = sym.replace(".JK","")
            if 2 <= len(base) <= 5 and base.isalpha():
                tickers.add(sym if sym.endswith(".JK") else f"{sym}.JK")
    return sorted(tickers)

def load_candidates(path, prefer_excel=False):
    path_l = path.lower()
    if prefer_excel:
        try:
            import openpyxl
            if path_l.endswith(".xlsx"):
                xl = pd.ExcelFile(path, engine="openpyxl")
                codes=set()
                for sh in xl.sheet_names:
                    df = xl.parse(sh)
                    codes |= set(extract_tickers_from_df(df))
                return sorted(codes)
            elif path_l.endswith(".xls"):
                import xlrd
                xl = pd.ExcelFile(path, engine="xlrd")
                codes=set()
                for sh in xl.sheet_names:
                    df = xl.parse(sh)
                    codes |= set(extract_tickers_from_df(df))
                return sorted(codes)
        except Exception:
            pass
    # CSV fallback / default
    df = pd.read_csv(path)
    cols_lower = [c.lower() for c in df.columns]
    if "ticker" in cols_lower:
        s = df[df.columns[cols_lower.index("ticker")]]
        return extract_tickers_from_df(pd.DataFrame({"ticker": s}))
    if "code" in cols_lower:
        s = df[df.columns[cols_lower.index("code")]]
        return extract_tickers_from_df(pd.DataFrame({"code": s}))
    return extract_tickers_from_df(df)

def _slice_px(px, t, batch_len):
    if isinstance(px.columns, pd.MultiIndex):
        try:
            return px[t].dropna()
        except Exception:
            return pd.DataFrame()
    else:
        return px.dropna() if batch_len == 1 else pd.DataFrame()

def download_panel(tickers, period_days, chunk=50, pause=0.8):
    """Return dict[ticker] -> DF OHLCV"""
    out = {}
    for i in range(0, len(tickers), chunk):
        batch = tickers[i:i+chunk]
        try:
            px = yf.download(batch, period=f"{period_days}d", interval="1d",
                             auto_adjust=False, group_by="ticker", progress=False, threads=True, timeout=45)
        except Exception:
            px = pd.DataFrame()
        for t in batch:
            try:
                df = _slice_px(px, t, len(batch))
            except Exception:
                df = pd.DataFrame()
            out[t] = df
        time.sleep(pause)
    return out

def fetch_meta(tickers):
    rows=[]
    for t in tickers:
        tk = yf.Ticker(t)
        try:
            info = tk.info or {}
        except Exception:
            info = {}
        rows.append({
            "ticker": t,
            "marketCap": info.get("marketCap"),
            "sector": info.get("sector"),
            "industry": info.get("industry"),
            "sharesOut": info.get("sharesOutstanding") or info.get("floatShares")
        })
    return pd.DataFrame(rows)

def max_consecutive_zeros(s: pd.Series) -> int:
    z = (s == 0).astype(int)
    if z.sum() == 0:
        return 0
    groups = (z != z.shift()).cumsum()
    # jumlah pada grup zeros (nilai 1); grup ones (nilai 0) akan summing ke 0, aman
    return int(z.groupby(groups).sum().max())

# ====== Load daftar awal ======
CANDIDATES_ALL = load_candidates(INPUT_PATH, prefer_excel=PREFER_EXCEL)
print(f"Total kandidat masuk: {len(CANDIDATES_ALL)}")

if not CANDIDATES_ALL:
    raise SystemExit("Daftar kandidat kosong. Cek INPUT_PATH / kolom 'ticker'/'code'.")

# ====== Ambil data aktivitas & likuiditas ======
print("Unduh OHLCV untuk aktivitas…")
px_act = download_panel(CANDIDATES_ALL, DL_PERIOD_FOR_ACTIVITY, chunk=CHUNK_SIZE, pause=PAUSE_SEC)
print("Unduh OHLCV untuk likuiditas (median value 90D)…")
px_liq = download_panel(CANDIDATES_ALL, DL_PERIOD_FOR_LIQ, chunk=CHUNK_SIZE, pause=PAUSE_SEC)

print("Ambil metadata (shares outstanding/industry)…")
meta = fetch_meta(CANDIDATES_ALL)

# ====== Hitung metrik ======
rows = []
for t in CANDIDATES_ALL:
    dfa = px_act.get(t, pd.DataFrame())
    dfl = px_liq.get(t, pd.DataFrame())

    # defaults
    last_close = np.nan
    nonzero_days = 0
    pct_nonzero = 0.0
    max_zero_run = LOOKBACK_DAYS
    med_value_90d = 0.0
    med_vol_90d   = 0.0

    # aktivitas 60D
    if not dfa.empty:
        last_close = float(dfa["Close"].iloc[-1])
        look = dfa.tail(LOOKBACK_DAYS)
        if not look.empty:
            v = look["Volume"].fillna(0)
            nonzero_days = int((v > 0).sum())
            pct_nonzero  = float(nonzero_days / len(look))
            max_zero_run = max_consecutive_zeros(v)

    # likuiditas 90D (ambil dari 200D panel)
    if not dfl.empty:
        val = (dfl["Close"] * dfl["Volume"])
        med_value_90d = float(val.rolling(90).median().dropna().iloc[-1]) if len(val) >= 90 else float(val.median())
        med_vol_90d   = float(dfl["Volume"].rolling(90).median().dropna().iloc[-1]) if len(dfl) >= 90 else float(dfl["Volume"].median())

    rows.append({
        "ticker": t,
        "last_close": last_close,
        "nonzero_days_60d": nonzero_days,
        "pct_nonzero_60d": round(pct_nonzero, 3),
        "max_consec_zero_60d": max_zero_run,
        "med_value_90d": med_value_90d,
        "med_volume_90d": med_vol_90d,
    })

feat = pd.DataFrame(rows)
df = meta.merge(feat, on="ticker", how="right")

# Turnover median (opsional, jika sharesOut tersedia)
df["turnover_med"] = np.where(df["sharesOut"].fillna(0)>0, df["med_volume_90d"] / df["sharesOut"], np.nan)

# ====== Klasifikasi ======
def classify(row):
    # default flags
    reasons = []
    # kondisi "suspended-like"
    if row["nonzero_days_60d"] == 0 or row["pct_nonzero_60d"] < 0.1 or row["max_consec_zero_60d"] >= LOOKBACK_DAYS:
        return "SUSPECT_SUSPENDED", "no trading in 60d / full zero-volume window"

    # fail checks
    if not pd.isna(row["last_close"]) and row["last_close"] < MIN_PRICE_FLOOR:
        reasons.append("price < floor")
    if row["pct_nonzero_60d"] < MIN_PCT_NONZERO:
        reasons.append("pct_nonzero < threshold")
    if row["nonzero_days_60d"] < MIN_NONZERO_DAYS:
        reasons.append("nonzero_days < threshold")
    if row["max_consec_zero_60d"] > MAX_CONSEC_ZERO:
        reasons.append("max zero run too long")
    if row["med_value_90d"] < MIN_MED_VALUE_90D:
        reasons.append("med_value_90d < min")

    # turnover optional
    if not pd.isna(row["turnover_med"]) and row["turnover_med"] < MIN_TURNOVER:
        reasons.append("turnover < min")

    if reasons:
        return "DORMANT", "; ".join(reasons)
    return "ACTIVE", "pass"

lab = df.apply(classify, axis=1, result_type="expand")
lab.columns = ["status", "why"]
df = pd.concat([df, lab], axis=1)

# Urutkan yang paling likuid dulu
df = df.sort_values(["status", "med_value_90d"], ascending=[True, False]).reset_index(drop=True)

# ====== Simpan output ======
df.to_csv(OUTPUT_FULL_CSV, index=False)
active = df[df["status"]=="ACTIVE"][["ticker"]]
active.to_csv(OUTPUT_ACTIVE_CSV, index=False)

print(f"✅ Disaring: {len(df)} total | ACTIVE: {len(active)} | DORMANT: {len(df[df.status=='DORMANT'])} | SUSPECT_SUSPENDED: {len(df[df.status=='SUSPECT_SUSPENDED'])}")
print(f"💾 Saved full flags: {OUTPUT_FULL_CSV}")
print(f"💾 Saved active-only: {OUTPUT_ACTIVE_CSV}")

display(df.head(20))


Total kandidat masuk: 954
Unduh OHLCV untuk aktivitas…
Unduh OHLCV untuk likuiditas (median value 90D)…
Ambil metadata (shares outstanding/industry)…
✅ Disaring: 954 total | ACTIVE: 101 | DORMANT: 773 | SUSPECT_SUSPENDED: 80
💾 Saved full flags: candidates_full_with_flags.csv
💾 Saved active-only: candidates_active_filtered.csv


Unnamed: 0,ticker,marketCap,sector,industry,sharesOut,last_close,nonzero_days_60d,pct_nonzero_60d,max_consec_zero_60d,med_value_90d,med_volume_90d,turnover_med,status,why
0,BBCA.JK,1022950000000000.0,Financial Services,Banks - Regional,123247000000.0,8300.0,60,1.0,0,805783200000.0,92770800.0,0.000753,ACTIVE,pass
1,BBRI.JK,560768300000000.0,Financial Services,Banks - Regional,151559000000.0,3700.0,60,1.0,0,782751800000.0,196737900.0,0.001298,ACTIVE,pass
2,BMRI.JK,435866500000000.0,Financial Services,Banks - Regional,93333300000.0,4670.0,60,1.0,0,740686200000.0,154202250.0,0.001652,ACTIVE,pass
3,ANTM.JK,74255170000000.0,Basic Materials,Gold,24030800000.0,3090.0,60,1.0,0,514042700000.0,189333000.0,0.007879,ACTIVE,pass
4,BRPT.JK,230512800000000.0,Basic Materials,Chemicals,93704400000.0,2460.0,60,1.0,0,269739000000.0,168220150.0,0.001795,ACTIVE,pass
5,TLKM.JK,291242900000000.0,Communication Services,Telecom Services,99062200000.0,2940.0,60,1.0,0,259120500000.0,98005150.0,0.000989,ACTIVE,pass
6,BBNI.JK,151635200000000.0,Financial Services,Banks - Regional,578684000.0,4070.0,60,1.0,0,231694700000.0,56493650.0,0.097624,ACTIVE,pass
7,BRMS.JK,63802800000000.0,Basic Materials,Other Industrial Metals & Mining,25570200000.0,450.0,60,1.0,0,216173700000.0,572995800.0,0.022409,ACTIVE,pass
8,PTRO.JK,37822500000000.0,Basic Materials,Other Industrial Metals & Mining,10086000000.0,3750.0,60,1.0,0,206095800000.0,65679450.0,0.006512,ACTIVE,pass
9,ASII.JK,199179300000000.0,Industrials,Conglomerates,40483600000.0,4920.0,60,1.0,0,183947900000.0,38560150.0,0.000952,ACTIVE,pass


## Versi 1.0

In [26]:
# === ONE-CELL PIPELINE: Bandarmology Watchlist (IDX non-bank mid/small) ===
# 1) install deps (aman dijalankan berulang)
!pip install -q yfinance pandas numpy

# 2) CONFIG – ganti sesuai kebutuhan
CSV_OR_EXCEL_PATH = "candidates_from_excel.csv"   # pakai CSV hasil ekstraksi tadi (atau file kamu sendiri)
USE_EXCEL_FIRST   = False                         # set True jika file di atas adalah Excel
UNIVERSE_TOPN   = 10
VALUE_MIN       = 1.0e10      # ~ Rp 10 miliar (median value traded 90D)
VALUE_MAX       = 1.5e11      # ~ Rp 150 miliar
CAP_Q_MAX       = 0.75        # buang top 25% market cap (approx blue chips)
MIN_TURNOVER    = 0.001       # ≥0.1% (median Vol/SharesOut)
MIN_PRICE       = 200
CHUNK_DL        = 50          # batch size yfinance (download OHLCV)
PAUSE_SEC       = 0.8         # jeda antar batch
PERIOD_DAYS     = 45          # tarik 45 hari
LOOKBACK_DAYS   = 30          # aktif jika Volume 30D > 0
EXPORT_CSV      = "watchlist_bandarmology.csv"
EXPORT_JSON     = "watchlist_bandarmology.json"

# 3) IMPORTS
import re, json, time, zipfile, os, warnings
import numpy as np, pandas as pd, yfinance as yf
from datetime import datetime
from IPython.display import display

warnings.filterwarnings("ignore")
INDUSTRY_EX_RE  = re.compile(r"Bank|Capital Markets|Insurance|Mortgage|Credit", re.I)

# 4) HELPERS (robust untuk yfinance multi/single ticker)
def _slice_px(px, t, batch_len):
    """Ambil DataFrame untuk ticker t dari hasil yf.download.
       Handle kasus: MultiIndex (multi ticker) atau single-level (satu ticker)."""
    if isinstance(px.columns, pd.MultiIndex):
        try:
            return px[t].dropna()
        except Exception:
            return pd.DataFrame()
    else:
        # single ticker dalam batch
        return px.dropna() if batch_len == 1 else pd.DataFrame()

def extract_tickers_from_df(df):
    tickers = set()
    for c in df.columns:
        s = df[c].astype(str).str.upper().str.strip()
        extracted = s.str.extract(r'\b([A-Z]{2,5}(?:\.JK)?)\b')[0].dropna()
        for sym in extracted:
            base = sym.replace(".JK","")
            if 2 <= len(base) <= 5 and base.isalpha():
                tickers.add(sym if sym.endswith(".JK") else f"{sym}.JK")
    return sorted(tickers)

def load_candidates(path, prefer_excel=False):
    path_l = path.lower()
    if prefer_excel:
        try:
            import openpyxl  # ensure engine available
            if path_l.endswith(".xlsx"):
                xl = pd.ExcelFile(path, engine="openpyxl")
                codes=set()
                for sh in xl.sheet_names:
                    df = xl.parse(sh)
                    codes |= set(extract_tickers_from_df(df))
                return sorted(codes)
            elif path_l.endswith(".xls"):
                import xlrd
                xl = pd.ExcelFile(path, engine="xlrd")
                codes=set()
                for sh in xl.sheet_names:
                    df = xl.parse(sh)
                    codes |= set(extract_tickers_from_df(df))
                return sorted(codes)
        except Exception:
            pass
    # CSV fallback / default
    df = pd.read_csv(path)
    # gunakan kolom 'ticker' atau 'code' jika ada, kalau tidak scan seluruh kolom
    cols_lower = [c.lower() for c in df.columns]
    if "ticker" in cols_lower:
        s = df[df.columns[cols_lower.index("ticker")]]
        return extract_tickers_from_df(pd.DataFrame({"ticker": s}))
    if "code" in cols_lower:
        s = df[df.columns[cols_lower.index("code")]]
        return extract_tickers_from_df(pd.DataFrame({"code": s}))
    return extract_tickers_from_df(df)

def filter_has_volume_30d(tickers, period_days=45, lookback=30, chunk=50, pause=0.8):
    ok=[]
    for i in range(0, len(tickers), chunk):
        batch = tickers[i:i+chunk]
        try:
            px = yf.download(batch, period=f"{period_days}d", interval="1d",
                             auto_adjust=False, group_by='ticker', progress=False, threads=True, timeout=30)
        except Exception:
            px = pd.DataFrame()
        for t in batch:
            try:
                df = _slice_px(px, t, len(batch))
                if not df.empty and df['Volume'].tail(lookback).sum() > 0:
                    ok.append(t)
            except Exception:
                pass
        time.sleep(pause)
    return sorted(set(ok))

# 5) UNIVERSE BUILDER
def fetch_meta(tickers):
    rows=[]
    for t in tickers:
        tk = yf.Ticker(t)
        try:
            info = tk.info or {}
        except Exception:
            info = {}
        rows.append({
            "ticker": t,
            "marketCap": info.get("marketCap"),
            "sector": info.get("sector"),
            "industry": info.get("industry"),
            "sharesOut": info.get("sharesOutstanding") or info.get("floatShares")
        })
    return pd.DataFrame(rows)

def liquidity_panel(tickers, chunk=60, pause=0.6):
    rows=[]
    if not tickers:
        return pd.DataFrame(rows)
    for i in range(0, len(tickers), chunk):
        batch = tickers[i:i+chunk]
        try:
            px = yf.download(batch, period="180d", interval="1d",
                             auto_adjust=False, group_by="ticker", progress=False, threads=True, timeout=45)
        except Exception:
            px = pd.DataFrame()
        for t in batch:
            try:
                df = _slice_px(px, t, len(batch))
            except Exception:
                df = pd.DataFrame()
            if df.empty:
                rows.append({"ticker": t, "med_value": 0.0, "last_close": np.nan, "med_volume90": 0.0})
                continue
            med_value = float((df["Close"]*df["Volume"]).rolling(90).median().dropna().iloc[-1]
                              if len(df)>=90 else (df["Close"]*df["Volume"]).median())
            rows.append({
                "ticker": t,
                "med_value": med_value,
                "last_close": float(df["Close"].iloc[-1]),
                "med_volume90": float(df["Volume"].rolling(90).median().dropna().iloc[-1]
                                      if len(df)>=90 else df["Volume"].median())
            })
        time.sleep(pause)
    return pd.DataFrame(rows)

def build_universe(tickers,
                   value_min=VALUE_MIN, value_max=VALUE_MAX,
                   cap_quantile_max=CAP_Q_MAX,
                   min_turnover=MIN_TURNOVER, min_price=MIN_PRICE):
    if not tickers:
        return [], pd.DataFrame()
    meta = fetch_meta(tickers)
    liq  = liquidity_panel(tickers)
    if liq.empty and meta.empty:
        return [], pd.DataFrame()
    df = meta.merge(liq, on="ticker", how="left")
    # exclude finansial berat
    df = df[~df["industry"].fillna("").str.contains(INDUSTRY_EX_RE)]
    # buang top-25% market cap
    cap_cut = df["marketCap"].dropna().quantile(cap_quantile_max) if df["marketCap"].notna().any() else None
    if cap_cut:
        df = df[(df["marketCap"].isna()) | (df["marketCap"] <= cap_cut)]
    # turnover median aproksimasi
    df["turnover"] = np.where(df["sharesOut"].fillna(0)>0, df["med_volume90"]/df["sharesOut"], 0.0)
    # filter likuiditas & harga
    df = df[(df["med_value"]>=value_min) & (df["med_value"]<=value_max) &
            (df["last_close"]>=min_price) & (df["turnover"]>=min_turnover)]
    if df.empty:
        return [], df
    topn = df.sort_values("med_value", ascending=False).head(UNIVERSE_TOPN)["ticker"].tolist()
    return topn, df.sort_values("med_value", ascending=False).reset_index(drop=True)

# 6) BANDARMOLOGY METRICS
def atr(df, n=14):
    h,l,c = df['High'], df['Low'], df['Close']
    tr = np.maximum(h-l, np.maximum((h-c.shift()).abs(), (l-c.shift()).abs()))
    return tr.rolling(n).mean()

def obv(df):
    vol = df['Volume'].fillna(0)
    direction = np.sign(df['Close'].diff()).fillna(0)
    return (direction*vol).cumsum()

def adl(df):
    h,l,c,v = df['High'], df['Low'], df['Close'], df['Volume']
    rng = (h - l).replace(0, np.nan)
    clv = ((c - l) - (h - c)) / rng
    clv = clv.fillna(0)
    return (clv * v).cumsum()

def percentile_rank(series, lookback=120):
    def _pr(x):
        s = pd.Series(x)
        return s.rank(pct=True).iloc[-1]
    return series.rolling(lookback, min_periods=lookback).apply(_pr, raw=False)

def label_row(r):
    status, reasons, score = "Akumulasi Lanjut", [], 50
    if r['atr_pctile'] <= 0.35: score += 20; reasons.append("ATR%ile rendah")
    if r['vol_ma5_ma20'] < 0.7: score += 10; reasons.append("Volume dry-up")
    if r['obv_slope'] > 0: score += 15; reasons.append("OBV naik")
    if r['adl_slope'] > 0: score += 10; reasons.append("ADL naik")
    if (r['breakout_55'] and r['vol_spike'] and r['near_high']):
        score += 25; reasons += ["Breakout 55D","Vol spike","Close near high"]
    if r['above_emas']: score += 10; reasons.append("Di atas EMA20/50")
    if r['upthrust']: score -= 15; reasons.append("Upthrust")
    if r['obv_div']: score -= 10; reasons.append("Divergensi OBV")
    score = int(max(0, min(100, score)))
    if (r['breakout_55'] and r['vol_spike'] and r['near_high'] and
        r['obv_slope']>0 and r['adl_slope']>0 and r['above_emas']):
        status = "Akhir Akumulasi – Siap Markup"
    elif r['upthrust'] or r['obv_div']:
        status = "Distribusi Dini"
    else:
        status = "Akumulasi Lanjut"
    return pd.Series({"status": status, "score": score, "reasons": ", ".join(reasons)})

def scan(tickers, chunk=50, pause=0.6):
    if not tickers: 
        return pd.DataFrame()
    rows=[]
    # tarik sekaligus (lebih cepat) – untuk robustness, pakai satu panggilan saja
    try:
        data = yf.download(tickers, period="420d", interval="1d", auto_adjust=False,
                           group_by='ticker', progress=False, threads=True, timeout=60)
    except Exception:
        data = pd.DataFrame()
    for t in tickers:
        try:
            df = _slice_px(data, t, len(tickers)).dropna().copy()
        except Exception:
            df = pd.DataFrame()
        if len(df) < 120:
            continue
        df['ATR14'] = atr(df, 14)
        df['ATRp']  = (df['ATR14'] / df['Close']).replace([np.inf, -np.inf], np.nan)
        df['ATRp_pctile'] = percentile_rank(df['ATRp'])

        df['OBV'] = obv(df); df['ADL'] = adl(df)
        df['OBV_slope20'] = df['OBV'].diff(20)
        df['ADL_slope20'] = df['ADL'].diff(20)

        df['EMA20'] = df['Close'].ewm(span=20).mean()
        df['EMA50'] = df['Close'].ewm(span=50).mean()
        df['above_emas'] = (df['Close'] > df['EMA20']) & (df['Close'] > df['EMA50'])

        df['HH55'] = df['High'].rolling(55).max()
        df['LL55'] = df['Low'].rolling(55).min()
        df['breakout_55'] = df['Close'] > df['HH55'].shift(1)

        df['vol_ma20'] = df['Volume'].rolling(20).mean()
        df['vol_ma5']  = df['Volume'].rolling(5).mean()
        df['vol_ma5_ma20'] = (df['vol_ma5'] / df['vol_ma20']).replace([np.inf,-np.inf], np.nan)
        df['vol_spike'] = df['Volume'] > 1.8*df['vol_ma20']

        df['near_high'] = (df['High'] - df['Close']) <= 0.2*df['ATR14']

        rng = (df['High'] - df['Low']).replace(0, np.nan)
        lower_tail = (df[['Close','Open']].min(axis=1) - df['Low']).abs()
        df['spring_like'] = (df['Low'] < df['LL55'].shift(1)) & ((lower_tail / rng) >= 0.6)

        upper_tail = (df['High'] - df[['Close','Open']].max(axis=1)).abs()
        df['upthrust'] = (df['High'] > df['HH55'].shift(1)) & \
                         (df['Close'] < df['HH55'].shift(1)) & \
                         ((upper_tail / rng) >= 0.5) & \
                         (df['Volume'] > 1.5*df['vol_ma20'])

        df['price_slope20'] = df['Close'].diff(20)
        df['obv_div'] = (df['price_slope20']>0) & (df['OBV_slope20']<=0)

        last = df.iloc[-1]
        feat = {
            "ticker": t,
            "date": df.index[-1].date().isoformat(),
            "close": round(float(last['Close']),2),
            "value_traded": float(last['Close']*last['Volume']),
            "atr_pctile": float(last['ATRp_pctile']) if pd.notna(last['ATRp_pctile']) else 1.0,
            "vol_ma5_ma20": float(last['vol_ma5_ma20']) if pd.notna(last['vol_ma5_ma20']) else 1.0,
            "obv_slope": float(last['OBV_slope20']) if pd.notna(last['OBV_slope20']) else 0.0,
            "adl_slope": float(last['ADL_slope20']) if pd.notna(last['ADL_slope20']) else 0.0,
            "breakout_55": bool(last['breakout_55']) if pd.notna(last['breakout_55']) else False,
            "vol_spike": bool(last['vol_spike']) if pd.notna(last['vol_spike']) else False,
            "near_high": bool(last['near_high']) if pd.notna(last['near_high']) else False,
            "above_emas": bool(last['above_emas']) if pd.notna(last['above_emas']) else False,
            "spring_like": bool(last['spring_like']) if pd.notna(last['spring_like']) else False,
            "upthrust": bool(last['upthrust']) if pd.notna(last['upthrust']) else False,
            "obv_div": bool(last['obv_div']) if pd.notna(last['obv_div']) else False,
        }
        lab = label_row(pd.Series(feat))
        rows.append({**feat, **lab.to_dict()})
    out = pd.DataFrame(rows)
    if out.empty:
        return out
    status_rank = {"Akhir Akumulasi – Siap Markup":0, "Akumulasi Lanjut":1, "Distribusi Dini":2}
    out['status_rank'] = out['status'].map(status_rank)
    return out.sort_values(by=['status_rank','score','value_traded'], ascending=[True,False,False]).reset_index(drop=True)

# 7) PIPELINE – Load candidates → aktif 30D → universe → scan → simpan
print("📥 Load kandidat…")
CANDIDATES_ALL = load_candidates(CSV_OR_EXCEL_PATH, prefer_excel=USE_EXCEL_FIRST)
print(f"Total kandidat (unik, normalisasi .JK): {len(CANDIDATES_ALL)}")

print("🔎 Filter aktif 30 hari (yfinance)…")
CANDIDATES = filter_has_volume_30d(CANDIDATES_ALL, period_days=PERIOD_DAYS, lookback=LOOKBACK_DAYS, chunk=CHUNK_DL, pause=PAUSE_SEC)
print(f"✅ Aktif 30D: {len(CANDIDATES)} | contoh:", CANDIDATES[:12])

print("🏗️ Build universe non-bank mid/small cap…")
topN, uni_df = build_universe(CANDIDATES)
display(uni_df.head(15))
print(f"Top {len(topN)} untuk scan:", topN)

print("\n📈 Scan bandarmology…")
df_watch = scan(topN)

if df_watch.empty:
    print("⚠️ Hasil kosong. Coba turunkan VALUE_MIN / naikkan VALUE_MAX / tambah UNIVERSE_TOPN / perbesar jeda & kecilkan CHUNK.")
else:
    cols_show = ['date','ticker','close','status','score','reasons']
    print("\n🎯 Rekomendasi (urut prioritas):")
    display(df_watch[cols_show])

    as_of = datetime.now().strftime("%Y-%m-%d")
    df_watch.to_csv(EXPORT_CSV, index=False)
    payload = {
        "as_of": as_of,
        "universe_topN": topN,
        "watchlist": df_watch[cols_show].to_dict(orient="records")
    }
    with open(EXPORT_JSON, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f"\n💾 Saved: {EXPORT_CSV} & {EXPORT_JSON}")
# === ONE-CELL PIPELINE: Bandarmology Watchlist (IDX non-bank mid/small) ===
# 1) install deps (aman dijalankan berulang)
!pip install -q yfinance pandas numpy

# 2) CONFIG – ganti sesuai kebutuhan
CSV_OR_EXCEL_PATH = "candidates_from_excel.csv"   # pakai CSV hasil ekstraksi tadi (atau file kamu sendiri)
USE_EXCEL_FIRST   = False                         # set True jika file di atas adalah Excel
UNIVERSE_TOPN   = 10
VALUE_MIN       = 1.0e10      # ~ Rp 10 miliar (median value traded 90D)
VALUE_MAX       = 1.5e11      # ~ Rp 150 miliar
CAP_Q_MAX       = 0.75        # buang top 25% market cap (approx blue chips)
MIN_TURNOVER    = 0.001       # ≥0.1% (median Vol/SharesOut)
MIN_PRICE       = 200
CHUNK_DL        = 50          # batch size yfinance (download OHLCV)
PAUSE_SEC       = 0.8         # jeda antar batch
PERIOD_DAYS     = 45          # tarik 45 hari
LOOKBACK_DAYS   = 30          # aktif jika Volume 30D > 0
EXPORT_CSV      = "watchlist_bandarmology.csv"
EXPORT_JSON     = "watchlist_bandarmology.json"

# 3) IMPORTS
import re, json, time, zipfile, os, warnings
import numpy as np, pandas as pd, yfinance as yf
from datetime import datetime
from IPython.display import display

warnings.filterwarnings("ignore")
INDUSTRY_EX_RE  = re.compile(r"Bank|Capital Markets|Insurance|Mortgage|Credit", re.I)

# 4) HELPERS (robust untuk yfinance multi/single ticker)
def _slice_px(px, t, batch_len):
    """Ambil DataFrame untuk ticker t dari hasil yf.download.
       Handle kasus: MultiIndex (multi ticker) atau single-level (satu ticker)."""
    if isinstance(px.columns, pd.MultiIndex):
        try:
            return px[t].dropna()
        except Exception:
            return pd.DataFrame()
    else:
        # single ticker dalam batch
        return px.dropna() if batch_len == 1 else pd.DataFrame()

def extract_tickers_from_df(df):
    tickers = set()
    for c in df.columns:
        s = df[c].astype(str).str.upper().str.strip()
        extracted = s.str.extract(r'\b([A-Z]{2,5}(?:\.JK)?)\b')[0].dropna()
        for sym in extracted:
            base = sym.replace(".JK","")
            if 2 <= len(base) <= 5 and base.isalpha():
                tickers.add(sym if sym.endswith(".JK") else f"{sym}.JK")
    return sorted(tickers)

def load_candidates(path, prefer_excel=False):
    path_l = path.lower()
    if prefer_excel:
        try:
            import openpyxl  # ensure engine available
            if path_l.endswith(".xlsx"):
                xl = pd.ExcelFile(path, engine="openpyxl")
                codes=set()
                for sh in xl.sheet_names:
                    df = xl.parse(sh)
                    codes |= set(extract_tickers_from_df(df))
                return sorted(codes)
            elif path_l.endswith(".xls"):
                import xlrd
                xl = pd.ExcelFile(path, engine="xlrd")
                codes=set()
                for sh in xl.sheet_names:
                    df = xl.parse(sh)
                    codes |= set(extract_tickers_from_df(df))
                return sorted(codes)
        except Exception:
            pass
    # CSV fallback / default
    df = pd.read_csv(path)
    # gunakan kolom 'ticker' atau 'code' jika ada, kalau tidak scan seluruh kolom
    cols_lower = [c.lower() for c in df.columns]
    if "ticker" in cols_lower:
        s = df[df.columns[cols_lower.index("ticker")]]
        return extract_tickers_from_df(pd.DataFrame({"ticker": s}))
    if "code" in cols_lower:
        s = df[df.columns[cols_lower.index("code")]]
        return extract_tickers_from_df(pd.DataFrame({"code": s}))
    return extract_tickers_from_df(df)

def filter_has_volume_30d(tickers, period_days=45, lookback=30, chunk=50, pause=0.8):
    ok=[]
    for i in range(0, len(tickers), chunk):
        batch = tickers[i:i+chunk]
        try:
            px = yf.download(batch, period=f"{period_days}d", interval="1d",
                             auto_adjust=False, group_by='ticker', progress=False, threads=True, timeout=30)
        except Exception:
            px = pd.DataFrame()
        for t in batch:
            try:
                df = _slice_px(px, t, len(batch))
                if not df.empty and df['Volume'].tail(lookback).sum() > 0:
                    ok.append(t)
            except Exception:
                pass
        time.sleep(pause)
    return sorted(set(ok))

# 5) UNIVERSE BUILDER
def fetch_meta(tickers):
    rows=[]
    for t in tickers:
        tk = yf.Ticker(t)
        try:
            info = tk.info or {}
        except Exception:
            info = {}
        rows.append({
            "ticker": t,
            "marketCap": info.get("marketCap"),
            "sector": info.get("sector"),
            "industry": info.get("industry"),
            "sharesOut": info.get("sharesOutstanding") or info.get("floatShares")
        })
    return pd.DataFrame(rows)

def liquidity_panel(tickers, chunk=60, pause=0.6):
    rows=[]
    if not tickers:
        return pd.DataFrame(rows)
    for i in range(0, len(tickers), chunk):
        batch = tickers[i:i+chunk]
        try:
            px = yf.download(batch, period="180d", interval="1d",
                             auto_adjust=False, group_by="ticker", progress=False, threads=True, timeout=45)
        except Exception:
            px = pd.DataFrame()
        for t in batch:
            try:
                df = _slice_px(px, t, len(batch))
            except Exception:
                df = pd.DataFrame()
            if df.empty:
                rows.append({"ticker": t, "med_value": 0.0, "last_close": np.nan, "med_volume90": 0.0})
                continue
            med_value = float((df["Close"]*df["Volume"]).rolling(90).median().dropna().iloc[-1]
                              if len(df)>=90 else (df["Close"]*df["Volume"]).median())
            rows.append({
                "ticker": t,
                "med_value": med_value,
                "last_close": float(df["Close"].iloc[-1]),
                "med_volume90": float(df["Volume"].rolling(90).median().dropna().iloc[-1]
                                      if len(df)>=90 else df["Volume"].median())
            })
        time.sleep(pause)
    return pd.DataFrame(rows)

def build_universe(tickers,
                   value_min=VALUE_MIN, value_max=VALUE_MAX,
                   cap_quantile_max=CAP_Q_MAX,
                   min_turnover=MIN_TURNOVER, min_price=MIN_PRICE):
    if not tickers:
        return [], pd.DataFrame()
    meta = fetch_meta(tickers)
    liq  = liquidity_panel(tickers)
    if liq.empty and meta.empty:
        return [], pd.DataFrame()
    df = meta.merge(liq, on="ticker", how="left")
    # exclude finansial berat
    df = df[~df["industry"].fillna("").str.contains(INDUSTRY_EX_RE)]
    # buang top-25% market cap
    cap_cut = df["marketCap"].dropna().quantile(cap_quantile_max) if df["marketCap"].notna().any() else None
    if cap_cut:
        df = df[(df["marketCap"].isna()) | (df["marketCap"] <= cap_cut)]
    # turnover median aproksimasi
    df["turnover"] = np.where(df["sharesOut"].fillna(0)>0, df["med_volume90"]/df["sharesOut"], 0.0)
    # filter likuiditas & harga
    df = df[(df["med_value"]>=value_min) & (df["med_value"]<=value_max) &
            (df["last_close"]>=min_price) & (df["turnover"]>=min_turnover)]
    if df.empty:
        return [], df
    topn = df.sort_values("med_value", ascending=False).head(UNIVERSE_TOPN)["ticker"].tolist()
    return topn, df.sort_values("med_value", ascending=False).reset_index(drop=True)

# 6) BANDARMOLOGY METRICS
def atr(df, n=14):
    h,l,c = df['High'], df['Low'], df['Close']
    tr = np.maximum(h-l, np.maximum((h-c.shift()).abs(), (l-c.shift()).abs()))
    return tr.rolling(n).mean()

def obv(df):
    vol = df['Volume'].fillna(0)
    direction = np.sign(df['Close'].diff()).fillna(0)
    return (direction*vol).cumsum()

def adl(df):
    h,l,c,v = df['High'], df['Low'], df['Close'], df['Volume']
    rng = (h - l).replace(0, np.nan)
    clv = ((c - l) - (h - c)) / rng
    clv = clv.fillna(0)
    return (clv * v).cumsum()

def percentile_rank(series, lookback=120):
    def _pr(x):
        s = pd.Series(x)
        return s.rank(pct=True).iloc[-1]
    return series.rolling(lookback, min_periods=lookback).apply(_pr, raw=False)

def label_row(r):
    status, reasons, score = "Akumulasi Lanjut", [], 50
    if r['atr_pctile'] <= 0.35: score += 20; reasons.append("ATR%ile rendah")
    if r['vol_ma5_ma20'] < 0.7: score += 10; reasons.append("Volume dry-up")
    if r['obv_slope'] > 0: score += 15; reasons.append("OBV naik")
    if r['adl_slope'] > 0: score += 10; reasons.append("ADL naik")
    if (r['breakout_55'] and r['vol_spike'] and r['near_high']):
        score += 25; reasons += ["Breakout 55D","Vol spike","Close near high"]
    if r['above_emas']: score += 10; reasons.append("Di atas EMA20/50")
    if r['upthrust']: score -= 15; reasons.append("Upthrust")
    if r['obv_div']: score -= 10; reasons.append("Divergensi OBV")
    score = int(max(0, min(100, score)))
    if (r['breakout_55'] and r['vol_spike'] and r['near_high'] and
        r['obv_slope']>0 and r['adl_slope']>0 and r['above_emas']):
        status = "Akhir Akumulasi – Siap Markup"
    elif r['upthrust'] or r['obv_div']:
        status = "Distribusi Dini"
    else:
        status = "Akumulasi Lanjut"
    return pd.Series({"status": status, "score": score, "reasons": ", ".join(reasons)})

def scan(tickers, chunk=50, pause=0.6):
    if not tickers: 
        return pd.DataFrame()
    rows=[]
    # tarik sekaligus (lebih cepat) – untuk robustness, pakai satu panggilan saja
    try:
        data = yf.download(tickers, period="420d", interval="1d", auto_adjust=False,
                           group_by='ticker', progress=False, threads=True, timeout=60)
    except Exception:
        data = pd.DataFrame()
    for t in tickers:
        try:
            df = _slice_px(data, t, len(tickers)).dropna().copy()
        except Exception:
            df = pd.DataFrame()
        if len(df) < 120:
            continue
        df['ATR14'] = atr(df, 14)
        df['ATRp']  = (df['ATR14'] / df['Close']).replace([np.inf, -np.inf], np.nan)
        df['ATRp_pctile'] = percentile_rank(df['ATRp'])

        df['OBV'] = obv(df); df['ADL'] = adl(df)
        df['OBV_slope20'] = df['OBV'].diff(20)
        df['ADL_slope20'] = df['ADL'].diff(20)

        df['EMA20'] = df['Close'].ewm(span=20).mean()
        df['EMA50'] = df['Close'].ewm(span=50).mean()
        df['above_emas'] = (df['Close'] > df['EMA20']) & (df['Close'] > df['EMA50'])

        df['HH55'] = df['High'].rolling(55).max()
        df['LL55'] = df['Low'].rolling(55).min()
        df['breakout_55'] = df['Close'] > df['HH55'].shift(1)

        df['vol_ma20'] = df['Volume'].rolling(20).mean()
        df['vol_ma5']  = df['Volume'].rolling(5).mean()
        df['vol_ma5_ma20'] = (df['vol_ma5'] / df['vol_ma20']).replace([np.inf,-np.inf], np.nan)
        df['vol_spike'] = df['Volume'] > 1.8*df['vol_ma20']

        df['near_high'] = (df['High'] - df['Close']) <= 0.2*df['ATR14']

        rng = (df['High'] - df['Low']).replace(0, np.nan)
        lower_tail = (df[['Close','Open']].min(axis=1) - df['Low']).abs()
        df['spring_like'] = (df['Low'] < df['LL55'].shift(1)) & ((lower_tail / rng) >= 0.6)

        upper_tail = (df['High'] - df[['Close','Open']].max(axis=1)).abs()
        df['upthrust'] = (df['High'] > df['HH55'].shift(1)) & \
                         (df['Close'] < df['HH55'].shift(1)) & \
                         ((upper_tail / rng) >= 0.5) & \
                         (df['Volume'] > 1.5*df['vol_ma20'])

        df['price_slope20'] = df['Close'].diff(20)
        df['obv_div'] = (df['price_slope20']>0) & (df['OBV_slope20']<=0)

        last = df.iloc[-1]
        feat = {
            "ticker": t,
            "date": df.index[-1].date().isoformat(),
            "close": round(float(last['Close']),2),
            "value_traded": float(last['Close']*last['Volume']),
            "atr_pctile": float(last['ATRp_pctile']) if pd.notna(last['ATRp_pctile']) else 1.0,
            "vol_ma5_ma20": float(last['vol_ma5_ma20']) if pd.notna(last['vol_ma5_ma20']) else 1.0,
            "obv_slope": float(last['OBV_slope20']) if pd.notna(last['OBV_slope20']) else 0.0,
            "adl_slope": float(last['ADL_slope20']) if pd.notna(last['ADL_slope20']) else 0.0,
            "breakout_55": bool(last['breakout_55']) if pd.notna(last['breakout_55']) else False,
            "vol_spike": bool(last['vol_spike']) if pd.notna(last['vol_spike']) else False,
            "near_high": bool(last['near_high']) if pd.notna(last['near_high']) else False,
            "above_emas": bool(last['above_emas']) if pd.notna(last['above_emas']) else False,
            "spring_like": bool(last['spring_like']) if pd.notna(last['spring_like']) else False,
            "upthrust": bool(last['upthrust']) if pd.notna(last['upthrust']) else False,
            "obv_div": bool(last['obv_div']) if pd.notna(last['obv_div']) else False,
        }
        lab = label_row(pd.Series(feat))
        rows.append({**feat, **lab.to_dict()})
    out = pd.DataFrame(rows)
    if out.empty:
        return out
    status_rank = {"Akhir Akumulasi – Siap Markup":0, "Akumulasi Lanjut":1, "Distribusi Dini":2}
    out['status_rank'] = out['status'].map(status_rank)
    return out.sort_values(by=['status_rank','score','value_traded'], ascending=[True,False,False]).reset_index(drop=True)

# 7) PIPELINE – Load candidates → aktif 30D → universe → scan → simpan
print("📥 Load kandidat…")
CANDIDATES_ALL = load_candidates(CSV_OR_EXCEL_PATH, prefer_excel=USE_EXCEL_FIRST)
print(f"Total kandidat (unik, normalisasi .JK): {len(CANDIDATES_ALL)}")

print("🔎 Filter aktif 30 hari (yfinance)…")
CANDIDATES = filter_has_volume_30d(CANDIDATES_ALL, period_days=PERIOD_DAYS, lookback=LOOKBACK_DAYS, chunk=CHUNK_DL, pause=PAUSE_SEC)
print(f"✅ Aktif 30D: {len(CANDIDATES)} | contoh:", CANDIDATES[:12])

print("🏗️ Build universe non-bank mid/small cap…")
topN, uni_df = build_universe(CANDIDATES)
display(uni_df.head(15))
print(f"Top {len(topN)} untuk scan:", topN)

print("\n📈 Scan bandarmology…")
df_watch = scan(topN)

if df_watch.empty:
    print("⚠️ Hasil kosong. Coba turunkan VALUE_MIN / naikkan VALUE_MAX / tambah UNIVERSE_TOPN / perbesar jeda & kecilkan CHUNK.")
else:
    cols_show = ['date','ticker','close','status','score','reasons']
    print("\n🎯 Rekomendasi (urut prioritas):")
    display(df_watch[cols_show])

    as_of = datetime.now().strftime("%Y-%m-%d")
    df_watch.to_csv(EXPORT_CSV, index=False)
    payload = {
        "as_of": as_of,
        "universe_topN": topN,
        "watchlist": df_watch[cols_show].to_dict(orient="records")
    }
    with open(EXPORT_JSON, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f"\n💾 Saved: {EXPORT_CSV} & {EXPORT_JSON}")


📥 Load kandidat…
Total kandidat (unik, normalisasi .JK): 954
🔎 Filter aktif 30 hari (yfinance)…
✅ Aktif 30D: 870 | contoh: ['AADI.JK', 'AALI.JK', 'ABBA.JK', 'ABDA.JK', 'ABMM.JK', 'ACES.JK', 'ACRO.JK', 'ACST.JK', 'ADCP.JK', 'ADES.JK', 'ADHI.JK', 'ADMF.JK']
🏗️ Build universe non-bank mid/small cap…


Unnamed: 0,ticker,marketCap,sector,industry,sharesOut,med_value,last_close,med_volume90,turnover
0,SMIL.JK,3028078034944,Industrials,Rental & Leasing Services,8751670272,28839160000.0,346.0,105306250.0,0.012033
1,INET.JK,2425965248512,Communication Services,Telecom Services,8423490048,23359620000.0,288.0,156415600.0,0.018569
2,MERI.JK,287766151168,,,1035129984,20723440000.0,278.0,54006250.0,0.052173
3,ITMA.JK,739299229696,Basic Materials,Other Industrial Metals & Mining,999052992,20572160000.0,740.0,25095100.0,0.025119
4,HRTA.JK,3384865980416,Consumer Cyclical,Luxury Goods,4605259776,19258270000.0,735.0,31544900.0,0.00685
5,MARK.JK,2545999937536,Healthcare,Medical Instruments & Supplies,3800000000,16251960000.0,670.0,21839350.0,0.005747
6,UNIQ.JK,1261869989888,Energy,Thermal Coal,3138980096,15328450000.0,402.0,33549500.0,0.010688
7,PTPP.JK,2399911804928,Industrials,Engineering & Construction,6185339904,13714090000.0,388.0,34811250.0,0.005628
8,PSAT.JK,2193877893120,,,1482349952,12271290000.0,1480.0,6717650.0,0.004532
9,ELSA.JK,3503280095232,Energy,Oil & Gas Equipment & Services,7298500096,12214610000.0,480.0,25176250.0,0.00345


Top 10 untuk scan: ['SMIL.JK', 'INET.JK', 'MERI.JK', 'ITMA.JK', 'HRTA.JK', 'MARK.JK', 'UNIQ.JK', 'PTPP.JK', 'PSAT.JK', 'ELSA.JK']

📈 Scan bandarmology…

🎯 Rekomendasi (urut prioritas):


Unnamed: 0,date,ticker,close,status,score,reasons
0,2025-08-08,SMIL.JK,346.0,Akumulasi Lanjut,100,"ATR%ile rendah, OBV naik, ADL naik, Di atas EMA20/50"
1,2025-08-08,HRTA.JK,735.0,Akumulasi Lanjut,100,"ATR%ile rendah, OBV naik, ADL naik, Di atas EMA20/50"
2,2025-08-08,INET.JK,288.0,Akumulasi Lanjut,95,"Volume dry-up, OBV naik, ADL naik, Di atas EMA20/50"
3,2025-08-08,MARK.JK,670.0,Akumulasi Lanjut,95,"ATR%ile rendah, OBV naik, ADL naik"
4,2025-08-08,UNIQ.JK,402.0,Akumulasi Lanjut,85,"ATR%ile rendah, OBV naik"
5,2025-08-08,ITMA.JK,740.0,Akumulasi Lanjut,85,"ATR%ile rendah, OBV naik"
6,2025-08-08,PTPP.JK,388.0,Akumulasi Lanjut,80,"ATR%ile rendah, Volume dry-up"
7,2025-08-08,ELSA.JK,480.0,Akumulasi Lanjut,80,"ATR%ile rendah, Volume dry-up"



💾 Saved: watchlist_bandarmology.csv & watchlist_bandarmology.json


  pid, fd = os.forkpty()


📥 Load kandidat…
Total kandidat (unik, normalisasi .JK): 954
🔎 Filter aktif 30 hari (yfinance)…
✅ Aktif 30D: 870 | contoh: ['AADI.JK', 'AALI.JK', 'ABBA.JK', 'ABDA.JK', 'ABMM.JK', 'ACES.JK', 'ACRO.JK', 'ACST.JK', 'ADCP.JK', 'ADES.JK', 'ADHI.JK', 'ADMF.JK']
🏗️ Build universe non-bank mid/small cap…


KeyboardInterrupt: 

# Alphavantage Macro Analysis

### Alphavantage Gold Only

In [None]:
#  INI KHUSUS EMAS ALPHAVANTAGE

import requests
import json

# Ganti dengan API key kamu
API_KEY = 'W0CY87H193QOH05M'

# Ambil semua berita (bisa difilter lebih lanjut jika perlu)
url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&limit=1000&apikey={API_KEY}'
response = requests.get(url)
data = response.json()

# Filter berita yang mengandung kata 'gold' atau 'xau' di title atau summary
gold_related_news = [
    {
        "title": item["title"],
        "summary": item.get("summary", "")
    }
    for item in data.get("feed", [])
    if "gold" in item["title"].lower() 
    or "xau" in item["title"].lower()
    or "gold" in item.get("summary", "").lower()
    or "xau" in item.get("summary", "").lower()
]

# Print hasilnya
print(json.dumps(gold_related_news, indent=4))


### Alphavantage Other Currencies

In [3]:
#  INI BISA BUAT YANG LAINNYA
import requests
import json  # Tambahkan impor json

# Replace the "demo" API key with your own key
url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=FOREX:USD&apikey=W0CY87H193QOH05M'
r = requests.get(url)
data = r.json()

# Gunakan json.dumps() untuk mencetak dengan format yang rapi
print(json.dumps(data, indent=2))  


{
  "items": "50",
  "sentiment_score_definition": "x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish",
  "relevance_score_definition": "0 < x <= 1, with a higher score indicating higher relevance.",
  "feed": [
    {
      "title": "4 Ways to Double Your 2025 Retirement-Account Contributions by the End of the Year",
      "url": "https://www.fool.com/retirement/2025/08/08/4-ways-to-double-your-2025-retirement-account-cont/",
      "time_published": "20250808T093000",
      "authors": [
        "Kailey Hagen"
      ],
      "summary": "There's plenty of time left in 2025 to make a difference in your retirement savings.",
      "banner_image": "https://g.foolcdn.com/image/?url=https%3A%2F%2Fg.foolcdn.com%2Feditorial%2Fimages%2F828115%2Fsmiling-person-looking-at-smartphone-and-typing-on-calculator.jpg&op=resize&w=700",
      "source": "Motley Fool",
      "category_within_source": "n/a",
      "sou

### Alphavantage to Sentiment

In [None]:
import requests
import json

# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&limit=1000&tickers=FOREX:USD&apikey=W0CY87H193QOH05M'
r = requests.get(url)
data = r.json()

gold_titles = [item["title"] for item in data["feed"] if "gold" in item["title"].lower() or "xau" in item["title"].lower()]

print(json.dumps(gold_titles, indent=4))

In [None]:
import openai
import json

# API Key OpenAI kamu
OPENAI_API_KEY = "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"

client = openai.OpenAI(api_key=OPENAI_API_KEY)

#nanti ini diganti pakai alphavantage
news_titles = [
    {
        "title": "India's private sector capex likely to slow down due to tariffs, corporations planning new capex may defer: Goldman Sachs - ETCFO",
        "summary": "Goldman Sachs reports that India's private sector capital expenditure may decline due to global tariff uncertainty, potentially impacting capital goods and infrastructure sectors. While India's GDP has been resilient, exports and port activity are vulnerable, especially with a US slowdown."
    },
    {
        "title": "China's Central Bank's Gold Stash Spikes To 2,292 Tonnes And Beijing Is Still Buying - Bank of America  ( NYSE:BAC ) , Franklin Templeton Holdings Trust Franklin Responsibly Sourced Gold ETF  ( ARCA:FGDL ) ",
        "summary": "China's central bank continued to accumulate gold, adding 5 tonnes to its stash in March, marking its 5th consecutive monthly purchase."
    },
    {
        "title": "'Rich Dad, Poor Dad' author calls for $1 million BTC by 2035",
        "summary": "Financial educator, author of Rich Dad, Poor Dad, and investor Robert Kiyosaki recently forecasted a $1 million Bitcoin ( BTC ) price by 2035 as the US dollar continues to lose value to inflationary monetary policies.\"I strongly believe, by 2035, that one Bitcoin will be over $ 1 million, Gold ..."
    },
    {
        "title": "Should You Buy Citigroup While It's Below $70?",
        "summary": "If you're on the hunt for bargains after the recent stock market dip, look no further than Citigroup ( NYSE: C ) . The bank is trading at a 27% discount to its tangible book value, making it a golden opportunity for value-focused investors.CEO Jane Fraser is looking to make her mark and bring the ..."
    },
    {
        "title": "Gold prices set to trend upward in near to long term, says TBZ CFO Mukesh Sharma - ETCFO",
        "summary": "The CFO of Tribhovandas Bhimji Zaveri, speaks on India's gold market outlook and the factors behind its rise in a conversation with ETCFO. says the correlation between gold prices and global uncertainty is growing complex today."
    },
    {
        "title": "Gold Bar Whiskey Distillery Hosts GUINNESS WORLD RECORDS\u2122 Event to Break Most People Shaking Cocktails Simultaneously Record",
        "summary": "Hosted by Gold Bar Whiskey Distillery and San Francisco Giants Broadcaster and 2x World Series Champion, Hunter Pence, guests have the opportunity to make Bay Area history - alongside live music, delicious food, a unique vendor village and more."
    },
    {
        "title": "If You Had Bet On Gold After The 1971 Nixon Shock, You'd Have Beaten The S&P 500 - Unless You Did This One Thing - Vanguard S&P 500 ETF  ( ARCA:VOO ) , SPDR S&P 500  ( ARCA:SPY ) ",
        "summary": "It was the summer of 1971. The United States was embroiled in the Vietnam War, inflationary pressures were building, and global confidence in the U.S. dollar was faltering."
    },
    {
        "title": "U.S. Coins & Jewelry Celebrates National Coin Week with Regional Treasure Hunt Valued Over $10,000, Kicking Off 40th Anniversary and Expansion",
        "summary": "Houston, TX, April 18, 2025 ( GLOBE NEWSWIRE ) -- U.S. Coins & Jewelry, Houston's trusted destination for rare coins, gold and silver bullion, fine jewelry and sought-after sports memorabilia, is celebrating its 40th anniversary in a bold way during National Coin Week ( April 20-26 ) -with a ..."
    },
    {
        "title": "'s 'Stock Whisper' Index: 5 Stocks Investors Secretly Monitor But Don't Talk About Yet - Alamos Gold  ( NYSE:AGI ) , American Express  ( NYSE:AXP ) ",
        "summary": "The Stock Whisper Index highlights five stocks seeing increased attention from Benzinga readers during the week. Earnings season has led to strong interest in company's with upcoming financial results. Markets are messy-but the right setups can still deliver triple-digit gains."
    },
    {
        "title": "Bitcoin vs. gold: How do they stack up for investors?",
        "summary": "As gold prices break new highs, many Bitcoiners are seeking ways to get exposure to the precious metal, but have been met with some hurdles along the way.Although physical gold is accessible in the form of jewelry, gold bars and coins, many industry executives are concerned about aspects like ..."
    }
]

prompt = f"""
Berikut adalah list daftar berita pasar keuangan terbaru:

{json.dumps(news_titles, indent=2)}

Analisis sentimen berita ini dan berikan probabilitas arah pasar dengan penjelasan berikut:

{{
    "metode_probabilitas": "Jelaskan metode yang digunakan untuk menilai probabilitas pergerakan harga berdasarkan analisis sentimen, misalnya: apakah menggunakan model NLP berbasis lexicon, machine learning, atau analisis historis?",
    "intraday": {{
        "xau_bull_today": "Probabilitas harga emas naik secara intraday (%)",
        "xau_bear_today": "Probabilitas harga emas turun secara intraday (%)",
        "intraday_rationale": "Berikan penjelasan berbasis sentimen berita yang mendukung probabilitas ini."
    }},

    "swing": {{
        "xau_bull_thisweek": "Probabilitas harga emas naik secara mingguan (%)",
        "xau_bear_thisweek": "Probabilitas harga emas turun secara mingguan (%)",
        "midlongterm_rationale": "Jelaskan alasan di balik probabilitas mingguan berdasarkan tren sentimen berita."
    }}
}} 
berikan menggunakan format JSON
"""
print("----------------------------------------------------------------------------")
print(prompt)
print("----------------------------------------------------------------------------")

try:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "Kamu adalah analis pasar keuangan."},
                  {"role": "user", "content": prompt}],
        response_format={"type": "json_object"},  # ✅ Pastikan output berupa JSON murni
        temperature=0.7,
    )

    # JSON sudah valid, langsung akses tanpa parsing manual
    output = json.loads(response.choices[0].message.content)
    print(json.dumps(output, indent=2))

except openai.OpenAIError as e:
    print(f"Error dari OpenAI API: {e}")
except Exception as e:
    print(f"Terjadi kesalahan: {e}")


----------------------------------------------------------------------------

Berikut adalah list daftar berita pasar keuangan terbaru:

[
  {
    "title": "India's private sector capex likely to slow down due to tariffs, corporations planning new capex may defer: Goldman Sachs - ETCFO",
    "summary": "Goldman Sachs reports that India's private sector capital expenditure may decline due to global tariff uncertainty, potentially impacting capital goods and infrastructure sectors. While India's GDP has been resilient, exports and port activity are vulnerable, especially with a US slowdown."
  },
  {
    "title": "China's Central Bank's Gold Stash Spikes To 2,292 Tonnes And Beijing Is Still Buying - Bank of America  ( NYSE:BAC ) , Franklin Templeton Holdings Trust Franklin Responsibly Sourced Gold ETF  ( ARCA:FGDL ) ",
    "summary": "China's central bank continued to accumulate gold, adding 5 tonnes to its stash in March, marking its 5th consecutive monthly purchase."
  },
  {
    "titl

# Forexfactory Predicition (NEWS SENTIMENT)

In [None]:
import requests
import os
import time
import schedule
from openai import OpenAI
import base64
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from datetime import datetime
from PIL import Image
import sys
import json

# 🔹 Ambil API Key dari environment variable
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("❌ ERROR: API Key OpenAI tidak ditemukan. Setel variabel lingkungan OPENAI_API_KEY.")

# 🔹 Inisialisasi klien OpenAI
client = OpenAI(api_key=OPENAI_API_KEY)

# 🔹 Konfigurasi Chrome agar berjalan headless
chrome_options = Options()
#chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36")
chrome_options.add_argument("--user-data-dir=/home/kemalwiryawan/.config/google-chrome")
chrome_options.add_argument("--profile-directory=Default")  # atau 'Profile 1' tergantung hasil di atas


# 🔹 Folder penyimpanan screenshot
SAVE_DIR = "screenshots"
os.makedirs(SAVE_DIR, exist_ok=True)

def get_forex_news_sentiment():
    """🔹 Mengambil sentimen berita Forex dari Alpha Vantage API"""
    API_KEY = "W0CY87H193QOH05M"  # Ganti dengan API key Anda
    url = f"https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=FOREX:USD&apikey={API_KEY}"
    
    try:
        response = requests.get(url)
        data = response.json()
        print(data)
        exit("Program dihentikan!")
        return data  # Mengembalikan data sentimen berita dalam format JSON
    except Exception as e:
        print(f"❌ Error mengambil data sentimen berita: {e}")
        return None

def read_forex_news(image_path):
    """🔹 Mengirim screenshot ke OpenAI API dan membaca kalender berita."""
    with open(image_path, "rb") as image_file:
        image_data = base64.b64encode(image_file.read()).decode("utf-8")

    json_filename = os.path.join(SAVE_DIR, f"forexfactory_{datetime.now().strftime('%Y-%m-%d')}.json")

    #example news
    news_titles = """
    {
    "probability_method": "Sentiment analysis is performed using a lexicon-based NLP model that identifies positive and negative keywords related to the price movement of gold in the news. This method categorizes the news into positive, negative, or neutral sentiment and analyzes its cumulative impact on the gold market.",
    "intraday": {
        "xau_bull_today": 65,
        "xau_bear_today": 35,
        "intraday_rationale": "The intraday sentimeeconomic_events = [
    {
        "currency": "USD",
        "event_name": "FOMC Member Goolsbee Speaks",
        "event_date": "2025-04-21T19:30:00",
        "event_status": "Confirmed",
        "impact_level": "Medium",
        "forecast": None,
        "actual": None,
        "previous": None,
        "ai": {
            "ai_forecast": None,
            "ai_forecast_confidence": None,
            "ai_pair_to_trade": "EUR/USD",
            "ai_recommendation": "bullish",
            "ai_recommendation_position_timing": "putbeforenews",
            "ai_rationale": "Speeches by FOMC members can influence USD based on rate outlooks and economic assessments. Market participants should be alert to any hawkish or dovish cues."
        }
    },
    {
        "currency": "USD",
        "event_name": "CB Leading Index m/m",
        "event_date": "2025-04-21T21:00:00",
        "event_status": "Confirmed",
        "impact_level": "Low",
        "forecast": -0.5,
        "actual": -0.5,
        "previous": -0.3,
        "ai": {
            "ai_forecast": -0.5,
            "ai_forecast_confidence": "85",
            "ai_pair_to_trade": "USD/JPY",
            "ai_recommendation": "bearish",
            "ai_recommendation_position_timing": "putafternews",
            "ai_rationale": "A declining leading index suggests weakening economic conditions, likely impacting USD negatively in the short term."
        }
    }
]
nt for gold is generally positive, with several news reports indicating increased interest and buying of gold, such as China's continued gold reserves accumulation and positive views from the CFO of TBZ on gold price trends. This boosts investor confidence to buy gold, increasing the probability of an intraday price rise."
    },
    "swing": {
        "xau_bull_thisweek": 70,
        "xau_bear_thisweek": 30,
        "midlongterm_rationale": "On a weekly basis, gold prices are expected to rise due to several ongoing positive factors, such as gold purchases by China’s central bank and the outlook that gold prices will continue to increase in the medium to long term. Additionally, global uncertainty and inflation affecting the US dollar enhance gold's appeal as a hedge asset, strengthening the bullish sentiment."
    }
    }

    """
  
    prompt = """
    Extract all economic news events from this screenshot of the calendar and convert each into a structured JSON document. 
    For each event in the table, provide a comprehensive analysis with historical context and a data-driven forecast using global economic sources. 
    Ensure you include all events and structure each as follows:
    {
    "timestamp": "{timestamp}",
    "source": "{source}",
    "event":[{
        "currency": "{currency}",
        "event_name": "{event_name}",
        "event_date": "{date_time}",
        "event_status": "{status}",
        "impact_level": "{level}",
        "forecast": {forecast},
        "actual": {actual},
        "previous": {previous}
        "ai":[{
            "ai_forecast": {ai_forecast},
            "ai_forecast_confidence": "0 to 100",
            "ai_pair_to_trade : {FX_PAIR}
            "ai_recommendation": "bullish/bearish",
            "ai_recommendation_position_timing": "putbeforenews/notrade/putafternews",
            "ai_rationale": "macro explanation in simple term"
            }]
        }
    }]

    Ensure that you capture, process and return data for each event separately,
    """

    prompt += "You can use this news to get sentiment in prediction." + news_titles


    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_forex_news_sentiment",
                "description": "Mengambil data sentimen berita terbaru untuk analisis AI dalam menentukan bias pasar.",
                "parameters": {},
            }
        }
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an expert in financial market analysis. Extract data from the image."},
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}", "detail": "high"}}
            ]}],
        #tools=tools,
        #tool_choice="auto",
        response_format={"type": "json_object"},
        temperature=0.75,
        max_tokens=16384
    )

    #print(response)
    #sys.exit("Program dihentikan!")  # Bisa juga dengan sys.exit(1) untuk error

    try:
        # Ambil data baru dari API
        new_data = json.loads(response.choices[0].message.content)
        response_content = response.choices[0].message.content

        if not response_content:
            print("❌ Error: Respon dari OpenAI kosong atau None.")
            return None
        
        try:
            new_data = json.loads(response_content)
        except json.JSONDecodeError as e:
            print(f"❌ Error decoding JSON dari OpenAI: {e}")
            return None
        
        # Pastikan data baru adalah sebuah list
        if isinstance(new_data, dict) and "events" in new_data:
            new_events = new_data["events"]
        elif isinstance(new_data, list):
            new_events = new_data
        else:
            # Jika tidak, bungkus dalam list
            new_events = [new_data]
    except json.JSONDecodeError:
        print("❌ Error: Respon dari API tidak valid.")
        return None

    # Membaca file JSON jika ada, dan menambah data baru
    if os.path.exists(json_filename):
        with open(json_filename, "r") as json_file:
            try:
                data = json.load(json_file)
                # Validasi jika data bukan list
                if not isinstance(data, list):
                    data = []
            except json.JSONDecodeError:
                data = []
    else:
        data = []

    # Tambahkan data baru ke dalam list existing
    data.extend(new_events)

    # Menulis data gabungan ke file JSON
    with open(json_filename, "w") as json_file:
        json.dump(data, json_file, indent=2)
    print(f"📄 Data Forex News diperbarui: {json_filename}")
    return data

def clear_old_screenshots():
    """🔹 Hapus hanya file gambar di folder screenshots sebelum mengambil yang baru."""
    for file in os.listdir(SAVE_DIR):
        file_path = os.path.join(SAVE_DIR, file)
        try:
            if (
                os.path.isfile(file_path)
                and file.lower().endswith((".png", ".jpg", ".jpeg"))
                and file.startswith("forexfactory")  # Cek prefix
            ):
                os.remove(file_path)
                print(f"🗑️ File gambar lama dihapus: {file_path}")
        except Exception as e:
            print(f"❌ Gagal menghapus {file_path}: {e}")

# Fungsi untuk mengambil screenshot dengan ukuran yang diinginkan
def take_screenshot():
    """🔹 Mengambil full-page screenshot ForexFactory dan parsing dengan OpenAI."""
    print("\n📸 Mengambil full-page screenshot forexfactory.com v0.0.6")
    # 🔥 Hapus screenshot lama sebelum mengambil yang baru
    clear_old_screenshots()

    driver = webdriver.Chrome(options=chrome_options)
    driver.set_window_size(1200, 5000)
    driver.get("https://www.forexfactory.com/calendar?week=this")
    time.sleep(10)

    # **Mengatur tinggi window agar sesuai tinggi halaman**
    #page_height = driver.execute_script("return document.body.scrollHeight")
    #driver.set_window_size(1200, 960)  # Set ukuran jendela ke dimensi desktop
    # Dapatkan tinggi penuh dari halaman
    #scroll_height = driver.execute_script("return document.body.scrollHeight")
    # Set ukuran window sesuai tinggi halaman
    #driver.set_window_size(1200, 5000)   

    today_str = datetime.now().strftime("%Y-%m-%d")
    temp_png = os.path.join(SAVE_DIR, f"forexfactory_{today_str}.png")
    final_jpg = os.path.join(SAVE_DIR, f"forexfactory_{today_str}.jpg")

    # **Ambil screenshot**
    driver.save_screenshot(temp_png)
    driver.quit()
    print(f"✅ Full-page screenshot PNG tersimpan: {temp_png}")

    try:
        with Image.open(temp_png) as img:
            img.convert("RGB").save(final_jpg, "JPEG", quality=50)
        os.remove(temp_png)
        print(f"✅ Screenshot JPG tersimpan: {final_jpg}")
        
        # 🔥 **Parsing Kalender Forex menggunakan OpenAI**
        forex_data = read_forex_news(final_jpg)
        print("🔍 Hasil Parsing Forex News:\n")
        print(json.dumps(forex_data, indent=4))

    except Exception as e:
        print(f"❌ Gagal konversi PNG ke JPG: {e}")

# 🔹 Jadwalkan setiap 15 menit
schedule.every(15).minutes.do(take_screenshot)
print("🚀 Service berjalan... Ambil full-page screenshot setiap 15 menit.")
take_screenshot()

while True:
    schedule.run_pending()
    time.sleep(1)

# Technical Recommendation

In [11]:
import os
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import random
import tempfile
import base64
from openai import OpenAI

# 🔹 Ambil API Key dari environment variable
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("❌ ERROR: API Key OpenAI tidak ditemukan. Setel variabel lingkungan OPENAI_API_KEY.")

# 🔹 Inisialisasi klien OpenAI
client = OpenAI(api_key=OPENAI_API_KEY)

# List user-agent yang berbeda
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
]
random_user_agent = random.choice(user_agents)

# Konfigurasi Chrome
options = Options()
options.add_argument(f"user-agent={random_user_agent}")
#options.add_argument("--headless=new")  # Mode headless

# Gunakan direktori sesi sementara agar tidak mudah terdeteksi
#user_data_dir = tempfile.mkdtemp()
#options.add_argument(f"--user-data-dir={user_data_dir}")
options.add_argument("--user-data-dir=/tmp/chrome_dev_test")  # Gunakan sesi login yang sudah ada

options.add_argument("--no-sandbox")  
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

# Jalankan ChromeDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Atur ukuran layar ke 1200x1200
driver.set_window_size(1200, 1800)

# URL Halaman Chart TradingView
chart_url = "https://www.tradingview.com/chart/90FNc4nG/?symbol=OANDA%3AXAUUSD"
driver.get(chart_url)

# Sembunyikan webdriver agar tidak terdeteksi
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

print("✅ Chrome berhasil dibuka. Mengecek apakah login diperlukan...")

try:
    # Cek apakah sudah login dengan mencari elemen utama chart
    if WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CLASS_NAME, "chart-container"))):
        print("🎉 Sudah login! Langsung ke halaman chart.")
    else:
        raise Exception("Elemen chart tidak ditemukan, mungkin perlu login.")
    
except Exception:
    print("🔄 Belum login, mencoba proses login...")
    
    try:
        # Cek tombol "log in"
        login_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'log in')]"))
        )
        login_button.click()
        print("✅ Tombol login diklik. Menunggu halaman login terbuka...")

        try:
            email_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Email')]"))
            )
            
            # Scroll ke tombol jika diperlukan
            driver.execute_script("arguments[0].scrollIntoView();", email_button)
            
            # Klik tombol email
            email_button.click()
            print("✅ Tombol Email berhasil diklik!")
        except Exception as e:
            print(f"❌ Gagal mengklik tombol Email: {e}")        
        
        # Tunggu input username muncul
        username_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "id_username"))
        )
    
        # Kosongkan field username
        username_input.send_keys(Keys.CONTROL + "a")  # Select all text
        username_input.send_keys(Keys.DELETE)
        time.sleep(5)  # Tunggu agar benar-benar kosong
    
        # Masukkan email
        email_kamu = "mkemalw@gmail.com"  # GANTI dengan email kamu
        username_input.send_keys(email_kamu)
        print(f"✅ Berhasil input email: {email_kamu}")
    
        # Tunggu input password muncul
        password_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "id_password"))
        )
    
        # Kosongkan field password
        password_input.send_keys(Keys.CONTROL + "a")
        password_input.send_keys(Keys.DELETE)
        time.sleep(2)
    
        # Masukkan password
        password_kamu = "3Desember1986!@#"  # GANTI dengan password kamu
        password_input.send_keys(password_kamu)
        print("✅ Berhasil input password.")
    
        # Tunggu elemen tombol "Sign in" muncul dan bisa diklik
        login_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[@data-overflow-tooltip-text='Sign in']"))
        )
    
        # Scroll ke tombol jika tidak terlihat
        driver.execute_script("arguments[0].scrollIntoView();", login_button)
        time.sleep(1)
    
        # Klik tombol menggunakan JavaScript jika klik biasa gagal
        try:
            login_button.click()
            print("✅ Tombol Sign in diklik.")
        except:
            driver.execute_script("arguments[0].click();", login_button)
            print("✅ Tombol Sign in diklik via JavaScript.")

        # Tunggu redirect ke halaman chart
        time.sleep(5)
        
        #if "chart" not in driver.current_url:
        #    print("🔄 Tidak kembali ke chart. Reload halaman chart...")
        #    driver.get(chart_url)

        # Tunggu chart muncul
        #WebDriverWait(driver, 10).until(
        #    EC.presence_of_element_located((By.CLASS_NAME, "chart-container"))
        #)
        
        print("🎉 Login sukses! Kembali ke halaman chart XAU/USD.")

    except Exception as e:
        print(f"❌ Error saat login: {e}")
        driver.quit()
        exit()

# Buat folder screenshot jika belum ada
screenshot_folder = "screenshots"
os.makedirs(screenshot_folder, exist_ok=True)

def capture_screenshot():
    try:
        # Tunggu hingga elemen chart-container muncul
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, "chart-container"))  
        )

        # Pastikan halaman telah sepenuhnya dimuat
        WebDriverWait(driver, 30).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )

        # Emulasi tekan tombol Shift + F untuk fullscreen
        action = ActionChains(driver)
        action.key_down(Keys.SHIFT).send_keys("f").key_up(Keys.SHIFT).perform()

        # Tunggu sebentar agar mode fullscreen aktif
        time.sleep(2)
        action = ActionChains(driver)
        action.key_down(Keys.ALT).send_keys("r").key_up(Keys.ALT).perform()
        time.sleep(2)

        # Ambil screenshot
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = os.path.join(screenshot_folder, f"tradingview_XAUUSD_15M_{timestamp}.png")

        driver.save_screenshot(filename)
        print(f"✅ Screenshot saved: {filename}")

        return filename  # Kembalikan path gambar untuk digunakan dalam OCR

    except Exception as e:
        print(f"⚠️ Gagal mengambil screenshot: {e}")
        return None


def image_to_base64(image_path):
    if not os.path.exists(image_path):
        print(f"⚠️ File tidak ditemukan: {image_path}")
        return None
    
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def analyze_chart_with_gpt4o(image_base64):
    if image_base64 is None:
        print("⚠️ Tidak ada gambar untuk dianalisis.")
        return None
    
    try:
        prompt = """
        Key Analysis Requirements are
        Supply & Demand Zones: Identify key supply and demand areas using exponential price movements and imbalance price action.
        Market Structure Patterns: Detect and classify the following price action formations: Rally-Base-Rally (RBR); Rally-Base-Drop (RBD);Drop-Base-Drop (DBD);Drop-Base-Rally (DBR);Consolidation zones
        Breakout Validation: Differentiate between genuine breakouts and false breakouts. Ensure accuracy in detecting the highest high and lowest low within the chart.
        Indicator Recognition: The blue line represents the VWAP indicator. Additionally, RSI is displayed at the bottom of the screen—extract relevant insights.
        Output Format: The extracted data should be structured in a precise JSON format to be consumed by an Expert Advisor (EA). Ensure reliability, accuracy, and completeness in your output.
        [SCHEMA]
        {
        "timestamp": """ + datetime.now().strftime("%Y%m%d_%H%M%S") + """
        "pair": "{pair}",
        "timeframe: "{timeframe}"
        "price" : current price,
        "price_action":[{ 
            "demand_sup_area": clear virgin demand area in range (price - price),
            "supply_res_area": clear virgin supply area in price range (price - price),
        },
        { 
            "demand_sup_area": other clear virgin demand area in range (price - price),
            "supply_res_area": other clear virgin supply area in price range (price - price),
        }
        ]
        "recomendation":[{
            "bias" : "bullish/bearish/sideway"
            "action" : "wait/buylimit/buystop/selllimit/sellstop",
            "RR" : Risk Reward Ratio if wait put null (1:1.5/1:2/1:3/1:4),
            "entry_price" : best entry price if wait put null (numbers),
            "TP" : take profit price if wait put null (numbers),
            "SL" : stop loss price if wait put null (numbers),
            "probability" : 0 - 100 (%),
            "rationale" : explained in simple term why we take the action and how probabilty calcaluted,
            "lot_size" : using Kelly criterion formula in 1000 USD equity based on probabilty (numbers),
            "IF_SL" : if stop loss do martingale or not on the next trade (martingale/nomarti),
        }]
        [/SCHEMA]
        """

        economic_events = [
            {
                "currency": "USD",
                "event_name": "FOMC Member Goolsbee Speaks",
                "event_date": "2025-04-21T19:30:00",
                "event_status": "Confirmed",
                "impact_level": "Medium",
                "forecast": None,
                "actual": None,
                "previous": None,
                "ai": {
                    "ai_forecast": None,
                    "ai_forecast_confidence": None,
                    "ai_pair_to_trade": "EUR/USD",
                    "ai_recommendation": "bullish",
                    "ai_recommendation_position_timing": "putbeforenews",
                    "ai_rationale": "Speeches by FOMC members can influence USD based on rate outlooks and economic assessments. Market participants should be alert to any hawkish or dovish cues."
                }
            },
            {
                "currency": "USD",
                "event_name": "CB Leading Index m/m",
                "event_date": "2025-04-21T21:00:00",
                "event_status": "Confirmed",
                "impact_level": "Low",
                "forecast": -0.5,
                "actual": -0.5,
                "previous": -0.3,
                "ai": {
                    "ai_forecast": -0.5,
                    "ai_forecast_confidence": "85",
                    "ai_pair_to_trade": "USD/JPY",
                    "ai_recommendation": "bearish",
                    "ai_recommendation_position_timing": "putafternews",
                    "ai_rationale": "A declining leading index suggests weakening economic conditions, likely impacting USD negatively in the short term."
                }
            }
        ]

        
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a world-class quant analyst specializing in financial market analysis. Your task is to extract and analyze data from the attached TradingView screenshot with precision."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}", "detail": "high"}}
                    ]
                }],
            # Gantilah yang error jadi:
tools=[
    {
        "type": "function",
        "function": {
            "name": "assistance",
            "description": "Extract structured financial data from chart image and economic event context",
            "parameters": {
                "type": "object",
                "properties": {
                    "trend": {
                        "type": "string",
                        "description": "The direction of the market trend (e.g., 'uptrend', 'downtrend', 'sideways')"
                    },
                    "support_levels": {
                        "type": "array",
                        "items": {"type": "number"},
                        "description": "Key support levels identified in the chart"
                    },
                    "resistance_levels": {
                        "type": "array",
                        "items": {"type": "number"},
                        "description": "Key resistance levels identified in the chart"
                    },
                    "pattern": {
                        "type": "string",
                        "description": "Any chart pattern identified (e.g., 'head and shoulders', 'double bottom')"
                    },
                    "recommendation": {
                        "type": "string",
                        "description": "Suggested action based on analysis: 'buy', 'sell', or 'hold'"
                    },
                    "news_alignment": {
                        "type": "string",
                        "description": "Does technical analysis agree or conflict with upcoming economic events?"
                    }
                },
                "required": ["trend", "support_levels", "resistance_levels"]
            }
        }
    }
],

            tool_choice="auto",
            response_format={"type": "json_object"},
            temperature=0.75,
            max_tokens=16384
        )
        return response.choices[0].message.content
    
    except Exception as e:
        print(f"⚠️ Gagal menganalisis gambar dengan GPT-4o: {e}")
        return None


# Loop untuk mengambil screenshot setiap 15 menit
try:
    while True:
        screenshot_path = capture_screenshot()
        if screenshot_path:
            image_base64 = image_to_base64(screenshot_path)
            analysis_result = analyze_chart_with_gpt4o(image_base64)
            print("📊 Analisis GPT-4o:", analysis_result)

        delay = random.randint(180, 200)
        print(f"⏳ Menunggu {delay} detik sebelum screenshot berikutnya...")
        time.sleep(delay)

except KeyboardInterrupt:
    print("\n🛑 Dihentikan oleh pengguna.")

✅ Chrome berhasil dibuka. Mengecek apakah login diperlukan...
🔄 Belum login, mencoba proses login...
✅ Tombol login diklik. Menunggu halaman login terbuka...
✅ Tombol Email berhasil diklik!
✅ Berhasil input email: mkemalw@gmail.com
✅ Berhasil input password.
✅ Tombol Sign in diklik.
🎉 Login sukses! Kembali ke halaman chart XAU/USD.
✅ Screenshot saved: screenshots/tradingview_XAUUSD_15M_20250421_121723.png
📊 Analisis GPT-4o: None
⏳ Menunggu 181 detik sebelum screenshot berikutnya...
✅ Screenshot saved: screenshots/tradingview_XAUUSD_15M_20250421_122032.png
📊 Analisis GPT-4o: None
⏳ Menunggu 184 detik sebelum screenshot berikutnya...

🛑 Dihentikan oleh pengguna.
