# INIT

In [3]:
import pandas as pd
import yfinance as yf

In [4]:
!pip install yfinance

Defaulting to user installation because normal site-packages is not writeable


In [5]:
import pandas as pd
import yfinance as yf

In [6]:
# Ambil data BBRI per menit selama 7 hari terakhir
bbri = yf.download("BBRI.JK", interval="1m", period="1d")

# Ubah index (Datetime) ke zona waktu Jakarta (WIB)
bbri.index = bbri.index.tz_convert("Asia/Jakarta")

# Hapus data dengan Volume = 0 (kemungkinan di luar jam trading)
bbri = bbri[bbri['Volume'] > 0]

# Tampilkan 5 baris pertama untuk melihat datanya
print(bbri.head())

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Empty DataFrame
Columns: [(Adj Close, BBRI.JK), (Close, BBRI.JK), (High, BBRI.JK), (Low, BBRI.JK), (Open, BBRI.JK), (Volume, BBRI.JK)]
Index: []


In [7]:
import pandas as pd
import yfinance as yf

# Ambil data 2 hari terakhir
bbri = yf.download("BBRI.JK", interval="1m", period="8d")

# Ubah timezone ke Jakarta (WIB)
bbri.index = bbri.index.tz_convert("Asia/Jakarta")

# Ambil tanggal unik
bbri['Date'] = bbri.index.date
unique_dates = bbri['Date'].unique()

# Simpan hasil dalam list
data_rows = []

for date in unique_dates:
    # Filter data hanya untuk tanggal ini
    daily_data = bbri[bbri['Date'] == date]
    
    # Ambil data menit pertama (09:00) dan kedua (09:01)
    first_minute = daily_data.between_time("09:00:00", "09:00:59")
    second_minute = daily_data.between_time("09:01:00", "09:01:59")
    
    # Ambil harga penutupan harian
    close_data = daily_data.between_time("15:59:00", "16:00:00")
    close_price = close_data["Close"].values[0] if not close_data.empty else None

    # Pastikan tidak ada data yang kosong
    if not first_minute.empty and not second_minute.empty:
        data_rows.append([
            "BBRI", date,
            first_minute["Open"].values[0], first_minute["High"].values[0], 
            first_minute["Low"].values[0], first_minute["Close"].values[0],
            second_minute["Open"].values[0], second_minute["High"].values[0], 
            second_minute["Low"].values[0], second_minute["Close"].values[0],
            close_price
        ])

# Buat DataFrame hasil
columns = ["Ticker", "Tanggal", "O1", "H1", "L1", "C1", "O2", "H2", "L2", "C2", "Close Price"]
result_df = pd.DataFrame(data_rows, columns=columns)

# Tampilkan hasil
print(result_df)

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Empty DataFrame
Columns: [Ticker, Tanggal, O1, H1, L1, C1, O2, H2, L2, C2, Close Price]
Index: []


In [8]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

# Daftar ticker
tickers = ["BBRI.JK"]

# Loop untuk mengambil 80 hari data (10 batch x 8 hari)
total_days = 80
batch_size = 8
loops = total_days // batch_size

data_rows = []

for i in range(loops):
    # Hitung tanggal mulai (mundur dari hari ini)
    end_date = datetime.today() - timedelta(days=i * batch_size)
    start_date = end_date - timedelta(days=batch_size)

    print(f"Fetching data from {start_date.date()} to {end_date.date()}...")

    for ticker in tickers:
        try:
            # Ambil data dengan start & end (Yahoo hanya mendukung interval > 1m)
            data = yf.download(ticker, interval="5m", start=start_date.strftime('%Y-%m-%d'), 
                               end=end_date.strftime('%Y-%m-%d'), timeout=20)

            # Ubah timezone ke Jakarta (WIB)
            data.index = data.index.tz_convert("Asia/Jakarta")

            # Resample ke 1 menit
            data = data.resample('1min').ffill()

            # Ambil tanggal unik
            data['Date'] = data.index.date
            unique_dates = data['Date'].unique()

            for date in unique_dates:
                # Filter data hanya untuk tanggal ini
                daily_data = data[data['Date'] == date]

                # Ambil data menit pertama (09:00) dan kedua (09:01)
                first_minute = daily_data.between_time("09:00:00", "09:00:59")
                second_minute = daily_data.between_time("09:01:00", "09:01:59")

                # Ambil harga penutupan harian (16:00)
                close_data = daily_data.between_time("15:59:00", "16:00:00")
                close_price = close_data["Close"].iloc[0] if not close_data.empty else None

                # Pastikan tidak ada data yang kosong
                if not first_minute.empty and not second_minute.empty:
                    data_rows.append([
                        ticker.replace(".JK", ""), date,
                        first_minute["Open"].iloc[0].item(), first_minute["High"].iloc[0].item(), 
                        first_minute["Low"].iloc[0].item(), first_minute["Close"].iloc[0].item(),
                        second_minute["Open"].iloc[0].item(), second_minute["High"].iloc[0].item(), 
                        second_minute["Low"].iloc[0].item(), second_minute["Close"].iloc[0].item(),
                        close_price.item() if close_price is not None else None
                    ])
        except Exception as e:
            print(f"‚ö†Ô∏è Gagal mengambil data {ticker}: {e}")

# Buat DataFrame hasil
columns = ["Ticker", "Tanggal", "O1", "H1", "L1", "C1", "O2", "H2", "L2", "C2", "Close Price"]
result_df = pd.DataFrame(data_rows, columns=columns)

# Simpan ke CSV
result_df.to_csv("stock_data_80days.csv", index=False)

print("‚úÖ Data berhasil disimpan ke stock_data_80days.csv")

Fetching data from 2025-08-03 to 2025-08-11...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-07-26 to 2025-08-03...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-07-18 to 2025-07-26...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-07-10 to 2025-07-18...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-07-02 to 2025-07-10...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-06-24 to 2025-07-02...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-06-16 to 2025-06-24...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-06-08 to 2025-06-16...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-05-31 to 2025-06-08...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
Fetching data from 2025-05-23 to 2025-05-31...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


‚ö†Ô∏è Gagal mengambil data BBRI.JK: 'MultiIndex' object has no attribute 'date'
‚úÖ Data berhasil disimpan ke stock_data_80days.csv


In [9]:
import pandas as pd
import yfinance as yf
from IPython.display import display

# Daftar ticker yang ingin diambil
tickers = [
    # Saham LQ45 awal:
    "BBRI.JK", "BMRI.JK", "BBCA.JK", "TLKM.JK", "BBNI.JK", "BRIS.JK", "BRPT.JK", 
    "PTRO.JK", "ADRO.JK", "ASII.JK", "ANTM.JK", "BBTN.JK", "CPIN.JK", "ERAA.JK", 
    "GGRM.JK", "HMSP.JK", "ICBP.JK", "INCO.JK", "INDF.JK", "INKP.JK", "INTP.JK", 
    "ITMG.JK", "JPFA.JK", "JSMR.JK", "KLBF.JK", "MDKA.JK", "MEDC.JK", "MIKA.JK", 
    "MNCN.JK", "PGAS.JK", "PTBA.JK", "SCMA.JK", "SMGR.JK", "SMRA.JK", "TBIG.JK", 
    "TINS.JK", "TKIM.JK", "TOWR.JK", "UNTR.JK", "UNVR.JK", "WSKT.JK", "EXCL.JK", "AMRT.JK",
    
    # Grup Prajogo Pangestu
    "TPIA.JK", "STAR.JK", "BREN.JK", "CUAN.JK",
    
    # Grup Salim
    "IMAS.JK", "ROTI.JK", "DPNS.JK",
    
    # Grup Hartono (Djarum)
    "BELI.JK",
    
    # Grup Chairul Tanjung
    "MEGA.JK",
    
    # Grup Garibaldi (Boy) Thohir
    "GOTO.JK",
    
    # Grup Mochtar Riady (Lippo)
    "LPKR.JK", "SILO.JK", "LPPF.JK", "MLPL.JK",
    
    # Grup Hary Tanoesoedibjo (MNC)
    "MSIN.JK", "BCAP.JK", "BABP.JK",
    
    # Grup Eddy Kusnadi Sariaatmadja (Emtek)
    "EMTK.JK", "BUKA.JK",
    
    # Grup Peter Sondakh (Rajawali)
    "ARCI.JK", "BWPT.JK", "META.JK",
    
    # Grup Alexander Tedja (Pakuwon)
    "PWON.JK",
    
    # Grup Eka Tjipta Widjaja (Sinarmas)
    "SMAR.JK", "DSSA.JK", "BSDE.JK",
    
    # Grup Sugianto Kusuma (Aguan - Agung Sedayu)
    "PANI.JK",
    
    # Grup Saratoga (Edwin Soeryadjaya & Sandiaga Uno)
    "SRTG.JK", "MPMX.JK", "PALM.JK", "AGII.JK", "PRAY.JK"
]

# Simpan semua hasil dalam satu list
data_rows = []
failed_tickers = []

def download_data_for_ticker(ticker):
    try:
        # Ambil data 8 hari terakhir untuk setiap ticker
        data = yf.download(ticker, interval="1m", period="8d", timeout=20)
        # Ubah timezone ke Jakarta (WIB)
        data.index = data.index.tz_convert("Asia/Jakarta")

        # Ambil tanggal unik
        data['Date'] = data.index.date
        unique_dates = data['Date'].unique()

        for date in unique_dates:
            # Filter data hanya untuk tanggal ini
            daily_data = data[data['Date'] == date]

            # Ambil data menit pertama (09:00) dan kedua (09:01)
            first_minute = daily_data.between_time("09:00:00", "09:00:59")
            second_minute = daily_data.between_time("09:01:00", "09:01:59")

            # Ambil harga penutupan harian (16:00)
            close_data = daily_data.between_time("15:59:00", "16:00:00")
            close_price = close_data["Close"].iloc[0] if not close_data.empty else None

            # Pastikan tidak ada data yang kosong
            if not first_minute.empty and not second_minute.empty:
                data_rows.append([
                    ticker.replace(".JK", ""), date,
                    first_minute["Open"].iloc[0].item(), first_minute["High"].iloc[0].item(), 
                    first_minute["Low"].iloc[0].item(), first_minute["Close"].iloc[0].item(),
                    second_minute["Open"].iloc[0].item(), second_minute["High"].iloc[0].item(), 
                    second_minute["Low"].iloc[0].item(), second_minute["Close"].iloc[0].item(),
                    close_price.item() if close_price is not None else None
                ])
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal mengambil data {ticker}: {e}")
        failed_tickers.append(ticker)

# Lakukan pengambilan data untuk semua ticker
for ticker in tickers:
    download_data_for_ticker(ticker)

# Coba lagi untuk ticker yang gagal
if failed_tickers:
    print("üîÑ Mencoba ulang untuk ticker yang gagal...")
    for ticker in failed_tickers:
        download_data_for_ticker(ticker)

# Buat DataFrame hasil
columns = ["Ticker", "Tanggal", "O1", "H1", "L1", "C1", "O2", "H2", "L2", "C2", "Close Price"]
result_df = pd.DataFrame(data_rows, columns=columns)

# Setup tampilan Pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_rows', 10)

# Tampilkan hasil menggunakan IPython display (untuk jupyter notebook) atau head/tail
display(result_df)  # Gunakan ini jika Anda mengoperasikan pada Jupyter Notebook

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BMRI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBCA.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['TLKM.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBNI.JK']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BRIS.JK']: YFRateLimitError('Too Many R

Unnamed: 0,Ticker,Tanggal,O1,H1,L1,C1,O2,H2,L2,C2,Close Price


In [10]:
pip install --upgrade yfinance pandas

Defaulting to user installation because normal site-packages is not writeable
Collecting yfinance
  Downloading yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting pandas
  Downloading pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting curl_cffi>=0.7 (from yfinance)
  Downloading curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting protobuf>=3.19.0 (from yfinance)
  Downloading protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Collecting certifi>=2024.2.2 (from curl_cffi>=0.7->yfinance)
  Downloading certifi-2025.8.3-py3-none-any.whl.metadata (2.4 kB)
Downloading yfinance-0.2.65-py2.py3-none-any.whl (119 kB)
Downloading pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m12.0/12.0 MB[

In [11]:
import numpy as np
import random
from datetime import timedelta, date

def generate_random_dates(start_date, num_dates):
    dates = set()
    while len(dates) < num_dates:
        random_days = random.randint(1, 365)
        random_date = start_date + timedelta(days=random_days)
        dates.add(random_date)
    return list(dates)

def simulate_monte_carlo(ticker, base_data, num_days=30):
    # Start with existing data
    if base_data.empty:
        raise ValueError(f"No base data available for ticker {ticker}")

    # Get some statistics from base data for simulation
    mean_open = base_data['O1'].mean()
    std_dev_open = base_data['O1'].std()

    mean_close = base_data['Close Price'].mean()
    std_dev_close = base_data['Close Price'].std()

    # Simulate 30 days of data
    simulated_data = []
    random_dates = generate_random_dates(date.today(), num_days)

    for random_date in random_dates:
        open_price = np.random.normal(mean_open, std_dev_open)
        close_price = np.random.normal(mean_close, std_dev_close)

        # For simplicity, assume H=L=O=C for simulation, you can improve it further
        simulated_data.append([
            ticker.replace(".JK", ""), random_date,
            open_price, open_price, open_price, open_price,  # O1, H1, L1, C1
            open_price, open_price, open_price, open_price,  # O2, H2, L2, C2
            close_price                                     # Close Price
        ])
    
    return simulated_data

# Example to add simulated data for BBRI
some_ticker = "BBRI.JK"  # Change this to another ticker if needed
base_data = result_df[result_df['Ticker'] == some_ticker.replace('.JK', '')]

# Simulate data and add to results
simulated_rows = simulate_monte_carlo(some_ticker, base_data)
simulated_df = pd.DataFrame(simulated_rows, columns=columns)
result_df = pd.concat([result_df, simulated_df], ignore_index=True)

# Display combined data
print(result_df)

ValueError: No base data available for ticker BBRI.JK

In [None]:
import pandas as pd
import yfinance as yf
from IPython.display import display

# Daftar ticker yang ingin diambil
tickers = [
    # Saham LQ45 awal:
    "BBRI.JK", "BMRI.JK", "BBCA.JK", "TLKM.JK", "BBNI.JK", "BRIS.JK", "BRPT.JK", 
    "PTRO.JK", "ADRO.JK", "ASII.JK", "ANTM.JK", "BBTN.JK", "CPIN.JK", "ERAA.JK", 
    "GGRM.JK", "HMSP.JK", "ICBP.JK", "INCO.JK", "INDF.JK", "INKP.JK", "INTP.JK", 
    "ITMG.JK", "JPFA.JK", "JSMR.JK", "KLBF.JK", "MDKA.JK", "MEDC.JK", "MIKA.JK", 
    "MNCN.JK", "PGAS.JK", "PTBA.JK", "SCMA.JK", "SMGR.JK", "SMRA.JK", "TBIG.JK", 
    "TINS.JK", "TKIM.JK", "TOWR.JK", "UNTR.JK", "UNVR.JK", "WSKT.JK", "EXCL.JK", "AMRT.JK",
    
    # Grup Prajogo Pangestu
    "TPIA.JK", "STAR.JK", "BREN.JK", "CUAN.JK",
    
    # Grup Salim
    "IMAS.JK", "ROTI.JK", "DPNS.JK",
    
    # Grup Hartono (Djarum)
    "BELI.JK",
    
    # Grup Chairul Tanjung
    "MEGA.JK",
    
    # Grup Garibaldi (Boy) Thohir
    "GOTO.JK",
    
    # Grup Mochtar Riady (Lippo)
    "LPKR.JK", "SILO.JK", "LPPF.JK", "MLPL.JK",
    
    # Grup Hary Tanoesoedibjo (MNC)
    "MSIN.JK", "BCAP.JK", "BABP.JK",
    
    # Grup Eddy Kusnadi Sariaatmadja (Emtek)
    "EMTK.JK", "BUKA.JK",
    
    # Grup Peter Sondakh (Rajawali)
    "ARCI.JK", "BWPT.JK", "META.JK",
    
    # Grup Alexander Tedja (Pakuwon)
    "PWON.JK",
    
    # Grup Eka Tjipta Widjaja (Sinarmas)
    "SMAR.JK", "DSSA.JK", "BSDE.JK",
    
    # Grup Sugianto Kusuma (Aguan - Agung Sedayu)
    "PANI.JK",
    
    # Grup Saratoga (Edwin Soeryadjaya & Sandiaga Uno)
    "SRTG.JK", "MPMX.JK", "PALM.JK", "AGII.JK", "PRAY.JK"
]

# Simpan semua hasil dalam satu list
data_rows = []
failed_tickers = []

def download_data_for_ticker(ticker):
    try:
        # Ambil data 8 hari terakhir untuk setiap ticker
        data = yf.download(ticker, interval="1m", period="8d", timeout=20)
        # Ubah timezone ke Jakarta (WIB)
        data.index = data.index.tz_convert("Asia/Jakarta")

        # Ambil tanggal unik
        data['Date'] = data.index.date
        unique_dates = data['Date'].unique()

        for date in unique_dates:
            # Filter data hanya untuk tanggal ini
            daily_data = data[data['Date'] == date]

            # Ambil data menit pertama (09:00) dan kedua (09:01)
            first_minute = daily_data.between_time("09:00:00", "09:00:59")
            second_minute = daily_data.between_time("09:01:00", "09:01:59")

            # Ambil harga penutupan harian (16:00)
            close_data = daily_data.between_time("15:59:00", "16:00:00")
            close_price = close_data["Close"].iloc[0] if not close_data.empty else None

            # Pastikan tidak ada data yang kosong
            if not first_minute.empty and not second_minute.empty:
                data_rows.append([
                    ticker.replace(".JK", ""), date,
                    first_minute["Open"].iloc[0].item(), first_minute["High"].iloc[0].item(), 
                    first_minute["Low"].iloc[0].item(), first_minute["Close"].iloc[0].item(),
                    second_minute["Open"].iloc[0].item(), second_minute["High"].iloc[0].item(), 
                    second_minute["Low"].iloc[0].item(), second_minute["Close"].iloc[0].item(),
                    close_price.item() if close_price is not None else None
                ])
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal mengambil data {ticker}: {e}")
        failed_tickers.append(ticker)

# Lakukan pengambilan data untuk semua ticker
for ticker in tickers:
    download_data_for_ticker(ticker)

# Coba lagi untuk ticker yang gagal
if failed_tickers:
    print("üîÑ Mencoba ulang untuk ticker yang gagal...")
    for ticker in failed_tickers:
        download_data_for_ticker(ticker)

# Buat DataFrame hasil
columns = ["Ticker", "Tanggal", "O1", "H1", "L1", "C1", "O2", "H2", "L2", "C2", "Close Price"]
result_df = pd.DataFrame(data_rows, columns=columns)

# Setup tampilan Pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_rows', 10)

# Tampilkan hasil menggunakan IPython display (untuk jupyter notebook) atau head/tail
display(result_df)  # Gunakan ini jika Anda mengoperasikan pada Jupyter Notebook

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Unnamed: 0,Ticker,Tanggal,O1,H1,L1,C1,O2,H2,L2,C2,Close Price
0,BBRI,2025-02-25,3890.0,3910.0,3880.0,3910.0,3910.0,3910.0,3900.0,3910.0,3800.0
1,BBRI,2025-02-26,3810.0,3840.0,3800.0,3830.0,3830.0,3840.0,3820.0,3830.0,3820.0
2,BBRI,2025-02-27,3790.0,3800.0,3790.0,3790.0,3790.0,3800.0,3780.0,3790.0,3630.0
3,BBRI,2025-02-28,3500.0,3500.0,3420.0,3420.0,3420.0,3450.0,3410.0,3440.0,3360.0
4,BBRI,2025-03-03,3520.0,3540.0,3510.0,3520.0,3520.0,3520.0,3470.0,3490.0,3670.0
...,...,...,...,...,...,...,...,...,...,...,...
416,MPMX,2025-02-27,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0,945.0
417,MPMX,2025-03-03,935.0,935.0,935.0,935.0,935.0,935.0,930.0,935.0,940.0
418,MPMX,2025-03-04,940.0,945.0,940.0,945.0,945.0,945.0,945.0,945.0,920.0
419,MPMX,2025-03-05,925.0,925.0,925.0,925.0,930.0,930.0,930.0,930.0,930.0


In [None]:
import pandas as pd
import yfinance as yf
from IPython.display import display

# Daftar ticker yang ingin diambil
tickers = [
    # Saham LQ45 awal:
    "BBRI.JK", "BMRI.JK", "BBCA.JK", "TLKM.JK", "BBNI.JK", "BRIS.JK", "BRPT.JK", 
    "PTRO.JK", "ADRO.JK", "ASII.JK", "ANTM.JK", "BBTN.JK", "CPIN.JK", "ERAA.JK", 
    "GGRM.JK", "HMSP.JK", "ICBP.JK", "INCO.JK", "INDF.JK", "INKP.JK", "INTP.JK", 
    "ITMG.JK", "JPFA.JK", "JSMR.JK", "KLBF.JK", "MDKA.JK", "MEDC.JK", "MIKA.JK", 
    "MNCN.JK", "PGAS.JK", "PTBA.JK", "SCMA.JK", "SMGR.JK", "SMRA.JK", "TBIG.JK", 
    "TINS.JK", "TKIM.JK", "TOWR.JK", "UNTR.JK", "UNVR.JK", "WSKT.JK", "EXCL.JK", "AMRT.JK",
    
    # Grup Prajogo Pangestu
    "TPIA.JK", "STAR.JK", "BREN.JK", "CUAN.JK",
    
    # Grup Salim
    "IMAS.JK", "ROTI.JK", "DPNS.JK",
    
    # Grup Hartono (Djarum)
    "BELI.JK",
    
    # Grup Chairul Tanjung
    "MEGA.JK",
    
    # Grup Garibaldi (Boy) Thohir
    "GOTO.JK",
    
    # Grup Mochtar Riady (Lippo)
    "LPKR.JK", "SILO.JK", "LPPF.JK", "MLPL.JK",
    
    # Grup Hary Tanoesoedibjo (MNC)
    "MSIN.JK", "BCAP.JK", "BABP.JK",
    
    # Grup Eddy Kusnadi Sariaatmadja (Emtek)
    "EMTK.JK", "BUKA.JK",
    
    # Grup Peter Sondakh (Rajawali)
    "ARCI.JK", "BWPT.JK", "META.JK",
    
    # Grup Alexander Tedja (Pakuwon)
    "PWON.JK",
    
    # Grup Eka Tjipta Widjaja (Sinarmas)
    "SMAR.JK", "DSSA.JK", "BSDE.JK",
    
    # Grup Sugianto Kusuma (Aguan - Agung Sedayu)
    "PANI.JK",
    
    # Grup Saratoga (Edwin Soeryadjaya & Sandiaga Uno)
    "SRTG.JK", "MPMX.JK", "PALM.JK", "AGII.JK", "PRAY.JK"
]

# Simpan semua hasil dalam satu list (untuk warm-up stage)
data_rows = []
failed_tickers = []

def warm_up_data():
    for ticker in tickers:
        try:
            # Ambil data 8 hari terakhir untuk setiap ticker
            data = yf.download(ticker, interval="1m", period="8d", timeout=20)
            # Ubah timezone ke Jakarta (WIB)
            data.index = data.index.tz_convert("Asia/Jakarta")

            # Ambil tanggal unik
            data['Date'] = data.index.date
            unique_dates = data['Date'].unique()

            for date in unique_dates:
                # Filter data hanya untuk tanggal ini
                daily_data = data[data['Date'] == date]

                # Ambil data menit pertama (09:00) dan kedua (09:01)
                first_minute = daily_data.between_time("09:00:00", "09:00:59")
                second_minute = daily_data.between_time("09:01:00", "09:01:59")

                # Ambil harga penutupan harian (16:00)
                close_data = daily_data.between_time("15:59:00", "16:00:00")
                close_price = close_data["Close"].iloc[0] if not close_data.empty else None

                # Pastikan tidak ada data yang kosong
                if not first_minute.empty and not second_minute.empty:
                    data_rows.append([
                        ticker.replace(".JK", ""), date,
                        first_minute["Open"].iloc[0].item(), first_minute["High"].iloc[0].item(), 
                        first_minute["Low"].iloc[0].item(), first_minute["Close"].iloc[0].item(),
                        second_minute["Open"].iloc[0].item(), second_minute["High"].iloc[0].item(), 
                        second_minute["Low"].iloc[0].item(), second_minute["Close"].iloc[0].item(),
                        close_price.item() if close_price is not None else None
                    ])
        except Exception as e:
            print(f"‚ö†Ô∏è Gagal mengambil data {ticker}: {e}")
            failed_tickers.append(ticker)

    # Coba lagi untuk ticker yang gagal
    if failed_tickers:
        print("üîÑ Mencoba ulang untuk ticker yang gagal...")
        for ticker in failed_tickers:
            download_data_for_ticker(ticker)

# Lakukan warming-up dengan mengunduh data
warm_up_data()

# Buat DataFrame hasil
columns = ["Ticker", "Tanggal", "O1", "H1", "L1", "C1", "O2", "H2", "L2", "C2", "Close Price"]
result_df = pd.DataFrame(data_rows, columns=columns)

# Tampilkan hasil data sebelum aplikasi logika
print("Data setelah warm-up:")
display(result_df)

# Logic Validation: Tambahkan Kolom is_2min_valid
if not result_df.empty:
    result_df['is_2min_valid'] = result_df.apply(
        lambda row: 1 if row['C1'] > row['O1'] and row['Close Price'] >= row['C2'] else 0, axis=1
    )

    # Setup tampilan Pandas
    pd.set_option('display.max_columns', None)
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.max_rows', 10)

    # Tampilkan hasil setelah logic validasi
    print("Data setelah logic validasi:")
    display(result_df)
else:
    print("Tidak ada data yang berhasil diproses.")

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=20)"))
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BMRI.JK']: ConnectionError(MaxRetryError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Max retries exceeded with url: /v8/finance/chart/BMRI.JK?range=8d&interval=1m&includePrePost=False&events=div%2Csplits%2CcapitalGains&crumb=UY5AFC0vL2Z (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7eff8cc7cfe0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))"))
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBCA.JK']: ConnectionError(MaxRetryError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Max retries exceeded with url: /v8/

Data setelah warm-up:





Unnamed: 0,Ticker,Tanggal,O1,H1,L1,C1,O2,H2,L2,C2,Close Price
0,BRIS,2025-02-25,3070.0,3080.0,3070.0,3070.0,3080.0,3080.0,3070.0,3080.0,2990.0
1,BRIS,2025-02-26,3000.0,3010.0,2990.0,3000.0,3010.0,3010.0,2990.0,3000.0,2870.0
2,BRIS,2025-02-27,2880.0,2900.0,2870.0,2870.0,2870.0,2870.0,2840.0,2850.0,2630.0
3,BRIS,2025-02-28,2620.0,2630.0,2550.0,2570.0,2570.0,2580.0,2560.0,2570.0,2500.0
4,BRIS,2025-03-03,2570.0,2640.0,2570.0,2630.0,2630.0,2630.0,2590.0,2600.0,2660.0
...,...,...,...,...,...,...,...,...,...,...,...
376,MPMX,2025-02-27,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0,945.0
377,MPMX,2025-03-03,935.0,935.0,935.0,935.0,935.0,935.0,930.0,935.0,940.0
378,MPMX,2025-03-04,940.0,945.0,940.0,945.0,945.0,945.0,945.0,945.0,920.0
379,MPMX,2025-03-05,925.0,925.0,925.0,925.0,930.0,930.0,930.0,930.0,930.0


Data setelah logic validasi:


Unnamed: 0,Ticker,Tanggal,O1,H1,L1,C1,O2,H2,L2,C2,Close Price,is_2min_valid
0,BRIS,2025-02-25,3070.0,3080.0,3070.0,3070.0,3080.0,3080.0,3070.0,3080.0,2990.0,0
1,BRIS,2025-02-26,3000.0,3010.0,2990.0,3000.0,3010.0,3010.0,2990.0,3000.0,2870.0,0
2,BRIS,2025-02-27,2880.0,2900.0,2870.0,2870.0,2870.0,2870.0,2840.0,2850.0,2630.0,0
3,BRIS,2025-02-28,2620.0,2630.0,2550.0,2570.0,2570.0,2580.0,2560.0,2570.0,2500.0,0
4,BRIS,2025-03-03,2570.0,2640.0,2570.0,2630.0,2630.0,2630.0,2590.0,2600.0,2660.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
376,MPMX,2025-02-27,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0,945.0,0
377,MPMX,2025-03-03,935.0,935.0,935.0,935.0,935.0,935.0,930.0,935.0,940.0,0
378,MPMX,2025-03-04,940.0,945.0,940.0,945.0,945.0,945.0,945.0,945.0,920.0,0
379,MPMX,2025-03-05,925.0,925.0,925.0,925.0,930.0,930.0,930.0,930.0,930.0,0


In [None]:
import pandas as pd
import yfinance as yf
from IPython.display import display

# Daftar ticker yang ingin diambil
tickers = [
    "BBRI.JK", "BMRI.JK", "BBCA.JK", "TLKM.JK", "BBNI.JK"
]

# Ticker untuk indeks Hang Seng dan Nikkei
index_tickers = {
    'hang_seng': "^HSI",
    'nikkei': "^N225"
}

# Simpan semua hasil dalam satu list (untuk warm-up stage)
data_rows = []
failed_tickers = []

def download_index_data(ticker, start, end):
    try:
        # Ambil data trading pada hari yang bersangkutan
        data = yf.download(ticker, start=start, end=end, interval="1d", timeout=20)  # Daily OHLC data
        if not data.empty:
            first_ohlc = data.iloc[0][['Open', 'High', 'Low', 'Close']]
            return first_ohlc.tolist()
        else:
            return [None, None, None, None]
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal mengambil data {ticker}: {e}")
        return [None, None, None, None]

def warm_up_data():
    for ticker in tickers:
        try:
            # Ambil data 8 hari terakhir untuk setiap ticker
            data = yf.download(ticker, interval="1m", period="8d", timeout=20)
            # Ubah timezone ke Jakarta (WIB)
            data.index = data.index.tz_convert("Asia/Jakarta")

            # Ambil tanggal unik
            data['Date'] = data.index.date
            unique_dates = data['Date'].unique()

            for date in unique_dates:
                # Filter data hanya untuk tanggal ini
                daily_data = data[data['Date'] == date]

                # Load index values for Hang Seng and Nikkei
                hs_ohlc = download_index_data(index_tickers['hang_seng'], date, date + pd.Timedelta(days=1))
                nikkei_ohlc = download_index_data(index_tickers['nikkei'], date, date + pd.Timedelta(days=1))

                # Ambil data menit pertama (09:00) dan kedua (09:01)
                first_minute = daily_data.between_time("09:00:00", "09:00:59")
                second_minute = daily_data.between_time("09:01:00", "09:01:59")

                # Ambil harga penutupan harian (16:00)
                close_data = daily_data.between_time("15:59:00", "16:00:00")
                close_price = close_data["Close"].iloc[0] if not close_data.empty else None

                if not first_minute.empty and not second_minute.empty:
                    hs_open, hs_high, hs_low, hs_close = hs_ohlc
                    nikkei_open, nikkei_high, nikkei_low, nikkei_close = nikkei_ohlc

                    # Tentukan apakah Hang Seng dan Nikkei Bullish atau Bearish
                    hs_bull = 1 if (hs_close > hs_open) else 0
                    nikkei_bull = 1 if (nikkei_close > nikkei_open) else 0

                    data_rows.append([
                        date,
                        hs_open, hs_high, hs_low, hs_close,  # Hang Seng
                        nikkei_open, nikkei_high, nikkei_low, nikkei_close,  # Nikkei
                        ticker.replace(".JK", ""),
                        first_minute["Open"].iloc[0].item(), first_minute["High"].iloc[0].item(), 
                        first_minute["Low"].iloc[0].item(), first_minute["Close"].iloc[0].item(),
                        second_minute["Open"].iloc[0].item(), second_minute["High"].iloc[0].item(), 
                        second_minute["Low"].iloc[0].item(), second_minute["Close"].iloc[0].item(),
                        close_price.item() if close_price is not None else None,
                        hs_bull,  # Kolom untuk ngeset bullish atau bearish Hang Seng
                        nikkei_bull  # Kolom untuk ngeset bullish atau bearish Nikkei
                    ])

        except Exception as e:
            print(f"‚ö†Ô∏è Gagal mengambil data {ticker}: {e}")
            failed_tickers.append(ticker)

    if failed_tickers:
        print("üîÑ Mencoba ulang untuk ticker yang gagal...")
        for ticker in failed_tickers:
            download_index_data(ticker)

# Lakukan warm-up dengan mengunduh data
warm_up_data()

# Buat DataFrame hasil dengan kolom terstruktur
columns = [
    "TGL",
    "HS_O", "HS_H", "HS_L", "HS_C", 
    "Ni_O", "Ni_H", "Ni_L", "Ni_C",
    "SAHAM", 
    "O1", "H1", "L1", "C1", 
    "O2", "H2", "L2", "C2", 
    "CLOSE",
    "HS_Bull", "Ni_Bull"  # Kolom tambahan untuk bull/bear
]
result_df = pd.DataFrame(data_rows, columns=columns)

# Batasi semua nilai float ke 2 desimal
float_columns = [
    "HS_O", "HS_H", "HS_L", "HS_C", 
    "Ni_O", "Ni_H", "Ni_L", "Ni_C",
    "O1", "H1", "L1", "C1", 
    "O2", "H2", "L2", "C2", 
    "CLOSE"
]

result_df[float_columns] = result_df[float_columns].round(2)

# Tampilkan hasil data sebelum aplikasi logika
print("Data setelah warm-up:")
display(result_df)

# Logic Validation: Tambahkan Kolom is_2min_valid
if not result_df.empty:
    result_df['is_2min_valid'] = result_df.apply(
        lambda row: 1 if (row.get('C1', 0) > row.get('O1', 0) and 
                           row.get('C2', 0) >= row.get('C1', 0) and 
                           row.get('Close Price', 0) >= row.get('C2', 0)) else 0, 
        axis=1
    )

    # Setup tampilan Pandas agar hanya menampilkan 2 desimal
    pd.options.display.float_format = '{:.2f}'.format
    pd.set_option('display.max_columns', None)
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.max_rows', 10)

    # Tampilkan hasil setelah logic validasi
    print("Data setelah logic validasi:")
    display(result_df)
else:
    print("Tidak ada data yang berhasil diproses.")


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BBRI.JK']: ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=20)"))
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*******

Data setelah warm-up:


Unnamed: 0,TGL,HS_O,HS_H,HS_L,HS_C,Ni_O,Ni_H,Ni_L,Ni_C,SAHAM,O1,H1,L1,C1,O2,H2,L2,C2,CLOSE,HS_Bull,Ni_Bull
0,2025-02-26,23238.34,23973.17,23238.34,23787.93,38143.68,38175.24,37742.76,38142.37,BMRI,4860.00,4920.00,4850.00,4880.00,4900.00,4900.00,4870.00,4890.00,4920.00,1,0
1,2025-02-27,23783.75,24076.53,23441.74,23718.29,38251.11,38369.96,38061.94,38256.17,BMRI,4930.00,4950.00,4920.00,4930.00,4920.00,4920.00,4890.00,4890.00,4660.00,0,1
2,2025-02-28,23616.18,23616.18,22798.34,22941.32,37853.48,37925.28,36840.12,37155.50,BMRI,4620.00,4620.00,4580.00,4600.00,4590.00,4610.00,4590.00,4600.00,4600.00,0,0
3,2025-03-03,23135.12,23405.98,22861.75,23006.27,37617.50,37827.62,37372.26,37785.47,BMRI,4740.00,4770.00,4700.00,4740.00,4740.00,4740.00,4720.00,4730.00,4900.00,0,1
4,2025-03-04,22678.25,23051.01,22547.53,22941.77,37532.01,37587.49,36816.16,37331.18,BMRI,4890.00,4900.00,4800.00,4820.00,4820.00,4840.00,4790.00,4840.00,4840.00,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27,2025-03-03,23135.12,23405.98,22861.75,23006.27,37617.50,37827.62,37372.26,37785.47,BBNI,4180.00,4190.00,4130.00,4170.00,4160.00,4170.00,4130.00,4150.00,4260.00,0,1
28,2025-03-04,22678.25,23051.01,22547.53,22941.77,37532.01,37587.49,36816.16,37331.18,BBNI,4240.00,4240.00,4170.00,4170.00,4170.00,4200.00,4170.00,4200.00,4220.00,1,0
29,2025-03-05,23200.42,23636.65,23090.63,23594.21,37345.74,37606.34,37178.72,37418.24,BBNI,4250.00,4280.00,4250.00,4280.00,4280.00,4300.00,4270.00,4290.00,4460.00,1,1
30,2025-03-06,23987.09,24410.93,23987.09,24369.71,37654.95,37874.38,37580.88,37704.93,BBNI,4560.00,4580.00,4540.00,4570.00,4570.00,4600.00,4560.00,4600.00,4590.00,1,1


Data setelah logic validasi:


Unnamed: 0,TGL,HS_O,HS_H,HS_L,HS_C,Ni_O,Ni_H,Ni_L,Ni_C,SAHAM,O1,H1,L1,C1,O2,H2,L2,C2,CLOSE,HS_Bull,Ni_Bull,is_2min_valid
0,2025-02-26,23238.34,23973.17,23238.34,23787.93,38143.68,38175.24,37742.76,38142.37,BMRI,4860.00,4920.00,4850.00,4880.00,4900.00,4900.00,4870.00,4890.00,4920.00,1,0,0
1,2025-02-27,23783.75,24076.53,23441.74,23718.29,38251.11,38369.96,38061.94,38256.17,BMRI,4930.00,4950.00,4920.00,4930.00,4920.00,4920.00,4890.00,4890.00,4660.00,0,1,0
2,2025-02-28,23616.18,23616.18,22798.34,22941.32,37853.48,37925.28,36840.12,37155.50,BMRI,4620.00,4620.00,4580.00,4600.00,4590.00,4610.00,4590.00,4600.00,4600.00,0,0,0
3,2025-03-03,23135.12,23405.98,22861.75,23006.27,37617.50,37827.62,37372.26,37785.47,BMRI,4740.00,4770.00,4700.00,4740.00,4740.00,4740.00,4720.00,4730.00,4900.00,0,1,0
4,2025-03-04,22678.25,23051.01,22547.53,22941.77,37532.01,37587.49,36816.16,37331.18,BMRI,4890.00,4900.00,4800.00,4820.00,4820.00,4840.00,4790.00,4840.00,4840.00,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27,2025-03-03,23135.12,23405.98,22861.75,23006.27,37617.50,37827.62,37372.26,37785.47,BBNI,4180.00,4190.00,4130.00,4170.00,4160.00,4170.00,4130.00,4150.00,4260.00,0,1,0
28,2025-03-04,22678.25,23051.01,22547.53,22941.77,37532.01,37587.49,36816.16,37331.18,BBNI,4240.00,4240.00,4170.00,4170.00,4170.00,4200.00,4170.00,4200.00,4220.00,1,0,0
29,2025-03-05,23200.42,23636.65,23090.63,23594.21,37345.74,37606.34,37178.72,37418.24,BBNI,4250.00,4280.00,4250.00,4280.00,4280.00,4300.00,4270.00,4290.00,4460.00,1,1,0
30,2025-03-06,23987.09,24410.93,23987.09,24369.71,37654.95,37874.38,37580.88,37704.93,BBNI,4560.00,4580.00,4540.00,4570.00,4570.00,4600.00,4560.00,4600.00,4590.00,1,1,0


# PREFLIGHT FOR YFINANCE CONNECTION

In [None]:
# === PREFLIGHT: Cek koneksi ke Yahoo Finance / yfinance sebelum download berat ===
# aman dijalankan berulang
!pip install -q requests yfinance

import socket, ssl, time, json, os
import requests, yfinance as yf
from urllib.parse import urlparse

TIMEOUT = 15  # detik

ENDPOINTS = [
    "https://query1.finance.yahoo.com/v7/finance/quote?symbols=AAPL",
    "https://query1.finance.yahoo.com/v8/finance/chart/TLKM.JK?range=5d&interval=1d",
]

TEST_SYMBOLS = ["AAPL", "TLKM.JK"]  # 1 global, 1 Indonesia
RESULTS = []

def check_dns(host):
    t0 = time.time()
    try:
        ip = socket.gethostbyname(host)
        return True, ip, (time.time()-t0)
    except Exception as e:
        return False, str(e), (time.time()-t0)

def check_tls(host, port=443):
    t0 = time.time()
    ctx = ssl.create_default_context()
    try:
        with socket.create_connection((host, port), timeout=TIMEOUT) as sock:
            with ctx.wrap_socket(sock, server_hostname=host) as ssock:
                cert = ssock.getpeercert()
        return True, "ok", (time.time()-t0)
    except Exception as e:
        return False, str(e), (time.time()-t0)

def http_probe(url):
    t0 = time.time()
    try:
        r = requests.get(url, timeout=TIMEOUT, allow_redirects=False,
                         headers={"User-Agent":"Mozilla/5.0"})
        return True, {"status": r.status_code, "location": r.headers.get("Location"), "len": len(r.content)}, (time.time()-t0)
    except Exception as e:
        return False, str(e), (time.time()-t0)

def yf_probe(symbol, period="5d", interval="1d"):
    t0 = time.time()
    try:
        df = yf.download(symbol, period=period, interval=interval, progress=False, threads=False, timeout=TIMEOUT)
        ok = not df.empty
        return ok, {"rows": len(df)}, (time.time()-t0)
    except Exception as e:
        return False, str(e), (time.time()-t0)

print("üîé Preflight: Yahoo Finance connectivity\n")

# 1) DNS + TLS untuk host utama
hosts = {"query1.finance.yahoo.com", "guce.yahoo.com", "www.yahoo.com"}
for h in hosts:
    ok, info, dt = check_dns(h)
    RESULTS.append(("DNS", h, ok, info, dt))
    print(f"DNS  {h:<28} {'OK' if ok else 'FAIL'}  {info}  ({dt:.2f}s)")
for h in hosts:
    ok, info, dt = check_tls(h, 443)
    RESULTS.append(("TLS", h, ok, info, dt))
    print(f"TLS  {h:<28} {'OK' if ok else 'FAIL'}  {info}  ({dt:.2f}s)")

# 2) HTTP GET ke endpoint quote/chart (tanpa yfinance)
for url in ENDPOINTS:
    host = urlparse(url).hostname
    ok, info, dt = http_probe(url)
    RESULTS.append(("HTTP", host, ok, info, dt))
    if ok:
        status = info["status"]
        loc    = info["location"]
        note   = f"status={status}" + (f", redirect‚Üí{loc}" if loc else "")
        print(f"HTTP {host:<28} OK    {note}  ({dt:.2f}s)")
    else:
        print(f"HTTP {host:<28} FAIL  {info}  ({dt:.2f}s)")

# 3) Tes ringan via yfinance untuk 2 simbol
for sym in TEST_SYMBOLS:
    ok, info, dt = yf_probe(sym, period="5d", interval="1d")
    RESULTS.append(("YF", sym, ok, info, dt))
    if ok:
        print(f"YF   {sym:<10} OK    rows={info['rows']}  ({dt:.2f}s)")
    else:
        print(f"YF   {sym:<10} FAIL  {info}  ({dt:.2f}s)")

# 4) Rekomendasi cepat berdasarkan hasil
print("\nü©∫ Diagnosis & saran:")
dns_fail = [r for r in RESULTS if r[0]=="DNS" and not r[2]]
http_fail= [r for r in RESULTS if r[0]=="HTTP" and not r[2]]
yf_fail  = [r for r in RESULTS if r[0]=="YF" and not r[2]]

if dns_fail:
    print("- DNS gagal untuk:", ", ".join(set([h for _,h,_,_,_ in dns_fail])))
    print("  ‚ûú Coba ganti DNS (mis. 8.8.8.8/1.1.1.1) atau cek VPN/firewall.")
if http_fail:
    # deteksi redirect ke guce (consent)
    redir_to_guce = any(isinstance(info, dict) and info.get("location","",) and "guce.yahoo.com" in info.get("location","") for _,_,_,info,_ in RESULTS if _=="HTTP")
    if redir_to_guce:
        print("- Ter-redirect ke guce.yahoo.com (consent).")
        print("  ‚ûú Buka https://www.yahoo.com di browser (set consent/cookies), lalu jalankan ulang.")
    else:
        print("- HTTP ke endpoint Yahoo gagal/time-out.")
        print("  ‚ûú Cek koneksi umum & coba lagi beberapa menit (bisa throttling).")
if yf_fail and not http_fail:
    print("- yfinance gagal, tapi HTTP oke ‚Üí kemungkinan rate limit.")
    print("  ‚ûú Kecilkan batch, tambah jeda, pakai slow mode (1-per-1), atau lanjut saat off-peak.")
if not (dns_fail or http_fail or yf_fail):
    print("- Semua cek OK ‚úÖ ‚Äî lanjutkan pipeline unduhan.")


# GET ACTIVE EMITEN

In [1]:
# === FINAL CLEANER (pakai config kamu) ===
# progress bar + cache + retry/backoff + slow-mode; output & cache di folder emiten/
!pip install -q yfinance pandas numpy tqdm openpyxl xlrd

import os, re, time, random, warnings
import numpy as np, pandas as pd, yfinance as yf
from datetime import datetime
from pathlib import Path
from IPython.display import display
from tqdm.auto import tqdm
warnings.filterwarnings("ignore")

# ===== CONFIG (ambil punyamu) =====
INPUT_PATH         = "candidates_from_excel.csv"
PREFER_EXCEL       = False
FOLDER_OUT         = "emiten"

LOOKBACK_DAYS      = 30         # cek aktivitas 30 hari terakhir
MIN_NONZERO_DAYS   = 15
MIN_PCT_NONZERO    = 0.50
MAX_CONSEC_ZERO    = 10
MIN_MED_VALUE_90D  = 7.5e9
MIN_PRICE_FLOOR    = 75
MIN_TURNOVER       = 0.0005

DL_PERIOD_FOR_ACTIVITY = 60     # -> '60d'
DL_PERIOD_FOR_LIQ      = 100    # -> '100d'
CHUNK_SIZE_MAIN        = 5
PAUSE_MAIN             = 2.0
CHUNK_SIZE_SECOND      = 10
PAUSE_SECOND           = 1.5
MAX_RETRIES            = 3
RETRY_BACKOFF          = 1.6
TIMEOUT_SEC            = 45
SLOWMODE_COOLDOWN      = 120    # detik; jeda panjang saat rate-limit

# ===== Setup output =====
DATE_TAG = datetime.now().strftime("%Y%m%d")
os.makedirs(FOLDER_OUT, exist_ok=True)
OUTPUT_ACTIVE_CSV  = os.path.join(FOLDER_OUT, f"candidates_active_filtered_{DATE_TAG}.csv")
OUTPUT_FULL_CSV    = os.path.join(FOLDER_OUT, f"candidates_full_with_flags_{DATE_TAG}.csv")

# ===== Helpers =====
def extract_tickers_from_df(df):
    tickers = set()
    for c in df.columns:
        s = df[c].astype(str).str.upper().str.strip()
        extracted = s.str.extract(r'\b([A-Z]{2,5}(?:\.JK)?)\b')[0].dropna()
        for sym in extracted:
            base = sym.replace(".JK","")
            if 2 <= len(base) <= 5 and base.isalpha():
                tickers.add(sym if sym.endswith(".JK") else f"{sym}.JK")
    return sorted(tickers)

def load_candidates(path, prefer_excel=False):
    p = path.lower()
    if not os.path.exists(path):
        raise FileNotFoundError(f"File input tidak ada: {path}")
    if prefer_excel and (p.endswith(".xlsx") or p.endswith(".xls")):
        try:
            engine = "openpyxl" if p.endswith(".xlsx") else "xlrd"
            xl = pd.ExcelFile(path, engine=engine)
            codes=set()
            for sh in xl.sheet_names:
                df = xl.parse(sh)
                codes |= set(extract_tickers_from_df(df))
            return sorted(codes)
        except Exception as e:
            print(f"‚ö†Ô∏è Gagal baca Excel ({e}), fallback CSV‚Ä¶")
    df = pd.read_csv(path)
    cols_lower = [c.lower() for c in df.columns]
    if "ticker" in cols_lower:
        s = df[df.columns[cols_lower.index("ticker")]]
        return extract_tickers_from_df(pd.DataFrame({"ticker": s}))
    if "code" in cols_lower:
        s = df[df.columns[cols_lower.index("code")]]
        return extract_tickers_from_df(pd.DataFrame({"code": s}))
    return extract_tickers_from_df(df)

def _slice_px(px, t, batch_len):
    if isinstance(px.columns, pd.MultiIndex):
        try: return px[t].dropna()
        except Exception: return pd.DataFrame()
    else:
        return px.dropna() if batch_len == 1 else pd.DataFrame()

def _period_arg(period):  # int -> 'Nd'
    return period if isinstance(period, str) else f"{int(period)}d"

def _cache_paths(base_dir, period, ticker):
    cache_dir = Path(base_dir) / f"cache_{period}"
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir, cache_dir / f"{ticker}.csv"

def _save_cache(df, path_csv):
    try: df.to_csv(path_csv, index=True)
    except Exception: pass

def _load_cache(path_csv):
    try:
        df = pd.read_csv(path_csv, parse_dates=True, index_col=0)
        req = {"Open","High","Low","Close","Volume"}
        return df if req.issubset(df.columns) else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def download_panel(
    tickers, period_days,
    cache_base="emiten",
    chunk_main=15, pause_main=1.2,
    chunk_second=10, pause_second=1.5,
    max_retries=3, backoff=1.6,
    timeout=45, jitter=0.3,
    slowmode_cooldown=120
):
    period = _period_arg(period_days)
    out = {}
    # cache hit
    cached = 0
    for t in tickers:
        _, cpath = _cache_paths(cache_base, period, t)
        df = _load_cache(cpath)
        if not df.empty:
            out[t] = df; cached += 1
    if cached: print(f"üíæ Cache hit: {cached}/{len(tickers)}")

    remaining = [t for t in tickers if t not in out]
    if not remaining: return out

    # main pass batched (threads=False)
    batches = list(range(0, len(remaining), chunk_main))
    pbar = tqdm(batches, desc=f"Downloading {period} OHLCV (main)", unit="batch")
    rate_limited = False

    for start in pbar:
        batch = [t for t in remaining[start:start+chunk_main] if t not in out]
        if not batch: continue
        attempt = 0
        while True:
            try:
                px = yf.download(
                    batch, period=period, interval="1d",
                    auto_adjust=False, group_by="ticker",
                    progress=False, threads=False, timeout=timeout
                )
                break
            except Exception as e:
                attempt += 1
                if attempt > max_retries:
                    rate_limited = True
                    tqdm.write(f"[main] batch failed: {type(e).__name__} ‚Üí switch to slow mode")
                    px = pd.DataFrame(); break
                sleep_s = pause_main * (backoff ** (attempt-1)) + random.uniform(0, jitter)
                tqdm.write(f"[main] retry {attempt}/{max_retries} after {sleep_s:.1f}s ({type(e).__name__})")
                time.sleep(sleep_s)
        for t in batch:
            try: df = _slice_px(px, t, len(batch))
            except Exception: df = pd.DataFrame()
            if not df.empty:
                out[t] = df
                _, cpath = _cache_paths(cache_base, period, t)
                _save_cache(df, cpath)
        time.sleep(pause_main + random.uniform(0, jitter))

    # slow mode sequential for remaining
    still = [t for t in tickers if t not in out]
    if still:
        if rate_limited:
            tqdm.write(f"üïí Rate-limited. Cooling down {slowmode_cooldown}s before slow mode‚Ä¶")
            time.sleep(slowmode_cooldown)
        pbar2 = tqdm(still, desc="Slow mode (1-by-1)", unit="ticker")
        for t in pbar2:
            _, cpath = _cache_paths(cache_base, period, t)
            df = _load_cache(cpath)
            if not df.empty:
                out[t] = df; continue
            attempt = 0; pause = pause_main
            while True:
                try:
                    px = yf.download(t, period=period, interval="1d",
                                     auto_adjust=False, progress=False,
                                     threads=False, timeout=timeout)
                    df = _slice_px(px, t, 1); break
                except Exception as e:
                    attempt += 1
                    if attempt > (max_retries + 1):
                        df = pd.DataFrame(); break
                    sleep_s = pause * (backoff ** (attempt-1)) + random.uniform(0, jitter)
                    pbar2.set_postfix_str(f"retry {attempt} ({type(e).__name__})")
                    time.sleep(sleep_s)
            if not df.empty:
                out[t] = df; _save_cache(df, cpath)
            else:
                time.sleep(pause_main + random.uniform(0, jitter))

    missing = [t for t in tickers if t not in out or getattr(out[t], "empty", True)]
    if missing:
        fail_csv = Path(cache_base) / f"failed_{period}_{DATE_TAG}.csv"
        pd.DataFrame({"ticker": missing}).to_csv(fail_csv, index=False)
        print(f"‚ö†Ô∏è Gagal final: {len(missing)} tickers. Disimpan ke {fail_csv}")
    else:
        print("‚úÖ Semua ticker berhasil/cached.")
    return out

def fetch_meta(tickers):
    rows=[]
    for t in tickers:
        tk = yf.Ticker(t)
        try: info = tk.info or {}
        except Exception: info = {}
        rows.append({
            "ticker": t,
            "marketCap": info.get("marketCap"),
            "sector": info.get("sector"),
            "industry": info.get("industry"),
            "sharesOut": info.get("sharesOutstanding") or info.get("floatShares")
        })
    return pd.DataFrame(rows)

# ===== Run =====
CANDIDATES_ALL = load_candidates(INPUT_PATH, prefer_excel=PREFER_EXCEL)
print(f"üì• Total kandidat masuk: {len(CANDIDATES_ALL)}")
if not CANDIDATES_ALL:
    raise SystemExit("Daftar kandidat kosong.")

print("‚è¨ Unduh OHLCV aktivitas‚Ä¶")
px_act = download_panel(
    CANDIDATES_ALL, DL_PERIOD_FOR_ACTIVITY,
    cache_base=FOLDER_OUT,
    chunk_main=CHUNK_SIZE_MAIN, pause_main=PAUSE_MAIN,
    chunk_second=CHUNK_SIZE_SECOND, pause_second=PAUSE_SECOND,
    max_retries=MAX_RETRIES, backoff=RETRY_BACKOFF,
    timeout=TIMEOUT_SEC, slowmode_cooldown=SLOWMODE_COOLDOWN
)

print("‚è¨ Unduh OHLCV likuiditas‚Ä¶")
px_liq = download_panel(
    CANDIDATES_ALL, DL_PERIOD_FOR_LIQ,
    cache_base=FOLDER_OUT,
    chunk_main=CHUNK_SIZE_MAIN, pause_main=PAUSE_MAIN,
    chunk_second=CHUNK_SIZE_SECOND, pause_second=PAUSE_SECOND,
    max_retries=MAX_RETRIES, backoff=RETRY_BACKOFF,
    timeout=TIMEOUT_SEC, slowmode_cooldown=SLOWMODE_COOLDOWN
)

print("‚ÑπÔ∏è Ambil metadata‚Ä¶")
meta = fetch_meta(CANDIDATES_ALL)

# ===== Hitung metrik & klasifikasi =====
rows=[]
for t in CANDIDATES_ALL:
    dfa = px_act.get(t, pd.DataFrame())
    dfl = px_liq.get(t, pd.DataFrame())

    last_close = float(dfa["Close"].iloc[-1]) if not dfa.empty else np.nan
    nonzero_days = 0; pct_nonzero = 0.0; max_zero_run = LOOKBACK_DAYS
    if not dfa.empty:
        look = dfa.tail(LOOKBACK_DAYS)
        if not look.empty:
            v = look["Volume"].fillna(0)
            nonzero_days = int((v>0).sum())
            pct_nonzero  = float(nonzero_days/len(look))
            runs = (v==0).astype(int)
            if runs.any():
                grp = (runs != runs.shift()).cumsum()
                max_zero_run = int(runs.groupby(grp).sum().max())
            else:
                max_zero_run = 0

    med_value_90d = 0.0; med_vol_90d = 0.0
    if not dfl.empty:
        val = (dfl["Close"]*dfl["Volume"])
        med_value_90d = float(val.rolling(90).median().dropna().iloc[-1]) if len(val)>=90 else float(val.median())
        med_vol_90d   = float(dfl["Volume"].rolling(90).median().dropna().iloc[-1]) if len(dfl)>=90 else float(dfl["Volume"].median())

    rows.append({
        "ticker": t,
        "last_close": last_close,
        "nonzero_days_30d": nonzero_days,
        "pct_nonzero_30d": round(pct_nonzero,3),
        "max_consec_zero_30d": max_zero_run,
        "med_value_90d": med_value_90d,
        "med_volume_90d": med_vol_90d,
    })

feat = pd.DataFrame(rows)
df = meta.merge(feat, on="ticker", how="right")
df["turnover_med"] = np.where(df["sharesOut"].fillna(0)>0, df["med_volume_90d"]/df["sharesOut"], np.nan)

def classify(r):
    # full zero / sangat tidak aktif (window 30d)
    if r["nonzero_days_30d"] == 0 or r["pct_nonzero_30d"] < 0.1 or r["max_consec_zero_30d"] >= LOOKBACK_DAYS:
        return "SUSPECT_SUSPENDED", "no trading in window"
    reasons=[]
    if not pd.isna(r["last_close"]) and r["last_close"] < MIN_PRICE_FLOOR: reasons.append("price < floor")
    if r["pct_nonzero_30d"] < MIN_PCT_NONZERO: reasons.append("pct_nonzero < threshold")
    if r["nonzero_days_30d"] < MIN_NONZERO_DAYS: reasons.append("nonzero_days < threshold")
    if r["max_consec_zero_30d"] > MAX_CONSEC_ZERO: reasons.append("max zero run too long")
    if r["med_value_90d"] < MIN_MED_VALUE_90D: reasons.append("med_value_90d < min")
    if not pd.isna(r["turnover_med"]) and r["turnover_med"] < MIN_TURNOVER: reasons.append("turnover < min")
    return ("DORMANT", "; ".join(reasons)) if reasons else ("ACTIVE","pass")

lab = df.apply(classify, axis=1, result_type="expand")
lab.columns = ["status","why"]
df = pd.concat([df, lab], axis=1).sort_values(["status","med_value_90d"], ascending=[True,False]).reset_index(drop=True)

# ===== Save =====
df.to_csv(OUTPUT_FULL_CSV, index=False, encoding="utf-8")
active = df[df["status"]=="ACTIVE"][["ticker"]]
active.to_csv(OUTPUT_ACTIVE_CSV, index=False, encoding="utf-8")

print(f"\n‚úÖ Disaring: {len(df)} total | ACTIVE: {len(active)} | DORMANT: {len(df[df.status=='DORMANT'])} | SUSPECT_SUSPENDED: {len(df[df.status=='SUSPECT_SUSPENDED'])}")
print(f"üíæ Saved (full flags): {OUTPUT_FULL_CSV}")
print(f"üíæ Saved (active-only): {OUTPUT_ACTIVE_CSV}")
display(df.head(20))


üì• Total kandidat masuk: 954
‚è¨ Unduh OHLCV aktivitas‚Ä¶


Downloading 60d OHLCV (main):   0%|          | 0/191 [00:00<?, ?batch/s]

‚úÖ Semua ticker berhasil/cached.
‚è¨ Unduh OHLCV likuiditas‚Ä¶


Downloading 100d OHLCV (main):   0%|          | 0/191 [00:00<?, ?batch/s]

‚úÖ Semua ticker berhasil/cached.
‚ÑπÔ∏è Ambil metadata‚Ä¶

‚úÖ Disaring: 954 total | ACTIVE: 113 | DORMANT: 748 | SUSPECT_SUSPENDED: 93
üíæ Saved (full flags): emiten/candidates_full_with_flags_20250811.csv
üíæ Saved (active-only): emiten/candidates_active_filtered_20250811.csv


Unnamed: 0,ticker,marketCap,sector,industry,sharesOut,last_close,nonzero_days_30d,pct_nonzero_30d,max_consec_zero_30d,med_value_90d,med_volume_90d,turnover_med,status,why
0,BBCA.JK,1053762000000000.0,Financial Services,Banks - Regional,123247000000.0,8550.0,30,1.0,0,805783200000.0,92770800.0,0.000753,ACTIVE,pass
1,BBRI.JK,577439800000000.0,Financial Services,Banks - Regional,151559000000.0,3810.0,30,1.0,0,782751800000.0,196737900.0,0.001298,ACTIVE,pass
2,BMRI.JK,440533200000000.0,Financial Services,Banks - Regional,93333300000.0,4720.0,30,1.0,0,738986700000.0,150437000.0,0.001612,ACTIVE,pass
3,ANTM.JK,70650550000000.0,Basic Materials,Gold,24030800000.0,2940.0,30,1.0,0,514042700000.0,189333000.0,0.007879,ACTIVE,pass
4,BRPT.JK,220205300000000.0,Basic Materials,Chemicals,93704400000.0,2350.0,30,1.0,0,288989900000.0,168220200.0,0.001795,ACTIVE,pass
5,TLKM.JK,296196000000000.0,Communication Services,Telecom Services,99062200000.0,2990.0,30,1.0,0,257218700000.0,96962450.0,0.000979,ACTIVE,pass
6,BBNI.JK,156478600000000.0,Financial Services,Banks - Regional,578684000.0,4200.0,30,1.0,0,240846300000.0,57482700.0,0.099333,ACTIVE,pass
7,BRMS.JK,62952100000000.0,Basic Materials,Other Industrial Metals & Mining,25570200000.0,444.0,30,1.0,0,216173700000.0,572995800.0,0.022409,ACTIVE,pass
8,PTRO.JK,37419060000000.0,Basic Materials,Other Industrial Metals & Mining,10086000000.0,3710.0,30,1.0,0,191412400000.0,62869750.0,0.006233,ACTIVE,pass
9,ASII.JK,198369600000000.0,Industrials,Conglomerates,40483600000.0,4900.0,30,1.0,0,183947900000.0,38560150.0,0.000952,ACTIVE,pass


# BANDARMOLOGY V.1

In [None]:
# === ONE-CELL PIPELINE (BRIDGE) ‚Äî Gunakan roster aktif emiten/‚Ä¶_YYYYMMDD.csv ===
# cepat dan hemat request (pakai cache 6mo), hanya fallback download tipis untuk yang belum tercache.
!pip install -q yfinance pandas numpy

import os, glob, json, time, warnings
import numpy as np, pandas as pd, yfinance as yf
from datetime import datetime
from pathlib import Path
from IPython.display import display

warnings.filterwarnings("ignore")

# ---------- CONFIG ----------
EMITEN_DIR         = "emiten"
ACTIVE_PREFIX      = "candidates_active_filtered_"
CACHE_DIRS_TRY     = ["cache_6mo", "cache_100d"]   # urutan prioritas baca cache
CACHE_WRITE_DIR    = "cache_6mo"                   # kalau fallback download, simpan di sini
UNIVERSE_TOPN      = 10
TOPK_FOR_META      = 200       # ambil meta hanya utk top-K likuid (hemat)
VALUE_MIN          = 5.0e9     # ~ Rp 5 miliar median value 90D
VALUE_MAX          = 8.0e10    # ~ Rp 80 miliar (turunkan dari 150 miliar)
CAP_Q_MAX          = 0.75      # buang top 25% market cap (approx blue chips)
MIN_TURNOVER       = 0.001     # ‚â• 0.1% median Turnover (Vol/SharesOut)
MIN_PRICE          = 200
INDUSTRY_EX_RE     = r"Bank|Capital Markets|Insurance|Mortgage|Credit"
# Fallback download untuk ticker yg belum ada di cache:
FALLBACK_DOWNLOAD_MISSING = True
LIMIT_MISSING_DOWNLOADS   = 120   # batasi agar aman dari rate-limit
FALLBACK_PERIOD           = "6mo" # ringan, cukup untuk median 90D
YF_TIMEOUT                = 45

# ---------- Resolve file aktif terbaru ----------
def resolve_latest_active(emiten_dir, prefix):
    today = datetime.now().strftime("%Y%m%d")
    today_path = os.path.join(emiten_dir, f"{prefix}{today}.csv")
    if os.path.exists(today_path):
        return today_path
    cand = sorted(glob.glob(os.path.join(emiten_dir, f"{prefix}*.csv")))
    if not cand:
        raise FileNotFoundError("Tidak menemukan file aktif di folder emiten/. Jalankan cleaner dulu.")
    return cand[-1]

ACTIVE_PATH = resolve_latest_active(EMITEN_DIR, ACTIVE_PREFIX)
DATE_TAG = Path(ACTIVE_PATH).stem.rsplit("_", 1)[-1]  # ambil YYYYMMDD dari nama file
print(f"üìÑ Source aktif: {ACTIVE_PATH}")

# ---------- Load candidates dari file aktif ----------
df_act = pd.read_csv(ACTIVE_PATH)
if "ticker" not in [c.lower() for c in df_act.columns]:
    raise ValueError("File aktif harus punya kolom 'ticker'.")
tickers = (
    df_act[df_act.columns[[c.lower() for c in df_act.columns].index("ticker")]]
      .astype(str).str.strip().str.upper()
      .apply(lambda x: x if x.endswith(".JK") else f"{x}.JK")
      .dropna().drop_duplicates().tolist()
)
print(f"Total kandidat aktif: {len(tickers)}")

# ---------- Cache helpers ----------
def read_cache_df(t, base_dir=EMITEN_DIR, try_dirs=CACHE_DIRS_TRY):
    for sub in try_dirs:
        p = Path(base_dir) / sub / f"{t}.csv"
        if p.exists():
            try:
                df = pd.read_csv(p, index_col=0, parse_dates=True)
                if {"Open","High","Low","Close","Volume"}.issubset(df.columns):
                    return df
            except Exception:
                pass
    return pd.DataFrame()

def write_cache_df(t, df, base_dir=EMITEN_DIR, subdir=CACHE_WRITE_DIR):
    d = Path(base_dir) / subdir
    d.mkdir(parents=True, exist_ok=True)
    (d / f"{t}.csv").write_text(df.to_csv())

# ---------- Liquidity panel (cache-first, fallback optional) ----------
def liquidity_panel_from_cache(tickers, fallback=FALLBACK_DOWNLOAD_MISSING, limit_dl=LIMIT_MISSING_DOWNLOADS):
    got, missing = {}, []
    for t in tickers:
        df = read_cache_df(t)
        if df.empty:
            missing.append(t)
        else:
            got[t] = df

    if fallback and missing:
        # batasi agar aman
        missing = missing[:limit_dl]
        print(f"‚ö†Ô∏è {len(missing)} belum ada cache ‚Äì fallback download tipis ({FALLBACK_PERIOD})‚Ä¶")
        for t in missing:
            try:
                px = yf.download(t, period=FALLBACK_PERIOD, interval="1d", auto_adjust=False,
                                 progress=False, threads=False, timeout=YF_TIMEOUT)
                if isinstance(px.columns, pd.MultiIndex):
                    df = px[t].dropna()
                else:
                    df = px.dropna()
                if not df.empty:
                    got[t] = df
                    write_cache_df(t, df)
                time.sleep(0.4)
            except Exception:
                pass

    rows=[]
    for t, df in got.items():
        if df.empty:
            continue
        med_value = float((df["Close"]*df["Volume"]).rolling(90).median().dropna().iloc[-1]
                          if len(df)>=90 else (df["Close"]*df["Volume"]).median())
        last_close = float(df["Close"].iloc[-1])
        med_vol90  = float(df["Volume"].rolling(90).median().dropna().iloc[-1]
                           if len(df)>=90 else df["Volume"].median())
        rows.append({"ticker": t, "med_value": med_value, "last_close": last_close, "med_volume90": med_vol90})
    return pd.DataFrame(rows)

# ---------- Meta fetch (hemat: hanya Top-K likuid) ----------
def fetch_meta_subset(tickers):
    rows=[]
    for t in tickers:
        tk = yf.Ticker(t)
        try:
            info = tk.info or {}
        except Exception:
            info = {}
        rows.append({
            "ticker": t,
            "marketCap": info.get("marketCap"),
            "sector": info.get("sector"),
            "industry": info.get("industry"),
            "sharesOut": info.get("sharesOutstanding") or info.get("floatShares")
        })
        time.sleep(0.05)  # kecil, biar santai
    return pd.DataFrame(rows)

# ---------- Build universe ----------
def build_universe_from_cache(all_tickers):
    liq = liquidity_panel_from_cache(all_tickers)
    if liq.empty:
        print("‚ö†Ô∏è Liquidity panel kosong (cache belum siap & fallback off)."); 
        return [], pd.DataFrame()
    # pilih Top-K likuid untuk diambil meta
    topk = liq.sort_values("med_value", ascending=False).head(TOPK_FOR_META)["ticker"].tolist()
    meta = fetch_meta_subset(topk)
    df = liq.merge(meta, on="ticker", how="left")
    # exclude finansial berat
    df = df[~df["industry"].fillna("").str.contains(INDUSTRY_EX_RE, regex=True)]
    # buang top-25% market cap
    if df["marketCap"].notna().any():
        cap_cut = df["marketCap"].dropna().quantile(CAP_Q_MAX)
        df = df[(df["marketCap"].isna()) | (df["marketCap"] <= cap_cut)]
    # turnover median aproksimasi (jika sharesOut ada)
    df["turnover"] = np.where(df["sharesOut"].fillna(0)>0, df["med_volume90"]/df["sharesOut"], np.nan)
    # filter likuiditas & harga
    keep = (df["med_value"].between(VALUE_MIN, VALUE_MAX)) & (df["last_close"]>=MIN_PRICE)
    # kalau turnover tersedia, terapkan MIN_TURNOVER; kalau NaN, biarkan (jangan dieliminasi)
    keep &= df["turnover"].isna() | (df["turnover"]>=MIN_TURNOVER)
    df = df[keep]
    if df.empty:
        return [], df
    topn = df.sort_values("med_value", ascending=False).head(UNIVERSE_TOPN)["ticker"].tolist()
    return topn, df.sort_values("med_value", ascending=False).reset_index(drop=True)

# ---------- Bandarmology metrics ----------
def atr(df, n=14):
    h,l,c = df['High'], df['Low'], df['Close']
    tr = np.maximum(h-l, np.maximum((h-c.shift()).abs(), (l-c.shift()).abs()))
    return tr.rolling(n).mean()

def obv(df):
    vol = df['Volume'].fillna(0)
    direction = np.sign(df['Close'].diff()).fillna(0)
    return (direction*vol).cumsum()

def adl(df):
    h,l,c,v = df['High'], df['Low'], df['Close'], df['Volume']
    rng = (h - l).replace(0, np.nan)
    clv = ((c - l) - (h - c)) / rng
    return (clv.fillna(0) * v).cumsum()

def percentile_rank(series, lookback=120):
    def _pr(x):
        s = pd.Series(x)
        return s.rank(pct=True).iloc[-1]
    return series.rolling(lookback, min_periods=lookback).apply(_pr, raw=False)

def label_row(r):
    status, reasons, score = "Akumulasi Lanjut", [], 50
    if r['atr_pctile'] <= 0.35: score += 20; reasons.append("ATR%ile rendah")
    if r['vol_ma5_ma20'] < 0.7: score += 10; reasons.append("Volume dry-up")
    if r['obv_slope'] > 0: score += 15; reasons.append("OBV naik")
    if r['adl_slope'] > 0: score += 10; reasons.append("ADL naik")
    if (r['breakout_55'] and r['vol_spike'] and r['near_high']):
        score += 25; reasons += ["Breakout 55D","Vol spike","Close near high"]
    if r['above_emas']: score += 10; reasons.append("Di atas EMA20/50")
    if r['upthrust']: score -= 15; reasons.append("Upthrust")
    if r['obv_div']: score -= 10; reasons.append("Divergensi OBV")
    score = int(max(0, min(100, score)))
    if (r['breakout_55'] and r['vol_spike'] and r['near_high'] and
        r['obv_slope']>0 and r['adl_slope']>0 and r['above_emas']):
        status = "Akhir Akumulasi ‚Äì Siap Markup"
    elif r['upthrust'] or r['obv_div']:
        status = "Distribusi Dini"
    return pd.Series({"status": status, "score": score, "reasons": ", ".join(reasons)})

def scan_topn(tickers):
    if not tickers: return pd.DataFrame()
    # kecil & aman (<= TopN): 420d untuk pola Wyckoff + EMA/OBV
    try:
        data = yf.download(tickers, period="420d", interval="1d",
                           auto_adjust=False, group_by='ticker',
                           progress=False, threads=False, timeout=60)
    except Exception:
        data = pd.DataFrame()
    rows=[]
    for t in tickers:
        try:
            df = data[t].dropna().copy() if isinstance(data.columns, pd.MultiIndex) else data.dropna().copy()
        except Exception:
            df = pd.DataFrame()
        if len(df) < 120:  # butuh history cukup
            continue
        df['ATR14'] = atr(df, 14)
        df['ATRp']  = (df['ATR14'] / df['Close']).replace([np.inf, -np.inf], np.nan)
        df['ATRp_pctile'] = percentile_rank(df['ATRp'])
        df['OBV'] = obv(df); df['ADL'] = adl(df)
        df['OBV_slope20'] = df['OBV'].diff(20); df['ADL_slope20'] = df['ADL'].diff(20)
        df['EMA20'] = df['Close'].ewm(span=20).mean(); df['EMA50'] = df['Close'].ewm(span=50).mean()
        df['above_emas'] = (df['Close']>df['EMA20']) & (df['Close']>df['EMA50'])
        df['HH55'] = df['High'].rolling(55).max(); df['LL55'] = df['Low'].rolling(55).min()
        df['breakout_55'] = df['Close'] > df['HH55'].shift(1)
        df['vol_ma20'] = df['Volume'].rolling(20).mean(); df['vol_ma5']  = df['Volume'].rolling(5).mean()
        df['vol_ma5_ma20'] = (df['vol_ma5']/df['vol_ma20']).replace([np.inf,-np.inf], np.nan)
        df['vol_spike'] = df['Volume'] > 1.8*df['vol_ma20']
        df['near_high'] = (df['High'] - df['Close']) <= 0.2*df['ATR14']
        rng = (df['High'] - df['Low']).replace(0, np.nan)
        lower_tail = (df[['Close','Open']].min(axis=1) - df['Low']).abs()
        df['spring_like'] = (df['Low'] < df['LL55'].shift(1)) & ((lower_tail / rng) >= 0.6)
        upper_tail = (df['High'] - df[['Close','Open']].max(axis=1)).abs()
        df['upthrust'] = (df['High'] > df['HH55'].shift(1)) & (df['Close'] < df['HH55'].shift(1)) & ((upper_tail / rng) >= 0.5) & (df['Volume'] > 1.5*df['vol_ma20'])
        df['price_slope20'] = df['Close'].diff(20); df['obv_div'] = (df['price_slope20']>0) & (df['OBV_slope20']<=0)

        last = df.iloc[-1]
        feat = {
            "ticker": t, "date": df.index[-1].date().isoformat(),
            "close": round(float(last['Close']),2),
            "value_traded": float(last['Close']*last['Volume']),
            "atr_pctile": float(last['ATRp_pctile']) if pd.notna(last['ATRp_pctile']) else 1.0,
            "vol_ma5_ma20": float(last['vol_ma5_ma20']) if pd.notna(last['vol_ma5_ma20']) else 1.0,
            "obv_slope": float(last['OBV_slope20']) if pd.notna(last['OBV_slope20']) else 0.0,
            "adl_slope": float(last['ADL_slope20']) if pd.notna(last['ADL_slope20']) else 0.0,
            "breakout_55": bool(last['breakout_55']) if pd.notna(last['breakout_55']) else False,
            "vol_spike": bool(last['vol_spike']) if pd.notna(last['vol_spike']) else False,
            "near_high": bool(last['near_high']) if pd.notna(last['near_high']) else False,
            "above_emas": bool(last['above_emas']) if pd.notna(last['above_emas']) else False,
            "spring_like": bool(last['spring_like']) if pd.notna(last['spring_like']) else False,
            "upthrust": bool(last['upthrust']) if pd.notna(last['upthrust']) else False,
            "obv_div": bool(last['obv_div']) if pd.notna(last['obv_div']) else False,
        }
        lab = label_row(pd.Series(feat))
        rows.append({**feat, **lab.to_dict()})
    out = pd.DataFrame(rows)
    if out.empty: return out
    status_rank = {"Akhir Akumulasi ‚Äì Siap Markup":0, "Akumulasi Lanjut":1, "Distribusi Dini":2}
    out['status_rank'] = out['status'].map(status_rank)
    return out.sort_values(by=['status_rank','score','value_traded'],
                           ascending=[True,False,False]).reset_index(drop=True)

# ---------- RUN ----------
print("üèóÔ∏è Bangun universe dari cache‚Ä¶")
topN, uni_df = build_universe_from_cache(tickers)
display(uni_df.head(15))
print(f"Top {len(topN)} untuk scan:", topN)

print("\nüìà Scan Bandarmology (TopN)‚Ä¶")
df_watch = scan_topn(topN)

# ---------- SAVE ----------
out_csv  = os.path.join(EMITEN_DIR, f"watchlist_bandarmology_{DATE_TAG}.csv")
out_json = os.path.join(EMITEN_DIR, f"watchlist_bandarmology_{DATE_TAG}.json")

if df_watch.empty:
    print("‚ö†Ô∏è Hasil kosong. Longgarkan filter (VALUE_MIN/MAX, MIN_PRICE, MIN_TURNOVER) atau tambah TopN/TOPK_FOR_META.")
else:
    cols_show = ['date','ticker','close','status','score','reasons']
    display(df_watch[cols_show])
    df_watch.to_csv(out_csv, index=False)
    payload = {
        "as_of": DATE_TAG,
        "universe_topN": topN,
        "watchlist": df_watch[cols_show].to_dict(orient="records")
    }
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f"\nüíæ Saved: {out_csv} & {out_json}")


üìÑ Source aktif: emiten/candidates_active_filtered_20250811.csv
Total kandidat aktif: 113
üèóÔ∏è Bangun universe dari cache‚Ä¶


Unnamed: 0,ticker,med_value,last_close,med_volume90,marketCap,sector,industry,sharesOut,turnover
0,WIFI.JK,122069100000.0,2700.0,60495950.0,14386171215872,Technology,Information Technology Services,5308550144,0.011396
1,RAJA.JK,70487440000.0,2610.0,32716050.0,13019405877248,Utilities,Utilities - Regulated Gas,4227079936,0.00774
2,RATU.JK,65992530000.0,7025.0,9925400.0,20091370471424,Energy,Oil & Gas E&P,2715049984,0.003656
3,TOBA.JK,59693480000.0,1045.0,78696850.0,9248288276480,Energy,Thermal Coal,8257399808,0.00953
4,DEWA.JK,59366880000.0,224.0,366053050.0,9276726706176,Energy,Thermal Coal,40687398912,0.008997
5,TCPI.JK,53534040000.0,6000.0,9341750.0,29874999787520,Industrials,Marine Shipping,5000000000,0.001868
6,PSAB.JK,51100210000.0,410.0,141080700.0,10584000036864,Basic Materials,Gold,26460000256,0.005332
7,MEDC.JK,46616410000.0,1240.0,38382800.0,30113259323392,Energy,Oil & Gas E&P,24682999808,0.001555
8,JPFA.JK,43523270000.0,1585.0,26508300.0,18313627303936,Consumer Defensive,Farm Products,8716080128,0.003041
9,PTBA.JK,37346190000.0,2410.0,14204200.0,28095136399360,Energy,Thermal Coal,11514399744,0.001234


Top 10 untuk scan: ['WIFI.JK', 'RAJA.JK', 'RATU.JK', 'TOBA.JK', 'DEWA.JK', 'TCPI.JK', 'PSAB.JK', 'MEDC.JK', 'JPFA.JK', 'PTBA.JK']

üìà Scan Bandarmology (TopN)‚Ä¶


Unnamed: 0,date,ticker,close,status,score,reasons
0,2025-08-12,TCPI.JK,5975.0,Akumulasi Lanjut,100,"ATR%ile rendah, OBV naik, ADL naik, Di atas EM..."
1,2025-08-12,RATU.JK,7400.0,Akumulasi Lanjut,95,"ATR%ile rendah, OBV naik, Di atas EMA20/50"
2,2025-08-12,TOBA.JK,1120.0,Akumulasi Lanjut,95,"Volume dry-up, OBV naik, ADL naik, Di atas EMA..."
3,2025-08-12,DEWA.JK,228.0,Akumulasi Lanjut,95,"Volume dry-up, OBV naik, ADL naik, Di atas EMA..."
4,2025-08-12,JPFA.JK,1575.0,Akumulasi Lanjut,80,"ATR%ile rendah, Volume dry-up"
5,2025-08-12,RAJA.JK,3080.0,Akumulasi Lanjut,75,"OBV naik, Di atas EMA20/50"
6,2025-08-12,PSAB.JK,400.0,Akumulasi Lanjut,70,ATR%ile rendah
7,2025-08-12,PTBA.JK,2440.0,Akumulasi Lanjut,70,ATR%ile rendah
8,2025-08-12,MEDC.JK,1220.0,Akumulasi Lanjut,65,OBV naik
9,2025-08-12,WIFI.JK,2710.0,Distribusi Dini,60,"ADL naik, Di atas EMA20/50, Divergensi OBV"



üíæ Saved: emiten/watchlist_bandarmology_20250811.csv & emiten/watchlist_bandarmology_20250811.json
