In [13]:
%pip install beautifulsoup4 requests pandas



In [33]:
import requests
from bs4 import BeautifulSoup
import pandas as pd # Import pandas here as it's used later

# === URL Target ===
TARGET_URL = "https://bromotenggersemeru.id/"
AJAX_TARGET_URL = "https://bromotenggersemeru.id/website/home/get_view" # Endpoint for AJAX data

# === Headers untuk meniru permintaan browser ===
headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': TARGET_URL, # Referer ke halaman utama mungkin diperlukan
    # Add other headers if identified as necessary from browser inspection
    # 'User-Agent': 'Mozilla/5.0...' # Ganti dengan User-Agent browser Anda jika perlu
}

# Step 1: Fetch initial page HTML to find month/year options
print("1. Fetching initial page HTML to find month/year options...")
try:
    initial_response = requests.get(TARGET_URL, timeout=10) # Use GET for the initial page

    if initial_response.status_code != 200:
        print(f"❌ Gagal mendapatkan respons halaman awal. Status: {initial_response.status_code}")
        extracted_year_month_options = [] # Ensure variable is defined on error
    else:
        print("✅ Respons halaman awal berhasil diterima.")
        initial_soup = BeautifulSoup(initial_response.text, 'html.parser')

        # Step 2: Extract year_month options from the form
        print("\n2. Extracting year_month options from the form...")
        year_month_select = initial_soup.find('select', {'name': 'year_month'})

        if year_month_select:
            options = year_month_select.find_all('option')
            year_month_values = [option['value'] for option in options if 'value' in option.attrs and option['value']]
            print(f"✅ Ditemukan {len(year_month_values)} opsi year_month:")
            print(year_month_values)
            extracted_year_month_options = year_month_values
        else:
            print("⚠️ Dropdown 'year_month' tidak ditemukan di halaman awal.")
            extracted_year_month_options = [] # Ensure variable is defined if not found

except requests.exceptions.RequestException as e:
    print(f"❌ Terjadi kesalahan koneksi saat mengambil halaman awal: {e}")
    extracted_year_month_options = [] # Ensure variable is defined on error


# Step 3 & 4: Iterate through options, make POST requests, and scrape data
all_capacity_data = [] # List to store data from all months

if extracted_year_month_options:
    print("\n3. Iterating through year_month options and fetching data...")
    for year_month in extracted_year_month_options:
        print(f"\n  > Fetching data for: {year_month}")

        # --- Construct Payload for current month ---
        current_payload = {
            'action': 'kapasitas',
            'id_site': '4', # Periksa id_site di tangkapan layar payload browser Anda (biasanya itu 4 | 8 | 7)
            'year_month': year_month # Use the current year_month from the loop
        }

        # --- Make POST Request to the AJAX endpoint ---
        try:
            response = requests.post(AJAX_TARGET_URL, data=current_payload, headers=headers, timeout=10)

            if response.status_code != 200:
                print(f"    ❌ Gagal mendapatkan respons untuk {year_month}. Status: {response.status_code}")
                # Optional: Print response text on error for debugging
                # print(f"    --- Respons Gagal (untuk debugging) untuk {year_month} ---")
                # print(response.text)
                # print("    ----------------------------------------------")
                continue # Skip to the next month if request fails

            # print(f"    ✅ Respons berhasil diterima untuk {year_month}.")
            # Optional: Print raw HTML response for each month for debugging
            # print("\n    --- Respons HTML Mentah (untuk debugging) ---")
            # print(response.text[:500]) # Print only first 500 chars
            # print("    ----------------------------------------------\n")


            # --- Scraping Data from Response ---
            soup = BeautifulSoup(response.text, 'html.parser')

            # The response from this endpoint likely contains just the HTML snippet of the table
            # Try finding the table directly first, then the container if needed
            table = soup.find('table')

            if table:
                print(f"    ✅ Tabel data ditemukan untuk {year_month}.")
                # Find all data rows in tbody
                tbody = table.find('tbody')
                if tbody:
                    rows = tbody.find_all('tr')

                    month_data = []
                    for row in rows:
                        columns = row.find_all('td')
                        # Ensure row has at least 2 columns (Tanggal and Status)
                        if len(columns) >= 2:
                            tanggal = columns[0].text.strip()
                            # Get text from second column, clean <span> tags and the number '1'
                            status_raw = columns[1].text.strip()
                            status = status_raw.replace('1', '').strip() # Clean number '1'
                            # Add Period info (Month/Year)
                            month_data.append({'Tanggal': tanggal, 'Status': status, 'Periode': year_month})

                    if month_data:
                        all_capacity_data.extend(month_data) # Add data to the main list
                        print(f"    ✅ Berhasil scraping {len(month_data)} data untuk {year_month}.")
                    else:
                         print(f"    ⚠️ Tabel ditemukan untuk {year_month}, namun tidak ada data baris yang valid di tbody.")
                else:
                     print(f"    ⚠️ tbody tidak ditemukan di dalam tabel untuk {year_month}.")
            else:
                # This case is expected if the server returns "Quota belum tersedia" or similar
                print(f"    ⚠️ Tabel data tidak ditemukan di respons untuk {year_month}. Mungkin kuota belum tersedia atau responsnya berbeda.")


        except requests.exceptions.RequestException as e:
            print(f"    ❌ Terjadi kesalahan koneksi saat mengambil data untuk {year_month}: {e}")
            continue # Continue to the next month on connection error

    print("\n✅ Selesai mengambil data untuk semua periode yang tersedia.")

else:
    print("\nTidak ada opsi year_month yang ditemukan. Tidak bisa melanjutkan.")

# Now all_capacity_data contains data from all successfully scraped months
# The next step (Step 5) is to process/display all_capacity_data, which will be in the next cell.

1. Fetching initial page HTML to find month/year options...
✅ Respons halaman awal berhasil diterima.

2. Extracting year_month options from the form...
✅ Ditemukan 7 opsi year_month:
['2025-09', '2025-10', '2025-11', '2025-12', '2026-01', '2026-02', '2026-03']

3. Iterating through year_month options and fetching data...

  > Fetching data for: 2025-09
    ✅ Tabel data ditemukan untuk 2025-09.
    ✅ Berhasil scraping 2 data untuk 2025-09.

  > Fetching data for: 2025-10
    ✅ Tabel data ditemukan untuk 2025-10.
    ✅ Berhasil scraping 14 data untuk 2025-10.

  > Fetching data for: 2025-11
    ⚠️ Tabel data tidak ditemukan di respons untuk 2025-11. Mungkin kuota belum tersedia atau responsnya berbeda.

  > Fetching data for: 2025-12
    ⚠️ Tabel data tidak ditemukan di respons untuk 2025-12. Mungkin kuota belum tersedia atau responsnya berbeda.

  > Fetching data for: 2026-01
    ⚠️ Tabel data tidak ditemukan di respons untuk 2026-01. Mungkin kuota belum tersedia atau responsnya berbed

In [34]:
# Step 5: Combine and process data
if all_capacity_data:
    # Convert the list of dictionaries into a pandas DataFrame for easier processing
    df_capacity = pd.DataFrame(all_capacity_data)

    print("\n--- Data Kapasitas Terkumpul ---")
    display(df_capacity)

    # Optional: Further processing or analysis can be done here
    # For example, checking the number of 'Kuota Penuh' days per month
    # full_days_per_month = df_capacity[df_capacity['Status'] == 'Kuota Penuh'].groupby('Periode').size().reset_index(name='Jumlah Kuota Penuh')
    # print("\nJumlah Hari 'Kuota Penuh' per Periode:")
    # display(full_days_per_month)

else:
    print("\nTidak ada data kapasitas yang berhasil dikumpulkan.")

# Step 6: Finish task - Data is displayed or ready for further use/saving.
print("\nTask finished. Data displayed above.")


--- Data Kapasitas Terkumpul ---


Unnamed: 0,Tanggal,Status,Periode
0,"Minggu, 28 September 2025",527,2025-09
1,"Senin, 29 September 2025",829,2025-09
2,"Kamis, 2 Oktober 2025",2727,2025-10
3,"Jum`at, 3 Oktober 2025",2685,2025-10
4,"Sabtu, 4 Oktober 2025",2673,2025-10
5,"Minggu, 5 Oktober 2025",2658,2025-10
6,"Senin, 6 Oktober 2025",278,2025-10
7,"Selasa, 7 Oktober 2025",2739,2025-10
8,"Rabu, 8 Oktober 2025",2738,2025-10
9,"Kamis, 9 Oktober 2025",2600,2025-10



Task finished. Data displayed above.
