In [4]:
import re
import warnings
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup


# Query page of the EPDK bulletin form; used for both the initial GET and the POST.
_BULLETIN_URL = (
    "https://bildirim.epdk.gov.tr/bildirim-portal/faces/pages/tarife/petrol/"
    "yonetim/bultenSorgula.xhtml"
)

# Exact header row (in order) that identifies the table we want.
_REQUIRED_COLUMNS = ["Yakıt Tipi", "Fiyat", "Birim"]

# JSF auto-generates ids such as "bultenKriterleriForm:j_idt30_input"; the
# numeric part can change, so the text input is located by pattern.
_DATE_INPUT_ID = re.compile(r"bultenKriterleriForm:j_idt\d+")


def _read_form_state(page_html):
    """Return ``(view_state, date_field_id)`` parsed from the query page, or None if either is missing."""
    soup = BeautifulSoup(page_html, "lxml")
    view_state_tag = soup.find("input", {"name": "javax.faces.ViewState"})
    date_input = soup.find("input", {"type": "text", "id": _DATE_INPUT_ID})
    if view_state_tag is None or date_input is None:
        return None

    view_state = view_state_tag.get("value")
    if view_state is None:
        return None
    return view_state, date_input["id"]


def _extract_price_table(response_html):
    """Return the first table whose first row equals ``_REQUIRED_COLUMNS``, cleaned, or None."""
    try:
        # Literal HTML strings are deprecated in pandas.read_html; pass a file-like object.
        tables = pd.read_html(StringIO(response_html))
    except ValueError as e:
        # read_html raises ValueError when it cannot find any table.
        print(f"Error reading tables: {e}")
        return None

    if not tables:
        print("No tables found in the response.")
        return None
    print(f"Extracted {len(tables)} tables.")

    for i, table in enumerate(tables):
        if table.empty:
            # No first row to promote to a header; cannot be the table we want.
            continue

        # The tables are parsed without a header, so the first row carries the
        # column names: promote it and drop it from the data.
        table.columns = table.iloc[0]
        if list(table.columns) != _REQUIRED_COLUMNS:
            continue

        print(f"Table {i} contains the required columns!")
        return (
            table.iloc[1:]
            .assign(Fiyat=lambda rows: pd.to_numeric(rows["Fiyat"], errors="coerce"))
            .dropna(subset=["Fiyat"])
        )

    print("No table contains the exact required columns.")
    return None


def fetch_data_for_date(date: str, timeout: float = 30.0, verify: bool = False) -> "pd.DataFrame | None":
    """
    Fetch the "Yakıt Tipi" / "Fiyat" / "Birim" table for a given date from the EPDK bulletin query page.

    The page is a JSF form, so the lookup takes two requests made on one
    session: a GET that yields the ``javax.faces.ViewState`` token and the
    auto-generated id of the date text input, followed by a partial-AJAX POST
    that submits the date and asks for the result panel to be rendered.

    Parameters
    ----------
    date : str
        Date string in the format the form expects, e.g. ``"09.02.2017"``
        (presumably DD.MM.YYYY -- TODO confirm against the site).
    timeout : float, default 30.0
        Per-request timeout in seconds, passed to ``requests``.
    verify : bool, default False
        Whether to verify the server's TLS certificate. The default keeps
        verification disabled, as the original code did.
        NOTE(review): disabling verification exposes the request to
        man-in-the-middle tampering; pass ``verify=True`` if the host's
        certificate chain validates in your environment.

    Returns
    -------
    pandas.DataFrame or None
        The matching table with ``Fiyat`` converted to numeric and rows with a
        non-numeric ``Fiyat`` dropped. ``None`` if the page could not be
        loaded, the form fields were not found, the POST failed, or no table
        had exactly the required columns.

    Raises
    ------
    requests.exceptions.RequestException
        Network-level failures (including timeouts) are not caught here.

    Notes
    -----
    Progress messages are printed to stdout. Warnings are suppressed only for
    the duration of the call; the caller's warning filters are restored on exit.
    """
    with warnings.catch_warnings():
        # Scoped replacement for a global filterwarnings("ignore"): hides the
        # insecure-request warning caused by verify=False without silencing
        # warnings for the rest of the notebook.
        warnings.simplefilter("ignore")

        # One session for both requests so cookies set by the GET accompany the POST.
        with requests.Session() as session:
            # Step 1: load the page to obtain the ViewState and the date field id.
            initial_response = session.get(_BULLETIN_URL, verify=verify, timeout=timeout)
            if initial_response.status_code != 200:
                return None

            form_state = _read_form_state(initial_response.text)
            if form_state is None:
                return None
            view_state, date_field_id = form_state

            # Step 2: build the partial-AJAX form submission.
            payload = {
                "javax.faces.partial.ajax": "true",
                "javax.faces.partial.execute": "@all",
                "javax.faces.partial.render": "bultenSorguSonucu messages",
                # NOTE(review): "j_idt35" looks like the auto-generated id of the
                # submitting component and is still hard-coded; if the page's
                # generated ids shift, this needs updating too -- confirm.
                "bultenKriterleriForm:j_idt35": "bultenKriterleriForm:j_idt35",
                "bultenKriterleriForm": "bultenKriterleriForm",
                "javax.faces.ViewState": view_state,
                # Use the id discovered on the page rather than a fixed "j_idt30_input".
                date_field_id: date,
            }
            headers = {
                "Content-Type": "application/x-www-form-urlencoded",
            }

            # Step 3: submit the form.
            response = session.post(
                _BULLETIN_URL,
                data=payload,
                headers=headers,
                verify=verify,
                timeout=timeout,
            )

        # Step 4: process the response.
        if response.status_code != 200:
            print(f"Failed to retrieve data. Status code: {response.status_code}")
            return None

        print("Data successfully retrieved!")
        return _extract_price_table(response.text)



# Fetch the table for the date "09.02.2017" and keep it in `df` for inspection.
df = fetch_data_for_date("09.02.2017")


Data successfully retrieved!
Extracted 6 tables.
Table 4 contains the required columns!


In [5]:
# Bare last expression: the notebook renders the fetched table as rich output.
df

Unnamed: 0,Yakıt Tipi,Fiyat,Birim
1,Kurşunsuz Benzin 95 Oktan,5.38916,Litre
2,Kurşunsuz Benzin 95 Oktan (Diğer),5.39798,Litre
3,Motorin,4.63584,Litre
4,Motorin (Biodizel ihtiva eden),4.64333,Litre
5,Motorin (Diğer),4.65574,Litre
6,Kalorifer Yakıtı (Kükürt Oranı %0.1'i geçen an...,2.77225,Kilogram
7,Fuel Oil (Kükürt Oranı %0.1'i geçen ancak %1'i...,2.57792,Kilogram
8,Yüksek Kükürtlü Fuel Oil (Kükürt oranı %1'i ge...,2.262,Kilogram
9,Gazyağı,3.98712,Litre
