In [2]:
# the library comes built-in with colab
import requests

url = "http://www.google.com"

# Make a GET request. The explicit timeout keeps the cell from hanging forever
# on an unresponsive server (requests applies no timeout by default).
res = requests.get(url, timeout=10)

# HTTP status code of the response (200 means OK). It is printed explicitly
# because only the last expression of a cell is displayed automatically.
print(res.status_code)

# Raw response body (bytes): the HTML returned for the requested page.
res.content

200

In [63]:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Location of the ChromeDriver executable. Set the CHROMEDRIVER_PATH environment
# variable to point at the binary on another machine; when it is not set, the
# original local path is used. The r"" prefix makes the default a raw string so
# the backslashes of the Windows path are not interpreted as escape sequences.
CHROMEDRIVER_PATH = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"C:\Users\Lenovo\Downloads\chromedriver-win64\chromedriver.exe",
)

# Start a Chrome browser session driven through that ChromeDriver binary.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))

In [66]:
# Scrape the TÜFE bulletin list from the TÜİK "Enflasyon ve Fiyat" category page.
# NOTE: collect_pages() and get_titles() are called below, so the cells that
# define them must have been executed before this one.
title_list = []

try:
    # open the target website
    driver.get("https://data.tuik.gov.tr/Kategori/GetKategori?p=Enflasyon-ve-Fiyat-106")

    # wait for the page to load
    time.sleep(5)

    # locate and click the "Tüketici Fiyat Endeksi (TÜFE)" checkbox using its ID
    tufe_checkbox = driver.find_element(By.ID, "subCategory_1014")
    tufe_checkbox.click()  # simulate a click to select the checkbox

    # list of years to select
    years = ["2024", "2023", "2022"]

    # loop through each year and select the corresponding checkbox
    for year in years:
        # locate the checkbox by its value
        year_checkbox = driver.find_element(
            By.XPATH, f"//input[@type='checkbox' and @value='{year}']"
        )
        if not year_checkbox.is_selected():  # skip years that are already ticked
            year_checkbox.click()
            print(f"Selected year: {year}")

    time.sleep(5)

    # locate the search button using its class name and submit the filter
    search_button = driver.find_element(By.CLASS_NAME, "btn-outline-primary")
    search_button.click()

    # give the result table time to render
    time.sleep(5)

    # walk the result pages, then print everything that was collected
    title_list = collect_pages(driver, title_list)
    get_titles(title_list)
finally:
    # Always close the WebDriver, even when a step above raised; otherwise the
    # browser and chromedriver processes would be left running.
    driver.quit()

Selected year: 2024
Selected year: 2023
Selected year: 2022
Processing Page 1...
Processing Page 2...
Processing Page 3...
Row 1:
Title: Tüketici fiyat endeksi (TÜFE) yıllık %47,09, aylık %2,24 arttı
Link: Tüketici Fiyat Endeksi, Kasım 2024
--------------------------------------------------
Row 2:
Title: Tüketici fiyat endeksi (TÜFE) yıllık %48,58, aylık %2,88 arttı
Link: Tüketici Fiyat Endeksi, Ekim 2024
--------------------------------------------------
Row 3:
Title: Tüketici fiyat endeksi (TÜFE) yıllık %49,38, aylık %2,97 arttı
Link: Tüketici Fiyat Endeksi, Eylül 2024
--------------------------------------------------
Row 4:
Title: Tüketici fiyat endeksi (TÜFE) yıllık %51,97, aylık %2,47 arttı
Link: Tüketici Fiyat Endeksi, Ağustos 2024
--------------------------------------------------
Row 5:
Title: Tüketici fiyat endeksi (TÜFE) yıllık %61,78, aylık %3,23 arttı
Link: Tüketici Fiyat Endeksi, Temmuz 2024
--------------------------------------------------
Row 6:
Title: Tüketici fiyat e

In [67]:
def collect_pages(driver, title_list, max_pages=3):
    """Collect title and link text from each row of the paginated "bultenTable".

    Starting from the page currently shown by ``driver``, every ``<tbody>`` row
    of the table with id ``bultenTable`` is read, then the "Sonraki" (next) link
    is clicked to move on, for at most ``max_pages`` pages.

    Args:
        driver: Selenium WebDriver already showing the first result page.
        title_list: List that receives one ``{"title": ..., "link": ...}`` dict
            per row. It is extended in place.
        max_pages: Maximum number of pages to visit (default 3).

    Returns:
        The same ``title_list`` object, after the rows have been appended.

    Scraping stops early when the table cannot be read or when there is no
    "Sonraki" link to follow. Continuing in those cases would read the same
    page again and append duplicate rows.
    """
    for page_num in range(1, max_pages + 1):
        print(f"Processing Page {page_num}...")

        try:
            # wait for the result table to be present in the DOM
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "bultenTable"))
            )
            table = driver.find_element(By.ID, "bultenTable")

            # only the <tr> elements inside <tbody>; the header row is excluded
            table_rows = table.find_elements(By.XPATH, ".//tbody/tr")
        except Exception as e:
            print(f"Error on Page {page_num}: {e}")
            break

        for row in table_rows:
            try:
                # the title is the element carrying the "text-secondary" class
                title_text = row.find_element(By.CLASS_NAME, "text-secondary").text

                # text of the first link found within the row
                link_text = row.find_element(By.TAG_NAME, "a").text

                title_list.append({
                    "title": title_text,
                    "link": link_text
                })
            except Exception as e:
                # best effort: a malformed row is reported and skipped
                print(f"Error extracting data from row: {e}")

        # no navigation needed after the last requested page
        if page_num == max_pages:
            break

        # find_elements returns an empty list instead of raising when the
        # link is missing, which signals that there are no further pages
        next_links = driver.find_elements(By.LINK_TEXT, "Sonraki")
        if not next_links:
            print(f"No 'Sonraki' link found after page {page_num}; stopping.")
            break

        try:
            next_links[0].click()
        except Exception as e:
            print(f"Error on Page {page_num}: {e}")
            break

        time.sleep(2)  # give the next page time to render

    return title_list


In [65]:
def get_titles(title_list):
    """Print each collected entry as a numbered record followed by a separator.

    Args:
        title_list: Iterable of dicts that provide the keys "title" and "link".
    """
    separator = "-" * 50
    row_number = 0
    for entry in title_list:
        row_number += 1
        print(f"Row {row_number}:")
        print(f"Title: {entry['title']}")
        print(f"Link: {entry['link']}")
        print(separator)


In [78]:
import csv
import re

# Path of the output file, relative to the current working directory. Note that
# the name carries no ".csv" extension even though CSV rows are written to it.
output_file = "monthly_inflation_rates_list_2022_to_2024"

# function to extract annual and monthly rates
def extract_rates(text):
    """Extract the annual and monthly rates from a bulletin title.

    The title is expected to contain "yıllık %<number>" followed later by
    "aylık %<number>", where a number is a run of digits with an optional
    decimal part separated by "," or ".".

    Args:
        text: Bulletin title, e.g.
            "Tüketici fiyat endeksi (TÜFE) yıllık %47,09, aylık %2,24 arttı".

    Returns:
        A tuple ``(annual_rate, monthly_rate)`` of strings using "." as the
        decimal separator (e.g. ``("47.09", "2.24")``), or ``(None, None)``
        when the pattern is not found.
    """
    # Each number is matched as digits plus at most one decimal part. A looser
    # character class such as [\d.,]+ would also swallow the comma that
    # separates the two clauses and yield a malformed value like "47.09.".
    number = r"(\d+(?:[.,]\d+)?)"
    match = re.search(rf"yıllık %{number}.*?aylık %{number}", text)
    if match is None:
        return None, None

    # normalise the Turkish decimal comma to a dot so float() can parse it
    annual_rate = match.group(1).replace(",", ".")
    monthly_rate = match.group(2).replace(",", ".")
    return annual_rate, monthly_rate


def extract_date(text):
    """Extract the trailing "Month Year" of a link text as "Month-Year".

    The text must end with a comma, a month name (ASCII or Turkish letters)
    and a four-digit year, e.g. "Tüketici Fiyat Endeksi, Kasım 2024".
    Trailing whitespace after the year is ignored.

    Args:
        text: Link text of a bulletin row.

    Returns:
        A string such as "Kasım-2024", or ``None`` when the text does not end
        with that pattern.
    """
    # Pattern pieces: a comma and optional whitespace, the month name
    # (letters, Turkish characters included), one whitespace character,
    # the four-digit year, then optional whitespace up to the end of the text.
    match = re.search(r",\s*([A-Za-zÇçĞğİıÖöŞşÜü\s]+)\s(\d{4})\s*$", text)
    if match is None:
        return None

    month = match.group(1).strip()
    year = match.group(2).strip()
    return f"{month}-{year}"

# Write one CSV record per collected entry whose title contains both rates.
with open(output_file, mode="w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)

    # header row
    writer.writerow(["Annual Inflation Rate", "Monthly Inflation Rate", "Date"])

    for entry in title_list:
        annual, monthly = extract_rates(entry.get("title", ""))

        # entries without both rates are left out of the file
        if not (annual and monthly):
            continue

        # the date column comes from the link text
        writer.writerow([annual, monthly, extract_date(entry.get("link", ""))])

print(f"Data successfully written to {output_file}")

Data successfully written to monthly_inflation_rates_list_2022_to_2024
