# The notebook from which I extracted the data.

Maltepe Links with vendor_id

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv
import re

# Chrome settings
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/127.0.0.0 Safari/537.36")
# chrome_options.add_argument("--headless")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

url = "https://www.yemeksepeti.com/restaurants/new?lng=29.1306017790985&lat=40.92470845978729&vertical=restaurants&cuisines=1090"
driver.get(url)
time.sleep(7)

print("Sayfa yüklendi, restoranlar toplanıyor...")

# Scroll simulation
scroll_pause = 4
scroll_step = 1000
max_scrolls = 120

last_height = driver.execute_script("return document.body.scrollHeight")
scrolls_done = 0

while scrolls_done < max_scrolls:
    driver.execute_script(f"window.scrollBy(0, {scroll_step});")
    time.sleep(scroll_pause)
    new_height = driver.execute_script("return document.body.scrollHeight")

    if new_height == last_height:
        print("Sayfanın sonuna ulaşıldı ✅")
        break
    last_height = new_height
    scrolls_done += 1
    print(f"{scrolls_done}. scroll tamamlandı...")

print("Tüm restoran kartları yüklendi, veriler çekiliyor...")
time.sleep(2)

# Find restaurant cards
cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-testid^="vendor-tile-new-link-"]')
print(f"{len(cards)} restoran bulundu.\n")

# CSV file
filename = "../data/raw/links/maltepe_links/maltepe_waffle_restaurants.csv"
headers = [
    "vendor_id", "restaurant_name", "link", "img", "rating", "review_count",
    "price_range", "min_order", "cuisine_type",
    "delivery_time", "delivery_type"
]

with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(headers)

# Get the information from each restaurant card.
for i, card in enumerate(cards, 1):
    try:
        name = card.find_element(By.CSS_SELECTOR, "div.vendor-name").get_attribute("title")
        link = card.get_attribute("href")
        img = ""
        rating = ""
        review_count = ""
        price_range = ""
        min_order = ""
        cuisine_type = ""
        delivery_time = ""
        delivery_type = ""
        vendor_id = ""

        # Get the vendor ID from the link.
        try:
            match = re.search(r"/restaurant/([^/]+)/", link)
            if match:
                vendor_id = match.group(1)
        except:
            pass

        # Image
        try:
            img = card.find_element(By.CSS_SELECTOR, "img").get_attribute("src")
        except:
            pass

        # Rating and review
        try:
            rating = card.find_element(By.CSS_SELECTOR, ".bds-c-rating__label-primary").text
        except:
            pass
        try:
            review_count = card.find_element(By.CSS_SELECTOR, ".bds-c-rating__label-secondary").text
        except:
            pass

        # Information lines
        info_items = card.find_elements(By.CSS_SELECTOR, ".vendor-info-row-text .sanitized-row-text")
        if len(info_items) > 0:
            price_range = info_items[0].text if len(info_items) > 0 else ""
            min_order = info_items[1].text if len(info_items) > 1 else ""
            cuisine_type = info_items[2].text if len(info_items) > 2 else ""
            delivery_time = info_items[3].text if len(info_items) > 3 else ""
            delivery_type = info_items[4].text if len(info_items) > 4 else ""

        row = [
            vendor_id, name, link, img, rating, review_count,
            price_range, min_order, cuisine_type, delivery_time, delivery_type
        ]

        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(row)

        print(f"[{i}/{len(cards)}] Kaydedildi: {name} ({vendor_id})")

    except Exception as e:
        print(f"Hata oluştu: {e}")

print(f"\n✅ Tüm {len(cards)} restoran bilgileri {filename} dosyasına kaydedildi.")
driver.quit()

Sayfa yüklendi, restoranlar toplanıyor...
Sayfanın sonuna ulaşıldı ✅
Tüm restoran kartları yüklendi, veriler çekiliyor...
43 restoran bulundu.

[1/43] Kaydedildi: Pwb Peanut Waffle Belgium (trbe)
[2/43] Kaydedildi: Ruby Patisserie (dwle)
[3/43] Kaydedildi: Always Waffle (a8yy)
[4/43] Kaydedildi: Waffpir Waffle & Kumpir (zs1i)
[5/43] Kaydedildi: Keyf-i Kumpir & Waffle (raoo)
[6/43] Kaydedildi: Ortaköylü Kumpir Waffle (h7ph)
[7/43] Kaydedildi: Mina Waffle (ut4g)
[8/43] Kaydedildi: Waffee Waffle (wu45)
[9/43] Kaydedildi: Peanut Waffle Belgium (u0zq)
[10/43] Kaydedildi: Lotus Croissant & Breakfast (eil1)
[11/43] Kaydedildi: Bitez Dondurma Waffle (v65d)
[12/43] Kaydedildi: Peanut Waffle Belgium (mrtr)
[13/43] Kaydedildi: Shuffle Waffle Belgium (ljgb)
[14/43] Kaydedildi: Granny's Waffles & Kumpir (daa5)
[15/43] Kaydedildi: Waffle Time (denz)
[16/43] Kaydedildi: 112 Kumpir & Waffle (qlsq)
[17/43] Kaydedildi: Delice Kumpir & Waffle Dünyası (ch2f)
[18/43] Kaydedildi: Maltepe Tostçusu Waffle & M

Beyoğlu Links with vendor_id

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv
import re

# Chrome settings
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/127.0.0.0 Safari/537.36")
# chrome_options.add_argument("--headless")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

url = "https://www.yemeksepeti.com/restaurants/new?lng=28.9672834105591&lat=41.03933841010486&vertical=restaurants&cuisines=1090"
driver.get(url)
time.sleep(7)

print("Sayfa yüklendi, restoranlar toplanıyor...")

# Scroll simulation
scroll_pause = 4
scroll_step = 1000
max_scrolls = 120

last_height = driver.execute_script("return document.body.scrollHeight")
scrolls_done = 0

while scrolls_done < max_scrolls:
    driver.execute_script(f"window.scrollBy(0, {scroll_step});")
    time.sleep(scroll_pause)
    new_height = driver.execute_script("return document.body.scrollHeight")

    if new_height == last_height:
        print("Sayfanın sonuna ulaşıldı ✅")
        break
    last_height = new_height
    scrolls_done += 1
    print(f"{scrolls_done}. scroll tamamlandı...")

print("Tüm restoran kartları yüklendi, veriler çekiliyor...")
time.sleep(2)

# Find restaurant cards
cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-testid^="vendor-tile-new-link-"]')
print(f"{len(cards)} restoran bulundu.\n")

# CSV file
filename = "../data/raw/links/beyoglu_links/beyoglu_waffle_restaurants.csv"
headers = [
    "vendor_id", "restaurant_name", "link", "img", "rating", "review_count",
    "price_range", "min_order", "cuisine_type",
    "delivery_time", "delivery_type"
]

with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(headers)

# Get the information from each restaurant card.
for i, card in enumerate(cards, 1):
    try:
        name = card.find_element(By.CSS_SELECTOR, "div.vendor-name").get_attribute("title")
        link = card.get_attribute("href")
        img = ""
        rating = ""
        review_count = ""
        price_range = ""
        min_order = ""
        cuisine_type = ""
        delivery_time = ""
        delivery_type = ""
        vendor_id = ""

        # Vendor ID
        try:
            match = re.search(r"/restaurant/([^/]+)/", link)
            if match:
                vendor_id = match.group(1)
        except:
            pass

        # Image
        try:
            img = card.find_element(By.CSS_SELECTOR, "img").get_attribute("src")
        except:
            pass

        # Rating and review
        try:
            rating = card.find_element(By.CSS_SELECTOR, ".bds-c-rating__label-primary").text
        except:
            pass
        try:
            review_count = card.find_element(By.CSS_SELECTOR, ".bds-c-rating__label-secondary").text
        except:
            pass

        # Information lines
        info_items = card.find_elements(By.CSS_SELECTOR, ".vendor-info-row-text .sanitized-row-text")
        if len(info_items) > 0:
            price_range = info_items[0].text if len(info_items) > 0 else ""
            min_order = info_items[1].text if len(info_items) > 1 else ""
            cuisine_type = info_items[2].text if len(info_items) > 2 else ""
            delivery_time = info_items[3].text if len(info_items) > 3 else ""
            delivery_type = info_items[4].text if len(info_items) > 4 else ""

        row = [
            vendor_id, name, link, img, rating, review_count,
            price_range, min_order, cuisine_type, delivery_time, delivery_type
        ]

        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(row)

        print(f"[{i}/{len(cards)}] Kaydedildi: {name} ({vendor_id})")

    except Exception as e:
        print(f"Hata oluştu: {e}")

print(f"\n✅ Tüm {len(cards)} restoran bilgileri {filename} dosyasına kaydedildi.")
driver.quit()

Sayfa yüklendi, restoranlar toplanıyor...
Sayfanın sonuna ulaşıldı ✅
Tüm restoran kartları yüklendi, veriler çekiliyor...
18 restoran bulundu.

[1/18] Kaydedildi: Ziyafet Tavuklu Pilav (at6y)
[2/18] Kaydedildi: Chocolate Waffle (kvow)
[3/18] Kaydedildi: Always Waffle (h177)
[4/18] Kaydedildi: Wowmix (eu3g)
[5/18] Kaydedildi: Pwb Peanut Waffle Belgium (mu9g)
[6/18] Kaydedildi: Happy Waffle (r4f0)
[7/18] Kaydedildi: Wafflecım 777 (xqsr)
[8/18] Kaydedildi: Lale Waffle & Kumpir (zzsd)
[9/18] Kaydedildi: Safiş Kumpir & Waffle (zzpu)
[10/18] Kaydedildi: Crispy Waffle (ev51)
[11/18] Kaydedildi: Bunny Waffle (si93)
[12/18] Kaydedildi: Waffle House Special (kuvc)
[13/18] Kaydedildi: One Sweet (wrj2)
[14/18] Kaydedildi: A Love Waffle (rb2f)
[15/18] Kaydedildi: Nuray Waffle (ofru)
[16/18] Kaydedildi: Waffle Store (l0oq)
[17/18] Kaydedildi: !Classo Dondurma & Waffle (byr7)
[18/18] Kaydedildi: Granny's Waffles & Kumpir (kz3x)

✅ Tüm 18 restoran bilgileri ../data/raw/beyoglu_linkler/beyoglu_waffle_r

Beşiktaş Links with vendor_id

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv
import re

# Chrome settings
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/127.0.0.0 Safari/537.36")
# chrome_options.add_argument("--headless")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

url = "https://www.yemeksepeti.com/restaurants/new?lng=29.02681888623049&lat=41.068357160446595&vertical=restaurants&cuisines=1090"
driver.get(url)
time.sleep(7)

print("Sayfa yüklendi, restoranlar toplanıyor...")

# Scroll simulation
scroll_pause = 4
scroll_step = 1000
max_scrolls = 120

last_height = driver.execute_script("return document.body.scrollHeight")
scrolls_done = 0

while scrolls_done < max_scrolls:
    driver.execute_script(f"window.scrollBy(0, {scroll_step});")
    time.sleep(scroll_pause)
    new_height = driver.execute_script("return document.body.scrollHeight")

    if new_height == last_height:
        print("Sayfanın sonuna ulaşıldı ✅")
        break
    last_height = new_height
    scrolls_done += 1
    print(f"{scrolls_done}. scroll tamamlandı...")

print("Tüm restoran kartları yüklendi, veriler çekiliyor...")
time.sleep(2)

# Find restaurant cards
cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-testid^="vendor-tile-new-link-"]')
print(f"{len(cards)} restoran bulundu.\n")

# CSV file
filename = "../data/raw/links/besiktas_links/besiktas_waffle_restaurants.csv"
headers = [
    "vendor_id", "restaurant_name", "link", "img", "rating", "review_count",
    "price_range", "min_order", "cuisine_type",
    "delivery_time", "delivery_type"
]

with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(headers)

# Get the information from each restaurant card.
for i, card in enumerate(cards, 1):
    try:
        name = card.find_element(By.CSS_SELECTOR, "div.vendor-name").get_attribute("title")
        link = card.get_attribute("href")
        img = ""
        rating = ""
        review_count = ""
        price_range = ""
        min_order = ""
        cuisine_type = ""
        delivery_time = ""
        delivery_type = ""
        vendor_id = ""

        # Vendor ID
        try:
            match = re.search(r"/restaurant/([^/]+)/", link)
            if match:
                vendor_id = match.group(1)
        except:
            pass

        # Image
        try:
            img = card.find_element(By.CSS_SELECTOR, "img").get_attribute("src")
        except:
            pass

        # Rating and review
        try:
            rating = card.find_element(By.CSS_SELECTOR, ".bds-c-rating__label-primary").text
        except:
            pass
        try:
            review_count = card.find_element(By.CSS_SELECTOR, ".bds-c-rating__label-secondary").text
        except:
            pass

        # Information lines
        info_items = card.find_elements(By.CSS_SELECTOR, ".vendor-info-row-text .sanitized-row-text")
        if len(info_items) > 0:
            price_range = info_items[0].text if len(info_items) > 0 else ""
            min_order = info_items[1].text if len(info_items) > 1 else ""
            cuisine_type = info_items[2].text if len(info_items) > 2 else ""
            delivery_time = info_items[3].text if len(info_items) > 3 else ""
            delivery_type = info_items[4].text if len(info_items) > 4 else ""

        row = [
            vendor_id, name, link, img, rating, review_count,
            price_range, min_order, cuisine_type, delivery_time, delivery_type
        ]

        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(row)

        print(f"[{i}/{len(cards)}] Kaydedildi: {name} ({vendor_id})")

    except Exception as e:
        print(f"Hata oluştu: {e}")

print(f"\n✅ Tüm {len(cards)} restoran bilgileri {filename} dosyasına kaydedildi.")
driver.quit()

Sayfa yüklendi, restoranlar toplanıyor...
Sayfanın sonuna ulaşıldı ✅
Tüm restoran kartları yüklendi, veriler çekiliyor...
18 restoran bulundu.

[1/18] Kaydedildi: Peanut Waffle Belgium (t00u)
[2/18] Kaydedildi: Always Waffle (h177)
[3/18] Kaydedildi: Always Waffle (n6eg)
[4/18] Kaydedildi: Bebek Susam Waffle & Kumpir (sajr)
[5/18] Kaydedildi: Kanyon Waffle (w4dx)
[6/18] Kaydedildi: Granny's Waffles & Kumpir (ad13)
[7/18] Kaydedildi: Bebek Susam Waffle & Kumpir (achj)
[8/18] Kaydedildi: Crispy Waffle (ev51)
[9/18] Kaydedildi: Anka Waffle (eo2f)
[10/18] Kaydedildi: One Sweet (wrj2)
[11/18] Kaydedildi: Asia Waffle (g1hb)
[12/18] Kaydedildi: Favori Waffle (xixg)
[13/18] Kaydedildi: Bunny Waffle (si93)
[14/18] Kaydedildi: A Love Waffle (rb2f)
[15/18] Kaydedildi: Etiler Waffle (afsl)
[16/18] Kaydedildi: Balkon Waffle (u189)
[17/18] Kaydedildi: Gossip Waffle (nua4)
[18/18] Kaydedildi: Müpto Waffle (a99h)

✅ Tüm 18 restoran bilgileri ../data/raw/links/besiktas_linkler/besiktas_waffle_restoranl

merge links

In [None]:
import pandas as pd
import os

csv_files = [
    ("../data/raw/links/maltepe_links/maltepe_balik_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_burger_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_cigkofte_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_dondurma_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_doner_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_dunyamutfagi_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_evyemekleri_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_kahvalti_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_kahve_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_kebap_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_kofte_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_kokorec_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_kumpir_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_makarna_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_manti_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_meze_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_pastane_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_pide_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_pilav_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_pizza_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_salata_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_steak_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_tantuni_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_tatli_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_tavuk_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_tost_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_uzakdogu_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_vejetaryen_restaurants.csv", "Maltepe"),
    ("../data/raw/links/maltepe_links/maltepe_waffle_restaurants.csv", "Maltepe"),
    ("../data/raw/links/besiktas_links/besiktas_balik_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_burger_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_cigkofte_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_dondurma_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_doner_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_dunyamutfagi_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_evyemekleri_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_kahvalti_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_kahve_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_kebap_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_kofte_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_kokorec_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_kumpir_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_makarna_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_manti_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_meze_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_pastane_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_pide_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_pilav_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_pizza_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_salata_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_steak_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_tantuni_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_tatli_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_tavuk_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_tost_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_uzakdogu_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_vejetaryen_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/besiktas_links/besiktas_waffle_restaurants.csv", "Beşiktaş"),
    ("../data/raw/links/beyoglu_links/beyoglu_balik_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_burger_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_cigkofte_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_dondurma_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_doner_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_dunyamutfagi_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_evyemekleri_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_kahvalti_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_kahve_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_kebap_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_kofte_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_kokorec_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_kumpir_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_makarna_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_manti_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_meze_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_pastane_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_pide_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_pilav_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_pizza_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_salata_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_steak_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_tantuni_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_tatli_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_tavuk_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_tost_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_uzakdogu_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_vejetaryen_restaurants.csv", "Beyoğlu"),
    ("../data/raw/links/beyoglu_links/beyoglu_waffle_restaurants.csv", "Beyoğlu")
]

dfs = []
for file, district in csv_files:
    if os.path.exists(file):
        df = pd.read_csv(file)
        df['District'] = district  
        dfs.append(df)
    else:
        print(f"Dosya bulunamadı: {file}")

merged = pd.concat(dfs, ignore_index=True)

merged.to_csv("../data/raw/restaurant_links.csv", index=False, encoding="utf-8")

print("✅ Dosyalar başarıyla birleştirildi -> restaurant_links.csv")


✅ Dosyalar başarıyla birleştirildi -> restaurant_links.csv


collecting comments

In [None]:
# comments from low to high ratings
import pandas as pd
import requests
import csv
import time
import random
import os
import urllib.parse

INPUT_CSV = "../data/raw/links/restaurant_links.csv"
OUTPUT_CSV = "../data/raw/reviews/comments_from_low_to_high_ratings.csv"

df = pd.read_csv(INPUT_CSV)

# Restaurant columns in CSV
restaurant_columns = df.columns.tolist()

# New columns for reviews
restaurant_columns_extended = (
    restaurant_columns
    + ["restaurant_slug"]
)

review_columns = [
    "uuid", "reviewer_name", "reviewer_id",
    "overall_score", "restaurant_food_score", "rider_score",
    "text", "created_at", "like_count", "replies_count", "product_names"
]

# Header
if not os.path.exists(OUTPUT_CSV):
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(restaurant_columns_extended + review_columns)

records = df.to_dict(orient="records")

for i, restaurant in enumerate(records, 1):
    link = restaurant["link"]

    try:
        # Slug name from the link
        parts = link.strip("/").split("/")
        if "restaurant" in parts:
            idx = parts.index("restaurant")
            vendor_id = parts[idx + 1]
            restaurant_slug = parts[idx + 2]
        else:
            vendor_id = parts[-2]
            restaurant_slug = parts[-1]

        # The Turkish name in the CSV file is PRESERVED.
        restaurant["restaurant_slug"] = restaurant_slug
        restaurant["vendor_id"] = vendor_id  

        restaurant_name_tr = restaurant.get("restaurant_name", "")
        print(f"\n[{i}/{len(records)}] {restaurant_name_tr} ({restaurant_slug})")

        next_page = None

        while True:
            url = (
                f"https://reviews-api-tr.fd-api.com/reviews/vendor/{vendor_id}"
                f"?global_entity_id=YS_TR&limit=50&rating=asc&has_dish=true"
            )

            if next_page:
                url += f"&nextPageKey={urllib.parse.quote(next_page)}"

            r = requests.get(url, timeout=15)
            if r.status_code != 200:
                break

            data = r.json()
            reviews = data.get("data", [])
            if not reviews:
                break

            with open(OUTPUT_CSV, "a", newline="", encoding="utf-8-sig") as f:
                writer = csv.writer(f)

                for rev in reviews:
                    row_review = {
                        "uuid": rev.get("uuid", ""),
                        "reviewer_name": rev.get("reviewerName", ""),
                        "reviewer_id": rev.get("reviewerId", ""),
                        "text": rev.get("text", ""),
                        "created_at": rev.get("createdAt", ""),
                        "like_count": rev.get("likeCount", 0),
                        "replies_count": len(rev.get("replies", [])),
                        "overall_score": "",
                        "restaurant_food_score": "",
                        "rider_score": "",
                        "product_names": ""
                    }

                    for rr in rev.get("ratings", []):
                        topic = rr.get("topic", "").lower()
                        if topic == "overall":
                            row_review["overall_score"] = rr.get("score", "")
                        elif topic == "restaurant_food":
                            row_review["restaurant_food_score"] = rr.get("score", "")
                        elif topic == "rider":
                            row_review["rider_score"] = rr.get("score", "")

                    products = []
                    for p in rev.get("productVariations", []) or []:
                        prod = p.get("product", {})
                        name = prod.get("defaultTitle", "")
                        if name:
                            products.append(name)

                    row_review["product_names"] = " | ".join(products)

                    writer.writerow(
                        [restaurant[col] for col in restaurant_columns] +
                        [restaurant["restaurant_slug"]] +
                        [row_review[col] for col in review_columns]
                    )

            next_page = data.get("nextPageKey")
            if not next_page:
                break

            time.sleep(random.uniform(3, 6))

        print(f"✔ {restaurant_name_tr}")

    except Exception as e:
        print("⚠ Hata:", e)
        time.sleep(5)
        continue

print("✅ TÜM RESTORANLAR TAMAMLANDI!")


[1/2165] Mis Balık Restaurant (mis-balik-restaurant)
✔ Mis Balık Restaurant tamamlandı

[2/2165] İstanbul Çiğ Köfte (istanbul-cig-kofte-p9zl)
✔ İstanbul Çiğ Köfte tamamlandı

[3/2165] Çınar Balık Izgara (cinar-balik-izgara)
✔ Çınar Balık Izgara tamamlandı

[4/2165] Karadeniz Balık Lokantası (karadeniz-balik-lokantasi-lhwv)
✔ Karadeniz Balık Lokantası tamamlandı

[5/2165] Küçükyalı Balıkçısı (kucukyali-balikcisi-tgvo)
✔ Küçükyalı Balıkçısı tamamlandı

[6/2165] Hepsi Meze & Sandviç Küçükyalı (hepsi-meze-and-sandvic-kucukyali)
✔ Hepsi Meze & Sandviç Küçükyalı tamamlandı

[7/2165] Balıkçı Selami (balikci-selami-znjl)
✔ Balıkçı Selami tamamlandı

[8/2165] Mavi Kıyı Balıkçısı (mavi-kiyi-balikcisi)
✔ Mavi Kıyı Balıkçısı tamamlandı

[9/2165] Urla Balıkçısı (urla-balikcisi-t360)
✔ Urla Balıkçısı tamamlandı

[10/2165] İstanbul Midye (istanbul-midye-nnt1)
✔ İstanbul Midye tamamlandı

[11/2165] Maltepe Şırdancısı (maltepe-sirdancisi)
✔ Maltepe Şırdancısı tamamlandı

[12/2165] Es Balık

In [None]:
# New → Back
import pandas as pd
import requests
import csv
import time
import random
import os
import urllib.parse

INPUT_CSV = "../data/raw/links/restaurant_links.csv"
OUTPUT_CSV = "../data/raw/reviews/new_to_old_comments.csv"

df = pd.read_csv(INPUT_CSV)

# Restaurant-level columns
restaurant_columns = df.columns.tolist()

# The new column we will derive from the link
restaurant_columns_extended = restaurant_columns + ["restaurant_slug"]

# Review-level columns
review_columns = [
    "uuid", "reviewer_name", "reviewer_id",
    "overall_score", "restaurant_food_score", "rider_score",
    "text", "created_at", "like_count", "replies_count", "product_names"
]

# Output header
if not os.path.exists(OUTPUT_CSV):
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(restaurant_columns_extended + review_columns)

records = df.to_dict(orient="records")

for i, restaurant in enumerate(records, 1):
    link = restaurant["link"]

    try:
        # vendor_id & slug
        parts = link.strip("/").split("/")
        if "restaurant" in parts:
            idx = parts.index("restaurant")
            vendor_id = parts[idx + 1]
            restaurant_slug = parts[idx + 2]
        else:
            vendor_id = parts[-2]
            restaurant_slug = parts[-1]

        # The Turkish name in the CSV file is PRESERVED.
        restaurant["vendor_id"] = vendor_id
        restaurant["restaurant_slug"] = restaurant_slug
        restaurant_name_tr = restaurant.get("restaurant_name", "")

        print(f"\n[{i}/{len(records)}] {restaurant_name_tr} ({restaurant_slug})")

        next_page = None
        total_yorum = 0

        while True:
            url = (
                f"https://reviews-api-tr.fd-api.com/reviews/vendor/{vendor_id}"
                f"?global_entity_id=YS_TR&limit=50&created_at=desc&has_dish=true"
            )

            if next_page:
                url += f"&nextPageKey={urllib.parse.quote(next_page)}"

            resp = requests.get(url, timeout=15)
            if resp.status_code != 200:
                print(f"  -> API hatası: {resp.status_code}")
                break

            data = resp.json()
            reviews = data.get("data", [])
            if not reviews:
                break

            with open(OUTPUT_CSV, "a", newline="", encoding="utf-8-sig") as f:
                writer = csv.writer(f)

                for r in reviews:
                    row_review = {
                        "uuid": r.get("uuid", ""),
                        "reviewer_name": r.get("reviewerName", ""),
                        "reviewer_id": r.get("reviewerId", ""),
                        "text": r.get("text", ""),
                        "created_at": r.get("createdAt", ""),
                        "like_count": r.get("likeCount", 0),
                        "replies_count": len(r.get("replies", [])),
                        "overall_score": "",
                        "restaurant_food_score": "",
                        "rider_score": "",
                        "product_names": ""
                    }

                    for rr in r.get("ratings", []):
                        topic = rr.get("topic", "").lower()
                        if topic == "overall":
                            row_review["overall_score"] = rr.get("score", "")
                        elif topic == "restaurant_food":
                            row_review["restaurant_food_score"] = rr.get("score", "")
                        elif topic == "rider":
                            row_review["rider_score"] = rr.get("score", "")

                    products = []
                    for p in r.get("productVariations", []) or []:
                        prod = p.get("product", {})
                        name = prod.get("defaultTitle", "")
                        if name:
                            products.append(name)

                    row_review["product_names"] = " | ".join(products)

                    # --- FINAL MERGED ROW ---
                    writer.writerow(
                        [restaurant[col] for col in restaurant_columns] +
                        [restaurant["restaurant_slug"]] +
                        [row_review[col] for col in review_columns]
                    )

            total_yorum += len(reviews)
            print(f"  → {len(reviews)} yorum eklendi (toplam {total_yorum})")

            next_page = data.get("nextPageKey")
            if not next_page:
                break

            time.sleep(random.uniform(3, 6))

        print(f"✔ {restaurant_name_tr}")

    except Exception as e:
        print("⚠ Hata:", e)
        time.sleep(5)
        continue

print("✅ TÜM RESTORANLAR TAMAMLANDI!")


[1/2165] Mis Balık Restaurant (mis-balik-restaurant)
  → 50 yorum eklendi (toplam 50)
✔ Mis Balık Restaurant tamamlandı

[2/2165] İstanbul Çiğ Köfte (istanbul-cig-kofte-p9zl)
  → 11 yorum eklendi (toplam 11)
✔ İstanbul Çiğ Köfte tamamlandı

[3/2165] Çınar Balık Izgara (cinar-balik-izgara)
  → 16 yorum eklendi (toplam 16)
✔ Çınar Balık Izgara tamamlandı

[4/2165] Karadeniz Balık Lokantası (karadeniz-balik-lokantasi-lhwv)
  → 4 yorum eklendi (toplam 4)
✔ Karadeniz Balık Lokantası tamamlandı

[5/2165] Küçükyalı Balıkçısı (kucukyali-balikcisi-tgvo)
  → 50 yorum eklendi (toplam 50)
✔ Küçükyalı Balıkçısı tamamlandı

[6/2165] Hepsi Meze & Sandviç Küçükyalı (hepsi-meze-and-sandvic-kucukyali)
  → 50 yorum eklendi (toplam 50)
✔ Hepsi Meze & Sandviç Küçükyalı tamamlandı

[7/2165] Balıkçı Selami (balikci-selami-znjl)
  → 10 yorum eklendi (toplam 10)
✔ Balıkçı Selami tamamlandı

[8/2165] Mavi Kıyı Balıkçısı (mavi-kiyi-balikcisi)
  → 20 yorum eklendi (toplam 20)
✔ Mavi Kıyı Balıkçısı tama

dataset limitation

In [7]:
import pandas as pd

csv_files = [
    "../data/raw/reviews/comments_from_low_to_high_ratings.csv",
    "../data/raw/reviews/new_to_old_comments.csv",
]

dfs = []
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

merged = pd.concat(dfs, ignore_index=True)

merged.to_csv("../data/raw/reviews/restaurants_reviews_raw.csv", index=False, encoding="utf-8")

print("✅ restaurants_reviews_raw.csv")


✅ restaurants_reviews_raw.csv
