In [1]:
import requests
import json
import pandas as pd
import sqlite3

# Base URL for the listings endpoint
base_url = "https://www.athome.lu/portal/api/listings"

# This function will page through listings for a given price range.
def fetch_listings(price_gte, price_lte, page_size=200, timeout=30):
    """Page through the listings endpoint for one price window.

    Pages 1, 2, 3, ... are requested from ``base_url`` until a page comes back
    without data, the server answers with a non-200 status, the request itself
    fails, or the body cannot be decoded as JSON. In every one of those cases
    pagination for this window simply stops and whatever was collected so far
    is returned.

    Side effect: for every listing retrieved, a simplified record
    (``page``, ``price``, ``price_from``, ``price_to``) is appended to the
    module-level ``price_results`` list, which must already exist when this
    function is called.

    Args:
        price_gte: Lower bound, sent as ``filters[price][gte]``.
        price_lte: Upper bound, sent as ``filters[price][lte]``.
        page_size: Value sent as ``pagination[size]``.
        timeout: Seconds to wait on each HTTP request. Without a timeout a
            stalled connection would block the whole scrape indefinitely.

    Returns:
        A list with the entries of every page's ``"data"`` array, in the
        order they were received.
    """
    listings = []
    page = 1
    while True:
        params = {
            "filters[price][gte]": str(price_gte),
            "filters[price][lte]": str(price_lte),
            "pagination[size]": page_size,
            "pagination[page]": page,
            "sort[createdAt]": "desc",
            "site": "athomelu"
        }
        print(f"Fetching page {page} for price range {price_gte}-{price_lte}...")
        try:
            response = requests.get(base_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Treat transport failures (timeout, DNS, reset) like an HTTP
            # error below: report it and keep what this window produced.
            print(f"Error: Request failed on page {page} for price range {price_gte}-{price_lte}: {exc}")
            break

        if response.status_code == 422:
            print("Received 422 Unprocessable Entity error; stopping pagination for this price range.")
            break
        elif response.status_code != 200:
            print(f"Error: Received status code {response.status_code} on page {page} for price range {price_gte}-{price_lte}.")
            break

        try:
            data = response.json().get("data", [])
        except ValueError:
            # A 200 response whose body is not JSON (e.g. an HTML error page).
            print(f"Error: Response on page {page} for price range {price_gte}-{price_lte} is not valid JSON.")
            break
        if not data:
            print("No more data returned; stopping pagination for this price range.")
            break

        listings.extend(data)
        # Save a simplified record for each listing (global accumulator).
        for listing in data:
            price_results.append({
                "page": page,
                "price": listing.get("price"),
                "price_from": price_gte,
                "price_to": price_lte
            })
        print(f"Retrieved {len(data)} listings on page {page}.")
        page += 1
    return listings

# Accumulators for the full listing objects and the simplified price records.
# price_results has to exist before the first fetch_listings() call, because
# that function appends to it as a module-level global.
all_listings = []
price_results = []

# Build the list of (price_gte, price_lte) windows, one tier per
# (start, stop, step) triple:
#   1. 0 up to 10,000 in steps of 1,000
#   2. 10,000 up to 30,000 in steps of 10,000
#   3. 30,000 upwards in steps of 100,000; the final window is
#      9,930,000-10,030,000 because the tier does not start on a multiple
#      of its step.
price_ranges = []
for tier_start, tier_stop, tier_step in (
    (0, 10_000, 1_000),
    (10_000, 30_000, 10_000),
    (30_000, 10_000_000, 100_000),
):
    price_ranges.extend(
        (low, low + tier_step) for low in range(tier_start, tier_stop, tier_step)
    )

print(f"Total price ranges to query: {len(price_ranges)}")

# Adjacent windows share their boundary value (e.g. 0-1000 and 1000-2000) and
# both ends are sent as gte/lte filters, so a listing priced exactly on a
# boundary can be returned by two queries. Remember the ids already stored so
# each listing ends up in all_listings only once. Listings without an "id"
# cannot be compared and are always kept.
seen_ids = set()

# Iterate over each price range and collect listings.
for (price_gte, price_lte) in price_ranges:
    listings = fetch_listings(price_gte, price_lte, page_size=200)
    if not listings:
        print(f"No listings found for price range {price_gte}-{price_lte}.")
        continue
    for listing in listings:
        listing_id = listing.get("id")
        if listing_id is not None:
            if listing_id in seen_ids:
                continue
            seen_ids.add(listing_id)
        all_listings.append(listing)

# Save the raw JSON data for backup.
with open("athome_listings_raw.json", "w", encoding="utf-8") as f:
    json.dump(all_listings, f, indent=4)
print("\nRaw JSON data saved to 'athome_listings_raw.json'.")

# Save the simplified price data to CSV. This is the per-query log filled by
# fetch_listings(), so it intentionally still shows every row each query
# returned, including boundary repeats.
df_price = pd.DataFrame(price_results)
df_price.to_csv("price_selection.csv", index=False, encoding="utf-8")
print("Saved the price selection data to 'price_selection.csv'.")

# Normalize (flatten) the full JSON data: nested objects become dotted columns.
df = pd.json_normalize(all_listings)
print("Data normalized. DataFrame shape:", df.shape)

# Convert complex types (lists, dicts) to JSON strings so SQLite can store them.
def convert_complex(val):
    """Return ``val`` as JSON text when it is a list or dict, otherwise unchanged.

    Applied cell by cell so that nested containers left over after flattening
    become plain strings that SQLite can store.
    """
    is_container = isinstance(val, list) or isinstance(val, dict)
    return json.dumps(val) if is_container else val

# DataFrame.applymap is deprecated in favour of DataFrame.map (pandas 2.1+).
# Use the new name when this pandas version provides it and fall back to
# applymap on older versions. The check is made on the class so that a data
# column that happens to be called "map" cannot be mistaken for the method.
if hasattr(pd.DataFrame, "map"):
    df = df.map(convert_complex)
else:
    df = df.applymap(convert_complex)

# Save the flattened DataFrame to an SQLite database. An existing "listings"
# table is replaced, so re-running this step does not accumulate rows.
db_filename = "athome_listings.db"
conn = sqlite3.connect(db_filename)
try:
    df.to_sql("listings", conn, if_exists="replace", index=False)
    conn.commit()
finally:
    # Release the database file even when the write fails.
    conn.close()

print(f"Flattened data saved to SQLite database '{db_filename}' in table 'listings'.")


Total price ranges to query: 112
Fetching page 1 for price range 0-1000...
Retrieved 200 listings on page 1.
Fetching page 2 for price range 0-1000...
Retrieved 200 listings on page 2.
Fetching page 3 for price range 0-1000...
Retrieved 200 listings on page 3.
Fetching page 4 for price range 0-1000...
Retrieved 200 listings on page 4.
Fetching page 5 for price range 0-1000...
Retrieved 200 listings on page 5.
Fetching page 6 for price range 0-1000...
Retrieved 200 listings on page 6.
Fetching page 7 for price range 0-1000...
Retrieved 200 listings on page 7.
Fetching page 8 for price range 0-1000...
Retrieved 200 listings on page 8.
Fetching page 9 for price range 0-1000...
Retrieved 200 listings on page 9.
Fetching page 10 for price range 0-1000...
Retrieved 200 listings on page 10.
Fetching page 11 for price range 0-1000...
Retrieved 200 listings on page 11.
Fetching page 12 for price range 0-1000...
Retrieved 200 listings on page 12.
Fetching page 13 for price range 0-1000...
Retrie

  df = df.applymap(convert_complex)


Flattened data saved to SQLite database 'athome_listings.db' in table 'listings'.


In [1]:
import sqlite3
import pandas as pd

def analyze_database(db_path):
    """Print the structure and a small data sample of every table in an SQLite file.

    For each table listed in ``sqlite_master`` this prints the table name, its
    columns with their declared types (from ``PRAGMA table_info``) and the
    first five rows. A failure while reading the sample rows is reported and
    does not stop the remaining tables from being processed.

    Args:
        db_path: Path of the SQLite database file to inspect.
    """
    # Open the database connection.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Find all tables in the database.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        print("Datenbankstruktur:")
        print("-" * 50)

        for (table_name,) in tables:
            print(f"\nTabelle: {table_name}")

            # Table names cannot be bound as SQL parameters, so quote them as
            # identifiers (embedded double quotes are doubled). This keeps
            # names containing spaces, dots or keywords working.
            quoted_name = '"' + table_name.replace('"', '""') + '"'

            # Fetch the column information for this table.
            cursor.execute(f"PRAGMA table_info({quoted_name})")
            columns = cursor.fetchall()

            print("\nSpalten:")
            for col in columns:
                # table_info rows: index 1 is the column name, index 2 its declared type.
                print(f"- {col[1]} ({col[2]})")

            # Sample data from the table (first 5 rows).
            try:
                df = pd.read_sql_query(f"SELECT * FROM {quoted_name} LIMIT 5", conn)
                print("\nBeispieldaten (erste 5 Zeilen):")
                print(df)
            except Exception as e:
                print(f"Fehler beim Lesen der Daten: {e}")

            print("-" * 50)
    finally:
        # Always release the connection, even if a query above raised.
        conn.close()

if __name__ == "__main__":
    # Inspect the listings database; any exception raised during the analysis
    # is reported as a one-line message instead of a traceback.
    db_path = "athome_listings.db"
    try:
        analyze_database(db_path)
    except Exception as exc:
        print(f"Fehler beim Öffnen der Datenbank: {exc}")


Datenbankstruktur:
--------------------------------------------------

Tabelle: listings

Spalten:
- id (INTEGER)
- externalReference (TEXT)
- type (TEXT)
- typeKey (TEXT)
- typeId (INTEGER)
- group (TEXT)
- groupId (TEXT)
- format (TEXT)
- isNewBuild (INTEGER)
- statusBanner (TEXT)
- createdAt (TEXT)
- updatedAt (TEXT)
- soldPrice (TEXT)
- baselinePrice (TEXT)
- status (TEXT)
- transaction (TEXT)
- publishTo (TEXT)
- price (REAL)
- isPriceOnDemand (INTEGER)
- permalink.de (TEXT)
- permalink.en (TEXT)
- permalink.fr (TEXT)
- address.street (TEXT)
- address.zip (TEXT)
- address.city (TEXT)
- address.district (TEXT)
- address.department (TEXT)
- address.region (TEXT)
- address.suburb (TEXT)
- address.country (TEXT)
- address.geohash (TEXT)
- address.pin.lon (REAL)
- address.pin.lat (REAL)
- address.floor (INTEGER)
- address.display (TEXT)
- contact.id (REAL)
- contact.type (TEXT)
- contact.name (TEXT)
- contact.email (TEXT)
- contact.phone (TEXT)
- contact.mobilePhone (TEXT)
- contact.re

In [None]:
import requests
import json
import pandas as pd
import psycopg2  # PostgreSQL driver
from psycopg2.extras import execute_values


# Base URL for the listings endpoint
base_url = "https://www.athome.lu/portal/api/listings"

# This function will page through listings for a given price range.
def fetch_listings(price_gte, price_lte, page_size=200, timeout=30):
    """Page through the listings endpoint for one price window.

    Pages are requested from ``base_url`` starting at 1 until a page comes
    back without data, the server answers with a non-200 status, the request
    itself fails, or the body cannot be decoded as JSON. In each of those
    cases pagination for this window stops and the partial result is returned.

    Args:
        price_gte: Lower bound, sent as ``filters[price][gte]``.
        price_lte: Upper bound, sent as ``filters[price][lte]``.
        page_size: Value sent as ``pagination[size]``.
        timeout: Seconds to wait on each HTTP request. Without a timeout a
            stalled connection would block the whole scrape indefinitely.

    Returns:
        A ``(listings, price_results)`` tuple. ``listings`` holds the entries
        of every page's ``"data"`` array in the order received;
        ``price_results`` holds one simplified record per listing with the
        keys ``page``, ``price``, ``price_from`` and ``price_to``.
    """
    listings = []
    price_results = []
    page = 1
    while True:
        params = {
            "filters[price][gte]": str(price_gte),
            "filters[price][lte]": str(price_lte),
            "pagination[size]": page_size,
            "pagination[page]": page,
            "sort[createdAt]": "desc",
            "site": "athomelu"
        }
        print(f"Fetching page {page} for price range {price_gte}-{price_lte}...")
        try:
            response = requests.get(base_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Handle transport failures (timeout, DNS, reset) the same way as
            # an HTTP error below: report and keep what was collected.
            print(f"Error: Request failed on page {page} for price range {price_gte}-{price_lte}: {exc}")
            break

        if response.status_code == 422:
            print("Received 422 Unprocessable Entity error; stopping pagination for this price range.")
            break
        elif response.status_code != 200:
            print(f"Error: Received status code {response.status_code} on page {page} for price range {price_gte}-{price_lte}.")
            break

        try:
            data = response.json().get("data", [])
        except ValueError:
            # A 200 response whose body is not JSON (e.g. an HTML error page).
            print(f"Error: Response on page {page} for price range {price_gte}-{price_lte} is not valid JSON.")
            break
        if not data:
            print("No more data returned; stopping pagination for this price range.")
            break

        listings.extend(data)

        # Save a simplified record for each listing.
        price_results.extend(
            {
                "page": page,
                "price": listing.get("price"),
                "price_from": price_gte,
                "price_to": price_lte
            }
            for listing in data
        )

        print(f"Retrieved {len(data)} listings on page {page}.")
        page += 1

    return listings, price_results

# Accumulators for the full listing objects and the simplified price records.
all_listings = []
price_results = []

# Build the list of (price_gte, price_lte) windows, one tier per
# (start, stop, step) triple:
#   1. 0 up to 10,000 in steps of 1,000
#   2. 10,000 up to 30,000 in steps of 10,000
#   3. 30,000 upwards in steps of 100,000; the final window is
#      9,930,000-10,030,000 because the tier does not start on a multiple
#      of its step.
price_ranges = []
for tier_start, tier_stop, tier_step in (
    (0, 10_000, 1_000),
    (10_000, 30_000, 10_000),
    (30_000, 10_000_000, 100_000),
):
    price_ranges.extend(
        (low, low + tier_step) for low in range(tier_start, tier_stop, tier_step)
    )

print(f"Total price ranges to query: {len(price_ranges)}")

# Adjacent windows share their boundary value (e.g. 0-1000 and 1000-2000) and
# both ends are sent as gte/lte filters, so a listing priced exactly on a
# boundary can be returned by two queries. Remember the ids already stored so
# each listing ends up in all_listings only once. Listings without an "id"
# cannot be compared and are always kept.
seen_ids = set()

# Iterate over each price range and collect listings.
for (price_gte, price_lte) in price_ranges:
    listings, prices = fetch_listings(price_gte, price_lte, page_size=200)
    for listing in listings:
        listing_id = listing.get("id")
        if listing_id is not None:
            if listing_id in seen_ids:
                continue
            seen_ids.add(listing_id)
        all_listings.append(listing)
    if prices:
        # Per-query log: kept complete on purpose, including boundary repeats.
        price_results.extend(prices)
    else:
        print(f"No listings found for price range {price_gte}-{price_lte}.")

# Save the raw JSON data for backup.
with open("athome_listings_raw.json", "w", encoding="utf-8") as f:
    json.dump(all_listings, f, indent=4)
print("\nRaw JSON data saved to 'athome_listings_raw.json'.")

# Save the simplified price data to CSV.
df_price = pd.DataFrame(price_results)
df_price.to_csv("price_selection.csv", index=False, encoding="utf-8")
print("Saved the price selection data to 'price_selection.csv'.")

# Normalize (flatten) the full JSON data: nested objects become dotted columns.
df = pd.json_normalize(all_listings)
print("Data normalized. DataFrame shape:", df.shape)

# Convert complex types (lists, dicts) to JSON strings so PostgreSQL can store them.
def convert_complex(val):
    """Serialise list and dict values to a JSON string; pass other values through.

    Intended to be applied to every cell of the flattened DataFrame so that
    nested containers become plain text before the rows are stored.
    """
    if not isinstance(val, (list, dict)):
        return val
    return json.dumps(val)

# DataFrame.applymap is deprecated in favour of DataFrame.map (pandas 2.1+).
# Use the new name when this pandas version provides it and fall back to
# applymap on older versions. The check is made on the class so that a data
# column that happens to be called "map" cannot be mistaken for the method.
if hasattr(pd.DataFrame, "map"):
    df = df.map(convert_complex)
else:
    df = df.applymap(convert_complex)

# Resolve the connection string. A POSTGRES_URL variable already defined in
# the notebook session wins; otherwise it is read from the environment so the
# credentials never have to be written into the notebook itself.
import os

postgres_url = globals().get("POSTGRES_URL") or os.environ.get("POSTGRES_URL")
if not postgres_url:
    raise RuntimeError(
        "POSTGRES_URL is not set; define it in the session or export it as an environment variable."
    )


def _nan_to_none(value):
    """Map float NaN (the marker for a missing field after flattening) to None."""
    if isinstance(value, float) and value != value:
        return None
    return value


# One JSON document per flattened row. NaN must become null first: json.dumps
# would otherwise emit the bare token NaN, which is not valid JSON and is
# rejected by a JSONB column.
data_tuples = [
    (json.dumps({column: _nan_to_none(value) for column, value in row.items()}),)
    for row in df.to_dict(orient="records")
]

# Connect to PostgreSQL
conn = psycopg2.connect(postgres_url)
try:
    # "with conn" commits when the block succeeds and rolls back when it
    # raises; the cursor context closes the cursor.
    with conn, conn.cursor() as cur:
        # Create the table if it does not exist
        cur.execute("""
CREATE TABLE IF NOT EXISTS listings (
    id SERIAL PRIMARY KEY,
    data JSONB
);
""")

        # Insert data into PostgreSQL.
        # NOTE(review): rows are appended on every run, so re-running this
        # cell stores the same listings again - confirm that is intended.
        execute_values(cur, "INSERT INTO listings (data) VALUES %s", data_tuples)
finally:
    # The connection context manager does not close the connection itself.
    conn.close()

print("Data successfully saved to PostgreSQL on Railway! 🚀")


Total price ranges to query: 112
Fetching page 1 for price range 0-1000...
Retrieved 200 listings on page 1.
Fetching page 2 for price range 0-1000...
Retrieved 200 listings on page 2.
Fetching page 3 for price range 0-1000...
Retrieved 200 listings on page 3.
Fetching page 4 for price range 0-1000...
Retrieved 200 listings on page 4.
Fetching page 5 for price range 0-1000...
Retrieved 200 listings on page 5.
Fetching page 6 for price range 0-1000...
Retrieved 200 listings on page 6.
Fetching page 7 for price range 0-1000...
Retrieved 200 listings on page 7.
Fetching page 8 for price range 0-1000...
Retrieved 200 listings on page 8.
Fetching page 9 for price range 0-1000...


KeyboardInterrupt: 