In [None]:
import json
import time

import pandas as pd
import requests
import unidecode
from bs4 import BeautifulSoup

In [None]:
# Get database credentials
#
# database.txt is read line by line; the first five lines are taken, in order,
# as: username, password, host, port, and the value stored in `table`.
# The context manager guarantees the file is closed even if reading fails.
with open("database.txt", "r") as f:
    lines = f.readlines()

if len(lines) < 5:
    raise ValueError(
        f"database.txt must contain at least 5 lines, found {len(lines)}"
    )

# Each entry returned by readlines() carries at most one trailing newline.
username = lines[0].rstrip("\n")
password = lines[1].rstrip("\n")
host = lines[2].rstrip("\n")
port = lines[3].rstrip("\n")
table = lines[4].rstrip("\n")

# Do not print these values here: cell outputs are saved with the notebook
# and would expose the password to anyone who can read the file.

In [None]:
# Example single-offer page URLs.
offer_url_1 = (
    "https://www.otodom.pl/pl/oferta/"
    "2-pok-55-m2-z-widokiem-na-ogrod-krasinskich-ID4lszi"
)
offer_url_2 = (
    "https://www.otodom.pl/pl/oferta/"
    "widokowe-3-pokoje-przy-metrze-wawrzyszew-ID4lxZ0"
)

# Listing page URL.
listing_url_1 = (
    "https://www.otodom.pl/pl/oferty/sprzedaz/mieszkanie/warszawa"
    "?distanceRadius=0&locations=%5Bcities_6-26%5D&viewType=listing"
)

In [None]:
# Fetch the parameters of a single offer
def get_offer_params(offer_url, timeout=30):
    """
    Download one offer page and extract its parameters.

    The price, price per square metre and address are looked up by their
    ``aria-label`` attribute. Every ``div`` with the class
    ``css-1wi2w6s enb64yk4`` contributes one more entry, keyed by the
    normalised ``aria-label`` of its grandparent element.

    Parameters
    ----------
    offer_url : str
        URL of the offer page.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its timeout. Without one the
        request could block indefinitely.

    Returns
    -------
    dict
        ASCII-transliterated values under the keys ``"price"``,
        ``"price_m2"`` and ``"address"``, followed by one key per parameter
        found. A headline value is ``None`` when its element is absent.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or returns an HTTP error status.
    """
    r = requests.get(offer_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into empty values.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    def _text_or_none(node):
        # Transliterated text of a node, or None when the node was not found.
        return unidecode.unidecode(node.text) if node is not None else None

    results = {
        "price": _text_or_none(soup.find("strong", {"aria-label": "Cena"})),
        "price_m2": _text_or_none(
            soup.find("div", {"aria-label": "Cena za metr kwadratowy"})
        ),
        "address": _text_or_none(soup.find("a", {"aria-label": "Adres"})),
    }

    offer_params = soup.find_all("div", {"class": "css-1wi2w6s enb64yk4"})
    for line in offer_params:
        # The parameter name lives in the aria-label of the grandparent element.
        container = line.parent.parent if line.parent is not None else None
        label = container.get("aria-label") if container is not None else None
        if label is None:
            # No label to use as a key: skip the entry rather than abort the scrape.
            continue

        k = unidecode.unidecode(
            str(label).replace(" / ", "_").replace(" ", "_").lower()
        )
        v = unidecode.unidecode(str(line.text).strip())
        results[k] = v

    return results

In [None]:
# Fetch offer links from a listing page
def get_offers_urls(listing_url, timeout=30):
    """
    Return the name and URL of every offer found on a listing page.

    The page is downloaded and the JSON held in its
    ``<script id="__NEXT_DATA__">`` element is parsed. Offers are read from
    ``props -> pageProps -> schemaMarkupData -> "@graph"[2] -> offers -> offers``.

    Parameters
    ----------
    listing_url : str
        URL of the listing page.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its timeout. Without one the
        request could block indefinitely.

    Returns
    -------
    list of dict
        One ``{"name": ..., "url": ...}`` dict per offer, in page order.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or returns an HTTP error status.
    ValueError
        If the page has no ``__NEXT_DATA__`` script element.
    KeyError, IndexError
        If the embedded JSON does not have the expected layout.
    """
    r = requests.get(listing_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    next_data = soup.find("script", {"id": "__NEXT_DATA__"})
    if next_data is None:
        raise ValueError(
            f"No <script id='__NEXT_DATA__'> element found at {listing_url}"
        )

    json_dict = json.loads(next_data.get_text())
    # NOTE(review): position 2 in "@graph" is hard-coded; confirm it still
    # points at the offers entry if the site changes its markup.
    offers = json_dict["props"]["pageProps"]["schemaMarkupData"]["@graph"][2]["offers"][
        "offers"
    ]

    return [{"name": offer["name"], "url": offer["url"]} for offer in offers]

In [None]:
# Fetch the details of every offer linked from a listing
def create_offers_table(offers_list):
    """
    Build one record per offer, combining listing data with scraped details.

    Each item of ``offers_list`` must provide ``"name"`` and ``"url"``. Every
    record starts with ``"offer_name"`` and ``"offer_url"``, followed by the
    entries returned by ``get_offer_params`` for that URL. A scraped entry
    with the same key overrides the listing value.

    Returns a list of dicts, in the same order as ``offers_list``.
    """
    rows = []
    for item in offers_list:
        record = {"offer_name": item["name"], "offer_url": item["url"]}
        # Scraped parameters are merged last so they win on key clashes.
        record.update(get_offer_params(item["url"]))
        rows.append(record)

    return rows

In [None]:
# Collect the offer links from the listing page, then scrape every linked offer.
listing_offers = get_offers_urls(listing_url_1)
offers_listing = create_offers_table(listing_offers)

In [None]:
# One row per scraped offer; columns are the union of the record keys.
df = pd.DataFrame(data=offers_listing)

In [None]:
import datetime

In [None]:
# Stamp every row with the current time, as the first column.
# DataFrame.insert raises ValueError when the column already exists, so a
# stamp left by a previous run of this cell is dropped first. This keeps the
# cell safe to re-run on the same kernel.
# NOTE(review): datetime.now() is a naive local-time value; confirm whether
# the database column expects UTC.
if "create_timestamp" in df.columns:
    df = df.drop(columns="create_timestamp")
df.insert(loc=0, column="create_timestamp", value=datetime.datetime.now())

In [None]:
# Display the scraped offers table as the cell output.
df

In [None]:
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# The username and password are URL-escaped before being embedded in the
# connection URL. An unescaped "@", ":" or "/" in either value would
# otherwise be parsed as part of the URL structure.
# NOTE: despite its name, `table` fills the database slot of the URL.
engine = create_engine(
    f"postgresql://{quote_plus(username)}:{quote_plus(password)}@{host}:{port}/{table}"
)

# Append the scraped rows to otodom_offers_1, leaving out the DataFrame index.
df.to_sql("otodom_offers_1", engine, if_exists="append", index=False)

In [None]:
# Save a CSV copy; the Unix timestamp in the name keeps each run's file distinct.
csv_path = f"results_{int(time.time())}.csv"
df.to_csv(csv_path, index=False)