In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

In [4]:
# Set up the Selenium Chrome driver shared by the scraping code in this cell.
options = Options()
#options.add_argument('--headless')  # headless mode: does not open a browser window
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')


# Build the driver through an explicit Service whose executable path comes from
# ChromeDriverManager().install().
service = Service(ChromeDriverManager().install())
# parse_month() below reads `driver` as a module-level global.
driver = webdriver.Chrome(service=service, options=options)

# Build the list of monthly calendar pages to scrape.
def generate_links(start_year=2012, end_year=2017, start_month=12):
    """Return ``(year, month, url)`` tuples for every month in the range.

    The range begins at ``start_month`` of ``start_year`` and runs through
    December of ``end_year``. With the defaults this is December 2012
    through December 2017, exactly as before.

    The skip of the leading months is tied to ``start_year`` rather than to
    the literal year 2012, so an earlier ``start_year`` no longer leaves a gap
    in the middle of the range.

    Args:
        start_year: First calendar year of the range.
        end_year: Last calendar year of the range (inclusive).
        start_month: First month (1-12) to include in ``start_year``.

    Returns:
        A list of ``(year, month, url)`` tuples in chronological order; empty
        when ``end_year`` is before ``start_year``.

    Raises:
        ValueError: If ``start_month`` is not between 1 and 12.
    """
    if not 1 <= start_month <= 12:
        raise ValueError(f"start_month must be in 1..12, got {start_month!r}")

    base_url = "https://www.wunderground.com/calendar/ec/latacunga-canton/SELT/date"
    links = []
    for year in range(start_year, end_year + 1):
        # Only the first year is truncated; every later year is complete.
        first_month = start_month if year == start_year else 1
        for month in range(first_month, 13):
            links.append((year, month, f"{base_url}/{year}-{month:02d}"))
    return links

# Scrape the daily observations from one monthly calendar page.
def parse_month(year, month, url):
    """Load a monthly calendar page and return one record per usable day.

    The page is opened with the module-level ``driver``. A cookie-consent
    button is clicked when it becomes clickable within 5 seconds; otherwise
    scraping simply continues.

    Args:
        year: Calendar year of the page; used to build the ``Datum`` value.
        month: Calendar month (1-12) of the page; used to build ``Datum``.
        url: Address of the monthly calendar page to load.

    Returns:
        A list of dicts with the keys ``Datum`` (``YYYY-MM-DD``), ``Opis``,
        ``MaxTemp``, ``MinTemp`` and ``Padavine (in)``. Values are the strings
        read from the page, or ``None`` when a field is absent or empty. Days
        without a numeric date, a description or a high temperature are
        skipped. An empty list is returned when no day tile appears within
        15 seconds, so one bad month does not abort a multi-month run.
    """
    day_selector = "li.calendar-day.current-month.history"

    def read_text(container, css_selector):
        """Return the stripped text of the first match, or None if missing or empty."""
        try:
            text = container.find_element(By.CSS_SELECTOR, css_selector).text.strip()
        except NoSuchElementException:
            return None
        return text or None

    def read_number(container, css_selector):
        """Like read_text, but keep only digits, sign and decimal point."""
        text = read_text(container, css_selector)
        if text is None:
            return None
        # Filtering characters (instead of removing one degree-sign literal)
        # keeps the result independent of how that symbol is encoded in this
        # file or rendered on the page.
        cleaned = "".join(ch for ch in text if ch.isdigit() or ch in "+-.")
        return cleaned or None

    driver.get(url)

    try:
        # Accept both spellings of the button title: 'Reject all' is the one
        # used by the other cells of this notebook.
        # NOTE(review): the other cells look for this button inside iframes;
        # if it only exists there, this lookup times out - confirm.
        WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[@title='Reject all' or @title='Reject-all']")
            )
        ).click()
        print("Kliknuto na 'Reject all' dugme za kolaƒçiƒáe.")
    except (TimeoutException, NoSuchElementException):
        # No popup (or no button): carry on without failing.
        print("Nije pronaƒëeno 'Reject all' dugme, nastavljam sa parsiranjem.")

    # Wait until at least one day tile of the requested month is in the DOM.
    try:
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, day_selector))
        )
    except TimeoutException:
        print(f"Dani nisu ucitani za {year}-{month:02d}, preskacem mesec.")
        return []
    # Fixed pause kept from the original; presumably lets the remaining tiles
    # finish rendering after the first one appears - TODO confirm.
    time.sleep(5)

    days = driver.find_elements(By.CSS_SELECTOR, day_selector)
    print(f"Pronaƒëeno dana: {len(days)}")

    data = []
    for day in days:
        try:
            date = read_text(day, ".date")
            if date is None or not date.isdigit():
                # No valid day number on this tile.
                continue

            description = read_text(day, ".phrase")
            high = read_number(day, ".temperature .hi")
            low = read_number(day, ".temperature .low")
            precip = read_text(day, ".precipitation .wu-value")

            # A usable row needs a description and a high temperature.
            if description is None or high is None:
                continue

            print(f"{date} | {description} | High: {high} | Low: {low} | Precip: {precip}")

            data.append({
                "Datum": f"{year}-{month:02d}-{int(date):02d}",
                "Opis": description,
                "MaxTemp": high,
                "MinTemp": low,
                "Padavine (in)": precip
            })
        except Exception as e:
            # Best effort: one broken tile must not lose the rest of the month.
            print(f"Gre≈°ka prilikom parsiranja dana: {e}")
            continue
    return data

# Main section: scrape every month, then persist whatever was collected.
svi_podaci = []
linkovi = generate_links(2012, 2017)
try:
    for year, month, link in linkovi:
        print(f"Obrada: {year}-{month:02d}")
        podaci = parse_month(year, month, link)
        svi_podaci.extend(podaci)
finally:
    # Runs on success, on a scraping error and on a manual interrupt, so the
    # rows gathered so far are written out and the browser is always closed.
    try:
        # Save to CSV
        df = pd.DataFrame(svi_podaci)
        df.to_csv("vremenske_prilike.csv", index=False)
    finally:
        driver.quit()
# Only reached when the whole range was processed without an exception.
print("Zavr≈°eno! Podaci su saƒçuvani u vremenske_prilike.csv")

Obrada: 2012-12
Nije pronaƒëeno 'Reject all' dugme, nastavljam sa parsiranjem.
Pronaƒëeno dana: 30
1 | Mostly Cloudy | High: 73 | Low: 39 | Precip: 0
2 | Mostly Cloudy | High: 73 | Low: 43 | Precip: 0
3 | Partly Cloudy | High: 73 | Low: 39 | Precip: 0
4 | Mostly Cloudy | High: 70 | Low: 39 | Precip: 0
5 | Partly Cloudy | High: 73 | Low: 50 | Precip: 0
6 | Mostly Cloudy | High: 75 | Low: 39 | Precip: 0
7 | Mostly Cloudy | High: 72 | Low: 48 | Precip: 0
8 | Mostly Cloudy | High: 64 | Low: 52 | Precip: 0
Obrada: 2013-01


KeyboardInterrupt: 

In [5]:
# Start the browser (uses ChromeDriver; make sure it is available).
driver = webdriver.Chrome()

# Open the monthly history page.
url = "https://www.wunderground.com/history/monthly/rs/belgrade/LYBE/date/2015-12"
driver.get(url)

# Fixed pause to give the page time to load before it is inspected.
time.sleep(3)

# Try to click the "Reject all" button in any iframe of the current page.
def try_click_reject_all():
    """Click the first clickable 'Reject all' button found inside an iframe.

    Each iframe of the page loaded in the module-level ``driver`` is entered
    in turn and given up to 2 seconds to expose a clickable button whose
    ``title`` is ``Reject all``. The search stops at the first successful
    click. The driver is always switched back to the main document before
    returning, and one line is printed to report the outcome.

    Returns:
        None.
    """
    clicked = False
    try:
        for iframe in driver.find_elements(By.TAG_NAME, "iframe"):
            try:
                driver.switch_to.frame(iframe)
                WebDriverWait(driver, 2).until(
                    EC.element_to_be_clickable((By.XPATH, "//button[@title='Reject all']"))
                ).click()
            except Exception:
                # Best effort: this frame has no usable button (timeout, stale
                # frame, ...). `Exception` rather than a bare `except` so that
                # KeyboardInterrupt / SystemExit still stop the notebook.
                driver.switch_to.default_content()
                continue
            print("‚úÖ Kliknuto na 'Reject all'")
            clicked = True
            break
    finally:
        # Return to the main content whatever happened above.
        driver.switch_to.default_content()

    if not clicked:
        print("‚ö†Ô∏è Dugme 'Reject all' nije pronaƒëeno.")

# Dismiss the cookie dialog if present, then read the daily observation rows.
try:
    try_click_reject_all()

    # Give the page a moment in case it refreshes after the click.
    time.sleep(2)

    # Look up each day of the month in the observation table.
    # NOTE(review): in the recorded output every day ended in the
    # "Nedovoljno podataka" branch, i.e. the matched row had fewer than four
    # <td> cells - the assumed row layout needs checking against the live page.
    print("üìÖ Parsirani dani:")
    for day in range(1, 32):
        try:
            # Exact match on the cell text: a substring test would make day 1
            # also match 10-19, 21 and 31 (and any other cell containing "1").
            day_block = driver.find_element(
                By.XPATH,
                f"//lib-city-history-observation//td[normalize-space(text())='{day}']/..",
            )

            # Read the values from the row.
            cells = day_block.find_elements(By.TAG_NAME, "td")
            if len(cells) >= 4:
                condition = cells[1].text.strip()
                high = cells[2].text.strip()
                low = cells[3].text.strip()
                precip = cells[4].text.strip() if len(cells) > 4 else "N/A"
                print(f"{day} | {condition} | High: {high} | Low: {low} | Precip: {precip}")
            else:
                print(f"{day} | Nedovoljno podataka.")
        except Exception:
            # Best effort per day; `Exception` (not a bare `except`) so a manual
            # interrupt is not mistaken for a missing row.
            print(f"{day} | ‚ùå Podaci nisu pronaƒëeni ‚Äì preskaƒçem.")
            continue
finally:
    # Close the browser even when the cell is interrupted or fails.
    driver.quit()

‚úÖ Kliknuto na 'Reject all'
üìÖ Parsirani dani:
1 | Nedovoljno podataka.
2 | Nedovoljno podataka.
3 | Nedovoljno podataka.
4 | Nedovoljno podataka.
5 | Nedovoljno podataka.
6 | Nedovoljno podataka.
7 | Nedovoljno podataka.
8 | Nedovoljno podataka.
9 | Nedovoljno podataka.
10 | Nedovoljno podataka.
11 | Nedovoljno podataka.
12 | Nedovoljno podataka.
13 | Nedovoljno podataka.
14 | Nedovoljno podataka.
15 | Nedovoljno podataka.
16 | Nedovoljno podataka.
17 | Nedovoljno podataka.
18 | Nedovoljno podataka.
19 | Nedovoljno podataka.
20 | Nedovoljno podataka.
21 | Nedovoljno podataka.
22 | Nedovoljno podataka.
23 | Nedovoljno podataka.
24 | Nedovoljno podataka.
25 | Nedovoljno podataka.
26 | Nedovoljno podataka.
27 | Nedovoljno podataka.
28 | Nedovoljno podataka.
29 | Nedovoljno podataka.
30 | Nedovoljno podataka.
31 | Nedovoljno podataka.


In [2]:
# Fresh Chrome session for this cell; parse_month() below reads `driver` as a global.
driver = webdriver.Chrome()
# City slug interpolated into the calendar URL and into the output CSV file name.
# NOTE(review): generate_links() below keeps the station code SELT in the URL
# whatever this is set to - confirm that station is the intended one for 'quito'.
city = 'quito'

# Build the monthly calendar links for the configured city and station.
def generate_links(start_year, end_year, start_month=12, station="SELT", city_name=None):
    """Return ``(year, month, url)`` tuples for every month in the range.

    The range begins at ``start_month`` of ``start_year`` and runs through
    December of ``end_year``. The first month, the station code and the city
    slug used to be fixed inside the function; they are parameters now, with
    defaults that reproduce the previous URLs.

    Args:
        start_year: First calendar year of the range.
        end_year: Last calendar year of the range (inclusive).
        start_month: First month (1-12) to include in ``start_year``.
        station: Station code placed in the URL path.
            NOTE(review): the original comment ties SELT to Latacunga while
            the notebook sets ``city = 'quito'`` - confirm which station is
            wanted.
        city_name: City slug for the URL; when ``None`` the notebook-level
            ``city`` variable is used.

    Returns:
        A list of ``(year, month, url)`` tuples in chronological order; empty
        when ``end_year`` is before ``start_year``.

    Raises:
        ValueError: If ``start_month`` is not between 1 and 12.
    """
    if not 1 <= start_month <= 12:
        raise ValueError(f"start_month must be in 1..12, got {start_month!r}")
    if city_name is None:
        city_name = city  # fall back to the notebook-level setting

    links = []
    for year in range(start_year, end_year + 1):
        # Only the first year is truncated; every later year is complete.
        first_month = start_month if year == start_year else 1
        for month in range(first_month, 13):
            url = f"https://www.wunderground.com/calendar/ec/{city_name}-canton/{station}/date/{year}-{month:02d}"
            links.append((year, month, url))
    return links

# Main function: scrape the daily observations of one monthly calendar page.
def parse_month(year, month, url):
    """Load a monthly calendar page and return one record per usable day.

    The page is opened with the module-level ``driver``. Every iframe is
    checked for a clickable 'Reject all' cookie button (up to 3 seconds per
    iframe) before the day tiles are read.

    Args:
        year: Calendar year of the page; used to build the ``Datum`` value.
        month: Calendar month (1-12) of the page; used to build ``Datum``.
        url: Address of the monthly calendar page to load.

    Returns:
        A list of dicts with the keys ``Datum`` (``YYYY-MM-DD``), ``Opis``,
        ``MaxTemp``, ``MinTemp`` and ``Padavine (in)``. Values are the strings
        read from the page, or ``None`` when a field is absent or empty. Days
        without a numeric date, a description or a high temperature are
        skipped. An empty list is returned when no day tile appears within
        15 seconds.
    """
    day_selector = "li.calendar-day.current-month.history"

    def try_click_reject_all():
        """Click 'Reject all' in the first iframe that offers it; report the outcome."""
        for iframe in driver.find_elements(By.TAG_NAME, "iframe"):
            try:
                driver.switch_to.frame(iframe)
                WebDriverWait(driver, 3).until(
                    EC.element_to_be_clickable((By.XPATH, "//button[@title='Reject all']"))
                ).click()
            except Exception:
                # Best effort per frame; `Exception` instead of a bare `except`
                # so KeyboardInterrupt / SystemExit are not swallowed.
                driver.switch_to.default_content()
                continue
            print("‚úÖ Kliknuto na 'Reject all'")
            driver.switch_to.default_content()
            return
        print("‚ö†Ô∏è 'Reject all' nije pronaƒëen")

    def read_text(container, css_selector):
        """Return the stripped text of the first match, or None if missing or empty."""
        try:
            text = container.find_element(By.CSS_SELECTOR, css_selector).text.strip()
        except NoSuchElementException:
            return None
        return text or None

    def read_number(container, css_selector):
        """Like read_text, but keep only digits, sign and decimal point."""
        text = read_text(container, css_selector)
        if text is None:
            return None
        # Filtering characters (instead of removing one degree-sign literal)
        # keeps the result independent of how that symbol is encoded in this
        # file or rendered on the page.
        cleaned = "".join(ch for ch in text if ch.isdigit() or ch in "+-.")
        return cleaned or None

    driver.get(url)
    try_click_reject_all()

    # Wait until at least one day tile of the requested month is in the DOM.
    try:
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, day_selector))
        )
    except TimeoutException:
        print("‚ö†Ô∏è Dani nisu uƒçitani")
        return []

    # Fixed pause kept from the original; presumably lets the remaining tiles
    # finish rendering after the first one appears - TODO confirm.
    time.sleep(5)
    days = driver.find_elements(By.CSS_SELECTOR, day_selector)
    print(f"üìÖ Pronaƒëeno dana: {len(days)}")

    data = []
    for day in days:
        try:
            date = read_text(day, ".date")
            if date is None or not date.isdigit():
                continue

            description = read_text(day, ".phrase")               # description
            high = read_number(day, ".temperature .hi")           # max temperature
            low = read_number(day, ".temperature .low")           # min temperature
            precip = read_text(day, ".precipitation .wu-value")   # precipitation

            # Skip tiles that lack the key information.
            if description is None or high is None:
                continue

            print(f"{date} | {description} | High: {high} | Low: {low} | Precip: {precip}")
            data.append({
                "Datum": f"{year}-{month:02d}-{int(date):02d}",
                "Opis": description,
                "MaxTemp": high,
                "MinTemp": low,
                "Padavine (in)": precip
            })
        except Exception as e:
            # Best effort: one broken tile must not lose the rest of the month.
            print(f"‚ö†Ô∏è Gre≈°ka pri parsiranju dana: {e}")
            continue

    return data

# Main section: scrape every month, then persist whatever was collected.
svi_podaci = []
linkovi = generate_links(2012, 2017)

try:
    for year, month, link in linkovi:
        print(f"\nüîÑ Obrada meseca: {year}-{month:02d}")
        podaci = parse_month(year, month, link)
        svi_podaci.extend(podaci)
finally:
    # Runs on success, on a scraping error and on a manual interrupt, so the
    # browser is always closed and the rows gathered so far are written out.
    try:
        # Close the driver
        driver.quit()
    finally:
        pd.DataFrame(svi_podaci).to_csv(f"vreme_{city}.csv", index=False)



üîÑ Obrada meseca: 2012-12
‚ö†Ô∏è 'Reject all' nije pronaƒëen
üìÖ Pronaƒëeno dana: 30
1 | Mostly Cloudy | High: 73 | Low: 39 | Precip: 0
2 | Mostly Cloudy | High: 73 | Low: 43 | Precip: 0
3 | Partly Cloudy | High: 73 | Low: 39 | Precip: 0
4 | Mostly Cloudy | High: 70 | Low: 39 | Precip: 0
5 | Partly Cloudy | High: 73 | Low: 50 | Precip: 0
6 | Mostly Cloudy | High: 75 | Low: 39 | Precip: 0
7 | Mostly Cloudy | High: 72 | Low: 48 | Precip: 0
8 | Mostly Cloudy | High: 64 | Low: 52 | Precip: 0

üîÑ Obrada meseca: 2013-01
‚ö†Ô∏è 'Reject all' nije pronaƒëen
üìÖ Pronaƒëeno dana: 31
1 | Mostly Cloudy | High: 59 | Low: 50 | Precip: 0
2 | Mostly Cloudy | High: 61 | Low: 48 | Precip: 0
3 | Mostly Cloudy | High: 63 | Low: 50 | Precip: 0
4 | Partly Cloudy | High: 66 | Low: 50 | Precip: 0
5 | Mostly Cloudy | High: 66 | Low: 52 | Precip: 0
6 | Mostly Cloudy | High: 66 | Low: 50 | Precip: 0
7 | Mostly Cloudy | High: 68 | Low: 52 | Precip: 0
8 | Mostly Cloudy | High: 66 | Low: 52 | Precip: 0
9 | Mo


KeyboardInterrupt

