In [None]:
# !pip install selenium

Collecting selenium
  Downloading selenium-4.38.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.32.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket<1.0,>=0.12.2->selenium)
  Downloading wsproto-1.3.1-py3-none-any.whl.metadata (5.2 kB)
Downloading selenium-4.38.0-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.32.0-py3-none-any.whl (512 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.0/512.0 kB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloadin

In [None]:
# !pip install webdriver-manager

Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver-manager
Successfully installed webdriver-manager-4.0.2


In [15]:
"""
Weather.com HTML Parser - Improved Version
Parses the saved HTML file to extract weather data
"""

from bs4 import BeautifulSoup
import json
import csv
import re

def parse_weather_html(html_file):
    """
    Parse per-day weather data from a saved weather.com monthly-calendar page.

    Each day is expected to be a <button> whose ``data-id`` looks like
    ``calendar-10/26``. Inside it, the first two
    ``<span data-testid="TemperatureValue">`` elements are read as the high and
    low temperature, and the first named ``<svg>`` whose ``name`` does not start
    with ``phase`` (moon-phase icons) is read as the weather icon.

    Args:
        html_file: Path to the saved HTML file (read as UTF-8).

    Returns:
        List of dictionaries, one per day that has both a date and a pair of
        temperatures, with keys ``date`` (e.g. ``"10/26"``), ``temp_high`` and
        ``temp_low`` (page text with ``°`` stripped and ``°C`` appended), and
        ``weather`` (title-cased icon name, or ``"N/A"`` when none was found).

    Side effects:
        Prints the number of buttons found and one summary line per kept day.
        A button that raises while being parsed is reported and skipped.
    """
    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    soup = BeautifulSoup(html_content, 'html.parser')
    weather_data = []

    # Find all date buttons - they have data-id attributes like "calendar-10/26"
    date_buttons = soup.find_all('button', {'data-id': re.compile(r'calendar-\d+/\d+')})

    print(f"Found {len(date_buttons)} date entries\n")

    for button in date_buttons:
        try:
            day_data = {}

            # Format: "calendar-10/26" -> extract "10/26"
            date_match = re.search(r'calendar-(\d+/\d+)', button.get('data-id', ''))
            if date_match:
                day_data['date'] = date_match.group(1)

            # Extract temperatures: first value is the high, second is the low
            temp_elements = button.find_all('span', {'data-testid': 'TemperatureValue'})
            if len(temp_elements) >= 2:
                high_temp = temp_elements[0].get_text(strip=True).replace('°', '')
                low_temp = temp_elements[1].get_text(strip=True).replace('°', '')
                day_data['temp_high'] = high_temp + '°C'
                day_data['temp_low'] = low_temp + '°C'

            # Pick the first SVG that has a name and is not a moon phase
            # ('phase-...'). SVGs without a name attribute are skipped so that
            # an anonymous decorative icon cannot hide the real weather icon.
            weather_icon = None
            for svg in button.find_all('svg'):
                icon_name = svg.get('name', '')
                if icon_name and not icon_name.startswith('phase'):
                    weather_icon = icon_name
                    break

            if weather_icon:
                # Convert icon names like "partly-cloudy-day" to readable format
                day_data['weather'] = weather_icon.replace('-', ' ').title()
            else:
                day_data['weather'] = 'N/A'

            # Only add if we have essential data
            if 'date' in day_data and 'temp_high' in day_data:
                weather_data.append(day_data)
                print(f"Date: {day_data['date']} | High: {day_data['temp_high']} | Low: {day_data.get('temp_low', 'N/A')} | Weather: {day_data.get('weather', 'N/A')}")

        except Exception as e:
            # Best-effort scrape: report the bad entry and keep going.
            print(f"Error parsing entry: {e}")
            continue

    return weather_data

def save_to_json(data, filename='timisoara_weather.json'):
    """
    Serialize ``data`` to ``filename`` as UTF-8 JSON.

    The output is indented by two spaces and non-ASCII characters are written
    as-is rather than escaped. A confirmation line is printed afterwards.
    """
    with open(filename, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=2)

    print(f"\n✓ Data saved to {filename}")

def save_to_csv(data, filename='timisoara_weather.csv'):
    """
    Write ``data`` (a sequence of per-day dicts) to ``filename`` as UTF-8 CSV.

    The columns are, in order: date, temp_high, temp_low, weather. When
    ``data`` is empty nothing is written and nothing is printed.
    """
    if data:
        columns = ['date', 'temp_high', 'temp_low', 'weather']

        with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.DictWriter(csv_file, fieldnames=columns)
            csv_writer.writeheader()
            for row in data:
                csv_writer.writerow(row)

        print(f"✓ Data saved to {filename}")

if __name__ == "__main__":
    # Script entry point: parse the saved page, export JSON/CSV, print stats.

    # Path to the uploaded HTML file.
    # NOTE(review): absolute, environment-specific path — adjust when running
    # outside the environment where the file was uploaded.
    html_file = '/content/Prognoză meteo lunară pentru Timișoara, Timiș 300000 - weather.com.html'

    print("="*70)
    print("Weather.com HTML Parser for Timișoara")
    print("="*70)
    print()

    # Parse the data
    weather_data = parse_weather_html(html_file)

    if weather_data:
        print("\n" + "="*70)
        print(f"Successfully extracted {len(weather_data)} days of weather data!")
        print("="*70)

        # Save to files
        save_to_json(weather_data)
        save_to_csv(weather_data)

        print("\n" + "="*70)
        print("Summary Statistics:")
        print("="*70)

        # Collect numeric temperatures; entries whose value contains the '--'
        # placeholder (no reading available) are left out. The same filter is
        # applied to highs and lows.
        temps_high = [
            int(d['temp_high'].replace('°C', ''))
            for d in weather_data
            if 'temp_high' in d and '--' not in d['temp_high']
        ]
        temps_low = [
            int(d['temp_low'].replace('°C', ''))
            for d in weather_data
            if 'temp_low' in d and '--' not in d['temp_low']
        ]

        # Each list is guarded on its own so that an empty list of lows (or
        # highs) cannot raise in min()/max() or divide by zero in the average.
        # The print order matches the original: highest, lowest, avg high,
        # avg low.
        if temps_high:
            print(f"Highest temperature: {max(temps_high)}°C")
        if temps_low:
            print(f"Lowest temperature: {min(temps_low)}°C")
        if temps_high:
            print(f"Average high: {sum(temps_high) / len(temps_high):.1f}°C")
        if temps_low:
            print(f"Average low: {sum(temps_low) / len(temps_low):.1f}°C")
    else:
        print("\n✗ No data was extracted. Please check the HTML file structure.")

Weather.com HTML Parser for Timișoara

Found 42 date entries

Date: 10/26 | High: 14°C | Low: 6°C | Weather: Rain
Date: 10/27 | High: 14°C | Low: 7°C | Weather: Partly Cloudy Day
Date: 10/28 | High: 16°C | Low: 3°C | Weather: Partly Cloudy Day
Date: 10/29 | High: 20°C | Low: 4°C | Weather: Partly Cloudy Day
Date: 10/30 | High: 21°C | Low: 12°C | Weather: Partly Cloudy Day
Date: 10/31 | High: 21°C | Low: 9°C | Weather: Partly Cloudy Day
Date: 11/1 | High: 24°C | Low: 7°C | Weather: Mostly Clear Day
Date: 11/2 | High: 23°C | Low: 9°C | Weather: Partly Cloudy Day
Date: 11/3 | High: 20°C | Low: 5°C | Weather: Scattered Showers Day
Date: 11/4 | High: 15°C | Low: 0°C | Weather: Partly Cloudy Day
Date: 11/5 | High: 15°C | Low: 1°C | Weather: Foggy
Date: 11/6 | High: 15°C | Low: 2°C | Weather: Rain
Date: 11/7 | High: 16°C | Low: 8°C | Weather: Heavy Rain
Date: 11/8 | High: 11°C | Low: 8°C | Weather: Heavy Rain
Date: 11/9 | High: 11°C | Low: 7°C | Weather: Rain
Date: 11/10 | High: 10°C | Low: 3