In [6]:
import pandas as pd

url = "https://www.ogimet.com/cgi-bin/gsynres?ind=98430&decoded=yes&ndays=2&ano=2025&mes=07&day=01&hora=00"

# Read table with the first row as header
df = pd.read_html(url, header=0)[0]

# Check what columns we got
print("Columns:", df.columns.tolist())
print(df.head())

# Parse datetime (Fecha + Hora combined into one column here)
df["datetime"] = pd.to_datetime(df["Fecha"], dayfirst=True, errors="coerce")

# Keep only wind columns
wind_df = df[["datetime", "ddd", "ff kmh"]].copy()
wind_df.rename(columns={"ddd": "wind_dir", "ff kmh": "wind_speed"}, inplace=True)

print(wind_df.head())

Columns: ['Fecha', 'Fecha.1', 'T (C)', 'Td (C)', 'Hr %', 'Tmax (C)', 'Tmin (C)', 'ddd', 'ff kmh', 'P0 hPa', 'P mar hPa', 'P Tnd', 'Prec (mm)', 'N t', 'N h', 'H Km', 'Inso D-1', 'Vis km', 'WW', 'W1', 'W2']
        Fecha Fecha.1 T (C)  Td (C)  Hr % Tmax (C) Tmin (C)  ddd  ff kmh  \
0  01/07/2025   00:00  28.2    23.7  77.0    -----     24.1    E     3.6   
1  30/06/2025   21:00  24.7    23.9  95.0    -----    -----    E     3.6   
2  30/06/2025   18:00  25.0    24.2  95.0    -----    -----  CAL     0.0   
3  30/06/2025   15:00  25.5    24.6  95.0    -----    -----  CAL     0.0   
4  30/06/2025   12:00  26.0    24.8  93.0     31.3    -----  SSW     3.6   

   P0 hPa  ...  P Tnd  Prec (mm)  N t  N h  H Km  Inso D-1 Vis km  WW  W1  W2  
0  1003.4  ...    0.6   11.0/24h  5.0  1.0   0.6       3.6    8.0 NaN NaN NaN  
1  1002.8  ...   -0.0       ----  5.0  1.0   0.6       ---    7.0 NaN NaN NaN  
2  1002.8  ...   -0.8     0.0/6h  7.0  2.0   0.6       ---    7.0 NaN NaN NaN  
3  1003.6  ...    

In [9]:
import pandas as pd
from datetime import datetime, timedelta
import time

# Mapping compass directions to degrees
def wind_dir_to_degrees(direction):
    mapping = {
        "N": 0, "NNE": 22.5, "NE": 45, "ENE": 67.5,
        "E": 90, "ESE": 112.5, "SE": 135, "SSE": 157.5,
        "S": 180, "SSW": 202.5, "SW": 225, "WSW": 247.5,
        "W": 270, "WNW": 292.5, "NW": 315, "NNW": 337.5,
        "CAL": None
    }
    return mapping.get(direction, None)

def get_ogimet_wind(station="98430", start="2025-01-01", ndays=1, hour=0):
    """Scrape hourly wind data from Ogimet for given start + ndays."""
    y, m, d = start.split("-")
    url = (
        f"https://www.ogimet.com/cgi-bin/gsynres?"
        f"ind={station}&decoded=yes&ndays={ndays}&ano={y}&mes={m}&day={d}&hora={hour:02d}"
    )
    try:
        df = pd.read_html(url, header=0)[0]
    except Exception as e:
        print(f"❌ Failed for {start}: {e}")
        return pd.DataFrame()

    # Merge date + time
    df["datetime"] = pd.to_datetime(
        df["Fecha"] + " " + df["Fecha.1"],
        format="%d/%m/%Y %H:%M",
        dayfirst=True,
        errors="coerce"
    )

    wind_df = df[["datetime", "ddd", "ff kmh"]].copy()
    wind_df.rename(columns={"ddd": "wind_dir", "ff kmh": "wind_speed"}, inplace=True)
    wind_df["wind_dir_deg"] = wind_df["wind_dir"].apply(wind_dir_to_degrees)

    return wind_df.dropna(subset=["datetime"])

# ============================
# Collect data from Jan 1 – Jul 31, 2025
# ============================

station = "98430"
start_date = datetime(2025, 1, 1)
end_date   = datetime(2025, 7, 31)

all_data = []
current = start_date

while current <= end_date:
    chunk = get_ogimet_wind(
        station=station, start=current.strftime("%Y-%m-%d"), ndays=1
    )
    if not chunk.empty:
        all_data.append(chunk)
        print(f"✅ Got {len(chunk)} rows for {current.date()}")
    else:
        print(f"⚠️ No data for {current.date()}")

    current += timedelta(days=1)
    time.sleep(1)  # polite delay so Ogimet doesn’t block

# Merge
winds_full = pd.concat(all_data, ignore_index=True)
winds_full = winds_full.sort_values("datetime").reset_index(drop=True)

print(winds_full.head())
print(winds_full.tail())

# Save to CSV
winds_full.to_csv("winds_science_garden_JanJul2025.csv", index=False)
print("✅ Saved full dataset to winds_science_garden_JanJul2025.csv")

✅ Got 7 rows for 2025-01-01
✅ Got 9 rows for 2025-01-02
✅ Got 9 rows for 2025-01-03
✅ Got 9 rows for 2025-01-04
✅ Got 9 rows for 2025-01-05
✅ Got 9 rows for 2025-01-06
✅ Got 9 rows for 2025-01-07
✅ Got 9 rows for 2025-01-08
✅ Got 9 rows for 2025-01-09
✅ Got 9 rows for 2025-01-10
✅ Got 9 rows for 2025-01-11
✅ Got 9 rows for 2025-01-12
✅ Got 9 rows for 2025-01-13
✅ Got 9 rows for 2025-01-14
✅ Got 9 rows for 2025-01-15
✅ Got 9 rows for 2025-01-16
✅ Got 9 rows for 2025-01-17
✅ Got 9 rows for 2025-01-18
✅ Got 9 rows for 2025-01-19
✅ Got 9 rows for 2025-01-20
✅ Got 9 rows for 2025-01-21
✅ Got 9 rows for 2025-01-22
✅ Got 9 rows for 2025-01-23
✅ Got 9 rows for 2025-01-24
✅ Got 9 rows for 2025-01-25
✅ Got 9 rows for 2025-01-26
✅ Got 9 rows for 2025-01-27
✅ Got 9 rows for 2025-01-28
✅ Got 9 rows for 2025-01-29
✅ Got 9 rows for 2025-01-30
✅ Got 9 rows for 2025-01-31
✅ Got 9 rows for 2025-02-01
✅ Got 9 rows for 2025-02-02
✅ Got 9 rows for 2025-02-03
✅ Got 9 rows for 2025-02-04
✅ Got 9 rows for 202