In [None]:
# Install required libraries for BigQuery and HTTP requests
%pip install pandas-gbq google-cloud-bigquery requests

In [None]:
import os
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
import pandas_gbq
import requests
from google.cloud import bigquery

In [None]:
# Google Cloud project used for every BigQuery call in this notebook
project_id = "adcz-adoki-poc"

# Publish the same project id through the process environment
os.environ.update({"GOOGLE_CLOUD_PROJECT": project_id})

print("project_id =", project_id)

### Get data from BigQuery
- using local/Workbench computing

In [None]:
# Query BigQuery through pandas-gbq; the result set is loaded into a local
# pandas DataFrame (only rows with a positive latitude, capped at 10).
sql = (
    "SELECT property_id, address_latitude, address_longitude "
    "FROM demo_real_estate.property "
    "WHERE address_latitude > 0 "
    "LIMIT 10"
)
df_property = pandas_gbq.read_gbq(sql, project_id=project_id)

display(df_property)

### Historické počasí (před dvěma dny) pro GPS souřadnice
Získáme hodinové metriky (teplota, relativní vlhkost, déšť, sníh, rychlost větru) z Open-Meteo Archive API pro datum před dvěma dny (časová zóna Europe/Prague).

In [None]:
# Fetch historical hourly weather for property coordinates using Open-Meteo API.
# pandas, requests, time and datetime/timedelta come from the notebook's import cell.

# Prepare unique coordinates from the property DataFrame: one API call per
# distinct (lat, lon) pair, no matter how many properties share it.
coords_df = (
    df_property[['address_latitude', 'address_longitude']]
    .dropna()
    .drop_duplicates()
    .rename(columns={'address_latitude': 'lat', 'address_longitude': 'lon'})
)

# Define hourly weather parameters to fetch
hourly_params = [
    'temperature_2m',
    'relative_humidity_2m',
    'rain',
    'snowfall',
    'snow_depth',
    'windspeed_10m',
]

# Accumulates one dict per (coordinate, hour); filled by the fetch loop below.
results = []
BASE_URL = "https://archive-api.open-meteo.com/v1/archive"

# Number of days to look back from today.
# NOTE(review): presumably 2 because the archive lags behind real time — confirm.
ARCHIVE_LAG_DAYS = 2

# Compute "today" in Europe/Prague, the same zone the API is queried in.
# A naive datetime.now() follows the kernel's local zone (often UTC on hosted
# notebooks) and can land on the wrong calendar day around midnight.
target_date = (pd.Timestamp.now(tz='Europe/Prague') - timedelta(days=ARCHIVE_LAG_DAYS)).date()

def _value_at(series, idx):
    """Return ``series[idx]``, or ``None`` when the series is missing or too short."""
    if not series or idx >= len(series):
        return None
    return series[idx]


def _hourly_to_records(hourly, lat, lon):
    """Turn the ``hourly`` section of one API response into a list of row dicts.

    Args:
        hourly: Mapping with a ``time`` list of ``<date>T<HH>:<MM>`` strings and
            one value list per requested weather variable.
        lat: Latitude the response belongs to; copied into every row.
        lon: Longitude the response belongs to; copied into every row.

    Returns:
        One dict per parseable timestamp with the keys ``lat``, ``lon``,
        ``date``, ``hour``, ``temperature``, ``relative_humidity``, ``rain``,
        ``wind_speed`` and ``snow_depth``. A variable that is absent or shorter
        than ``time`` yields ``None`` for that field. Timestamps that cannot be
        parsed are reported and skipped.
    """
    times = hourly.get('time') or []
    # Look every series up once instead of once per hour.
    temperature = hourly.get('temperature_2m')
    humidity = hourly.get('relative_humidity_2m')
    rain = hourly.get('rain')
    wind_speed = hourly.get('windspeed_10m')
    snow_depth = hourly.get('snow_depth')
    snowfall = hourly.get('snowfall')

    records = []
    for i, ts in enumerate(times):
        try:
            date_part, time_part = ts.split('T')
            hour_part = int(time_part.split(':')[0])
        except (AttributeError, ValueError) as exc:
            # Stay best-effort (one bad row must not lose the others), but say so.
            print(f"Skipping malformed timestamp {ts!r} for {lat},{lon}: {exc}")
            continue

        # Prefer snow_depth, fallback to snowfall.
        # NOTE(review): Open-Meteo documents snow_depth in metres and snowfall in
        # centimetres, so the fallback mixes units — confirm this is intended.
        snow_depth_val = _value_at(snow_depth, i)
        if snow_depth_val is None:
            snow_depth_val = _value_at(snowfall, i)

        records.append({
            'lat': lat,
            'lon': lon,
            'date': date_part,
            'hour': hour_part,
            'temperature': _value_at(temperature, i),
            'relative_humidity': _value_at(humidity, i),
            'rain': _value_at(rain, i),
            'wind_speed': _value_at(wind_speed, i),
            'snow_depth': snow_depth_val,
        })
    return records


# One request per unique coordinate pair; a failure is reported and the loop
# moves on to the next coordinate.
for row in coords_df.itertuples(index=False):
    lat, lon = row.lat, row.lon
    params = {
        'latitude': lat,
        'longitude': lon,
        # Explicit ISO date string instead of relying on implicit conversion.
        'start_date': str(target_date),
        'end_date': str(target_date),
        'hourly': ','.join(hourly_params),
        'timezone': 'Europe/Prague'
    }
    try:
        r = requests.get(BASE_URL, params=params, timeout=60)
        r.raise_for_status()
        data = r.json()
        hourly = data.get('hourly') or {}
        results.extend(_hourly_to_records(hourly, lat, lon))
    except Exception as e:
        print(f"Error fetching weather for {lat},{lon}: {e}")
    finally:
        # Be gentle to the API — also after a failed request, so errors do not
        # turn into back-to-back retries against the service.
        time.sleep(0.15)

# Build the hourly weather frame. The explicit column list keeps the schema
# stable when no record was fetched (every request failed, or there were no
# coordinates); without it the merge below raises KeyError on 'lat'/'lon'.
weather_columns = [
    'lat', 'lon', 'date', 'hour',
    'temperature', 'relative_humidity', 'rain', 'wind_speed', 'snow_depth',
]
weather_df = pd.DataFrame(results, columns=weather_columns)

if weather_df.empty:
    print("No weather records were fetched; check the errors reported above.")

# Map property_id to coordinates (many properties may share the same coordinates)
prop_coord_map = df_property[['property_id', 'address_latitude', 'address_longitude']].dropna()

# Merge weather data with property IDs: every property at a coordinate pair
# receives a copy of that pair's hourly rows.
merged = weather_df.merge(
    prop_coord_map,
    left_on=['lat', 'lon'],
    right_on=['address_latitude', 'address_longitude'],
    how='left'
)

# Select and rename columns for final DataFrame
final_weather_df = merged[['property_id', 'date', 'hour', 'temperature', 'relative_humidity', 'rain', 'snow_depth', 'wind_speed']].copy()

if not final_weather_df.empty:
    final_weather_df['hour'] = final_weather_df['hour'].astype(int)

print(f"Total hourly weather records: {len(final_weather_df)}")
display(final_weather_df.head(30))

### Import do BigQuery
Target tabulka: `property_weather`
- if the table does not exist, it is created; otherwise the rows are appended

In [None]:
# Add metadata columns before importing to BigQuery.
# datetime, timezone, pandas and bigquery come from the notebook's import cell;
# project_id comes from the configuration cell, so the upload always targets
# the same project the data was read from.
process_id = "manual"  # Set as needed or load from variable/notebook

# Naive UTC timestamp: the same value datetime.utcnow() produced, without the
# deprecated call. It stays timezone-naive so the column dtype does not change.
now = datetime.now(timezone.utc).replace(tzinfo=None)
final_weather_df["ins_dt"] = now
final_weather_df["ins_process_id"] = process_id
final_weather_df["upd_dt"] = now
final_weather_df["upd_process_id"] = process_id
final_weather_df["del_flag"] = False

# Ensure 'date' column is of type datetime.date
if "date" in final_weather_df.columns:
    final_weather_df["date"] = pd.to_datetime(final_weather_df["date"]).dt.date

# Import to BigQuery
dataset_id = "demo_real_estate"
table_id = "property_weather"
full_table_id = f"{project_id}.{dataset_id}.{table_id}"

if final_weather_df.empty:
    # Nothing was fetched: do not start a load job for an empty, untyped frame.
    print("No weather records to upload; skipping BigQuery load.")
else:
    client = bigquery.Client(project=project_id)
    # NOTE(review): WRITE_APPEND adds rows on every run, so re-running this cell
    # for the same date stores the same records again — confirm this is acceptable.
    job_config = bigquery.LoadJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_APPEND)
    # .result() blocks until the load job finishes and raises if it failed.
    client.load_table_from_dataframe(final_weather_df, full_table_id, job_config=job_config).result()
    print("Data successfully uploaded to BigQuery.")