In [72]:
import requests_cache
import pandas as pd
from retry_requests import retry
import requests

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)

# Function to fetch weather data for a specific station
def fetch_weather_data(station_id, latitude, longitude):
    url = "https://api.open-meteo.com/v1/dwd-icon"
    timezone = "Europe/Berlin"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "hourly": [
            "temperature_2m",
            "relative_humidity_2m",
            "apparent_temperature",
            "precipitation",
            "cloud_cover",
            "wind_speed_10m",
            "wind_direction_10m",
            "direct_radiation",
            "diffuse_radiation",
            "sunshine_duration"
        ],
        "timezone": timezone
    }
    response = retry_session.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    
    hourly = data['hourly']
    dates = pd.date_range(
        start=pd.to_datetime(hourly['time'][0], utc=False),
        periods=len(hourly['time']),
        freq=pd.Timedelta(hours=1)
    )
    
    
    fetch_timestamp = pd.Timestamp.now(tz=timezone)
    
    hourly_data = pd.DataFrame({
        'timestamp_forecast': dates.tz_localize(tz=timezone),
        'timestamp_fetched': fetch_timestamp.floor("s"),
        'stations_id': station_id,
        'temperature_2m': hourly['temperature_2m'],
        'relative_humidity_2m': hourly['relative_humidity_2m'],
        'apparent_temperature': hourly['apparent_temperature'],
        'precipitation': hourly['precipitation'],
        'cloud_cover': hourly['cloud_cover'],
        'wind_speed_10m': hourly['wind_speed_10m'],
        'wind_direction_10m': hourly['wind_direction_10m'],
        'direct_radiation': hourly['direct_radiation'],
        'diffuse_radiation': hourly['diffuse_radiation'],
        'sunshine_duration': hourly['sunshine_duration']               
    })
    
    return hourly_data

stations_id = ["183", "662"]
stations_latitude = [54.6791, 52.2915]
stations_longitude = [13.4344, 10.4464]

all_data = []

for i in range(len(stations_id)):
    station_data = fetch_weather_data(stations_id[i], stations_latitude[i], stations_longitude[i])
    all_data.append(station_data)

# Combine all data into a single DataFrame
final_data = pd.concat(all_data, ignore_index=True)

# Display the final DataFrame
final_data

Unnamed: 0,timestamp_forecast,timestamp_fetched,stations_id,temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,cloud_cover,wind_speed_10m,wind_direction_10m,direct_radiation,diffuse_radiation,sunshine_duration
0,2024-06-27 00:00:00+02:00,2024-06-27 13:20:32+02:00,183,17.3,87,17.4,0.0,0,11.3,83,0.0,0.0,0.00
1,2024-06-27 01:00:00+02:00,2024-06-27 13:20:32+02:00,183,16.9,89,17.2,0.0,0,9.7,59,0.0,0.0,0.00
2,2024-06-27 02:00:00+02:00,2024-06-27 13:20:32+02:00,183,16.9,92,17.6,0.0,0,8.1,69,0.0,0.0,0.00
3,2024-06-27 03:00:00+02:00,2024-06-27 13:20:32+02:00,183,17.5,91,18.3,0.0,0,8.5,62,0.0,0.0,0.00
4,2024-06-27 04:00:00+02:00,2024-06-27 13:20:32+02:00,183,17.6,91,18.7,0.0,79,7.1,66,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,2024-07-03 19:00:00+02:00,2024-06-27 13:20:32+02:00,662,22.1,62,22.5,0.0,92,7.4,241,79.0,107.0,3600.00
332,2024-07-03 20:00:00+02:00,2024-06-27 13:20:32+02:00,662,21.3,68,22.0,0.0,95,6.8,238,26.0,65.0,903.14
333,2024-07-03 21:00:00+02:00,2024-06-27 13:20:32+02:00,662,20.4,73,21.3,0.0,97,6.4,232,8.0,33.0,0.00
334,2024-07-03 22:00:00+02:00,2024-06-27 13:20:32+02:00,662,19.4,77,20.4,0.0,98,5.9,223,2.0,7.0,0.00


In [74]:
import psycopg2
from sqlalchemy import create_engine, DateTime, Float, String, Integer, Column
from dotenv import load_dotenv
import os

# Load login data from .env file
load_dotenv()

DB_NAME = os.getenv('DB_NAME')
DB_USERNAME = os.getenv('DB_USERNAME')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')

DB_STRING = f'postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'

# Create SQLAlchemy engine
engine = create_engine(DB_STRING)

# Create a new connection using psycopg2 for non-pandas operations
conn = psycopg2.connect(
    database=DB_NAME,
    user=DB_USERNAME,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT
)

try:
    cursor = conn.cursor()
    cursor.execute("SELECT version();")
    record = cursor.fetchone()
    print("You are connected to -", record, "\n")
    
    # Load data from the database using SQLAlchemy engine
    print("Consumption data loading..")
    query_string1 = 'SELECT * FROM "02_silver"."dim_weather_stations"'
    weather_stations = pd.read_sql(query_string1, engine)
    print("Loading finished!")
    
    stations_id = weather_stations.stations_id.to_list    
    latitude = weather_stations.latitude.to_list
    longitude = weather_stations.longitude.to_list

except Exception as error:
    print("Error while connecting to PostgreSQL:", error)
    
finally:
    if conn:
        cursor.close()
        conn.close()
        print("PostgreSQL connection is closed")
            


You are connected to - ('PostgreSQL 16.3 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-12), 64-bit',) 

Consumption data loading..
Loading finished!
PostgreSQL connection is closed


<bound method IndexOpsMixin.tolist of 0     54.6791
1     52.2915
2     53.0451
3     50.7913
4     51.1278
5     50.4283
6     48.5451
7     51.1620
8     53.6330
9     47.8010
10    47.6952
11    52.2090
12    53.7123
13    49.5030
14    52.3810
15    54.1803
16    49.2128
17    54.3279
18    54.5280
19    48.8281
20    49.7479
21    48.4024
22    49.7704
23    47.4210
Name: latitude, dtype: float64>

In [80]:
import psycopg2
from sqlalchemy import create_engine, DateTime, Float, String, Integer, Column
from dotenv import load_dotenv
import os

# Load login data from .env file
load_dotenv()

DB_NAME = os.getenv('DB_NAME')
DB_USERNAME = os.getenv('DB_USERNAME')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')

DB_STRING = f'postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'

# Create SQLAlchemy engine
engine = create_engine(DB_STRING)

# Create a new connection using psycopg2 for non-pandas operations
conn = psycopg2.connect(
    database=DB_NAME,
    user=DB_USERNAME,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT
)

try:
    cursor = conn.cursor()
    cursor.execute("SELECT version();")
    record = cursor.fetchone()
    print("You are connected to -", record, "\n")
    
    # Load data from the database using SQLAlchemy engine
    print("Consumption data loading..")
    query_string1 = 'SELECT * FROM "02_silver"."dim_weather_stations"'
    weather_stations = pd.read_sql(query_string1, engine)
    print("Loading finished!")
    
    stations_id = weather_stations.stations_id.to_list()    
    stations_latitude = weather_stations.latitude.to_list()
    stations_longitude = weather_stations.longitude.to_list()

except Exception as error:
    print("Error while connecting to PostgreSQL:", error)
    
finally:
    if conn:
        cursor.close()
        conn.close()
        print("PostgreSQL connection is closed")

You are connected to - ('PostgreSQL 16.3 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-12), 64-bit',) 

Consumption data loading..
Loading finished!
PostgreSQL connection is closed


In [81]:
type(stations_id)

list

In [82]:
import requests_cache
import pandas as pd
from retry_requests import retry
import requests

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)

# Function to fetch weather data for a specific station
def fetch_weather_data(station_id, latitude, longitude):
    url = "https://api.open-meteo.com/v1/dwd-icon"
    timezone = "Europe/Berlin"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "hourly": [
            "temperature_2m",
            "relative_humidity_2m",
            "apparent_temperature",
            "precipitation",
            "cloud_cover",
            "wind_speed_10m",
            "wind_direction_10m",
            "direct_radiation",
            "diffuse_radiation",
            "sunshine_duration"
        ],
        "timezone": timezone
    }
    response = retry_session.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    
    hourly = data['hourly']
    dates = pd.date_range(
        start=pd.to_datetime(hourly['time'][0], utc=False),
        periods=len(hourly['time']),
        freq=pd.Timedelta(hours=1)
    )
    
    
    fetch_timestamp = pd.Timestamp.now(tz=timezone)
    
    hourly_data = pd.DataFrame({
        'timestamp_forecast': dates.tz_localize(tz=timezone),
        'timestamp_fetched': fetch_timestamp.floor("s"),
        'stations_id': station_id,
        'temperature_2m': hourly['temperature_2m'],
        'relative_humidity_2m': hourly['relative_humidity_2m'],
        'apparent_temperature': hourly['apparent_temperature'],
        'precipitation': hourly['precipitation'],
        'cloud_cover': hourly['cloud_cover'],
        'wind_speed_10m': hourly['wind_speed_10m'],
        'wind_direction_10m': hourly['wind_direction_10m'],
        'direct_radiation': hourly['direct_radiation'],
        'diffuse_radiation': hourly['diffuse_radiation'],
        'sunshine_duration': hourly['sunshine_duration']               
    })
    
    return hourly_data

all_data = []

for i in range(len(stations_id)):
    station_data = fetch_weather_data(stations_id[i], stations_latitude[i], stations_longitude[i])
    all_data.append(station_data)

# Combine all data into a single DataFrame
final_weather_data = pd.concat(all_data, ignore_index=True)

final_weather_data.to_sql('fact_weather_forecast', engine, schema='02_silver', if_exists='replace', index=False)

32