### Notebook purpose
- Connect to sql database and get table of coordinates
- Connect to open-meteo API
- Extract historical weather data for defined coordinated
- Convert data into a pandas dataframe object
- Create a table in SQL database (hosted on Microsoft Azure)
- Store historical weather data in SQL DB

In [15]:
# Import required libraries
import numpy as np
import pandas as pd

import openmeteo_requests
import requests_cache
from retry_requests import retry

import overpy
import json
import os
from sqlalchemy import create_engine
import pyodbc
import urllib



In [10]:
# Load database access configuration from config/db_config.json
with open('../config/db_config.json', 'r') as f:
    db_config = json.load(f)

# Access db credentials
server = db_config['server']
database = db_config['database']
db_user = db_config['db_user']
db_password = db_config['db_password']

In [11]:
# Define the connection string
connection_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE={database};UID={db_user};PWD={db_password}'

# Establish connection
params = urllib.parse.quote_plus(connection_string)
engine = create_engine(f'mssql+pyodbc:///?odbc_connect={params}')

In [12]:
# Run test query to check connection and data availability
df_test = pd.read_sql_query('SELECT TOP 5 * FROM wanderwege', con=engine)

print(df_test)

     timestamp_apicall      id                                          name  \
0  2024-09-19 17:03:32   22614  Nationalpark Wanderroute 15 (Munt la Schera)   
1  2024-09-19 17:03:32  103607                                 Wanderwege SG   
2  2024-09-19 17:03:32  112830                Uetliberg - Uetliberg Uto Kulm   
3  2024-09-19 17:03:32  112831                           Folenweid - Baldern   
4  2024-09-19 17:03:32  112833                          Felsenegg - Balderen   

          lat         lon  
0  46.6501430  10.2301992  
1  47.4309774   9.6201700  
2  47.3511680   8.4897796  
3  47.3291235   8.5007261  
4  47.3152439   8.5050559  


In [13]:
# Get trail coordinates from sql table
df_trailcoords = pd.read_sql_query('SELECT * FROM wanderwege', con=engine)

print(df_trailcoords)

         timestamp_apicall        id  \
0      2024-09-19 17:03:32     22614   
1      2024-09-19 17:03:32    103607   
2      2024-09-19 17:03:32    112830   
3      2024-09-19 17:03:32    112831   
4      2024-09-19 17:03:32    112833   
...                    ...       ...   
15191  2024-09-19 17:03:32  18057730   
15192  2024-09-19 17:03:32  18057731   
15193  2024-09-19 17:03:32  18057943   
15194  2024-09-19 17:03:32  18058034   
15195  2024-09-19 17:03:32  18058036   

                                               name         lat         lon  
0      Nationalpark Wanderroute 15 (Munt la Schera)  46.6501430  10.2301992  
1                                     Wanderwege SG  47.4309774   9.6201700  
2                    Uetliberg - Uetliberg Uto Kulm  47.3511680   8.4897796  
3                               Folenweid - Baldern  47.3291235   8.5007261  
4                              Felsenegg - Balderen  47.3152439   8.5050559  
...                                             ...

In [16]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [89]:
# Define a start and end date of data to fetch from Open-Meteo API
# start_date = "1941-01-01"
start_date = "2022-09-18"
end_date = "2024-09-18"

AttributeError: 'Client' object has no attribute 'close'

In [91]:
latitude = df_trailcoords['lat'].to_list()[:181]
longitude = df_trailcoords['lon'].to_list()[:181]

#latitude = df_trailcoords['lat'].to_list()
#longitude = df_trailcoords['lon'].to_list()
print("Latidute: ",latitude)
print("Longitude: ",longitude)

print("Lenght of latitude-list: ",len(latitude))
print("Lenght of Longitude-list: ",len(longitude))

# 15196 coordinaten, 2 Tage -> Abbruch: HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Max retries exceeded with url:
# 1000 coordinaten, 2 Tage -> Abbruch: 414 Client Error: Request-URI Too Large for url:
# 500 coordinaten, 2 Tage -> Abbruch: 414 Client Error: Request-URI Too Large for url:
# 250 coordinaten, 2 Tage -> Abbruch: 414 Client Error: Request-URI Too Large for url:
# 200 coordinaten, 2 Tage -> Abbruch: 414 Client Error: Request-URI Too Large for url:

# 100 coordinaten, 2 Tage -> OK
# 150 coordinaten, 2 Tage -> OK
# 175 coordinaten, 2 Tage -> OK
# 180 coordinaten, 2 Tage -> OK
# 181 coordinaten, 2 Tage -> OK

# 182 coordinaten, 2 Tage -> Abbruch: 414 Client Error: Request-URI Too Large for url:

# 181 coordinaten, 365 Tage -> OK

# 181 coordinaten, 730 Tage -> ?? OpenMeteoRequestsError: {'error': True, 'reason': 'Hourly API request limit exceeded. Please try again in the next hour.'}
# Morgen erneut testen


Latidute:  ['46.6501430', '47.4309774', '47.3511680', '47.3291235', '47.3152439', '47.3164826', '46.6864945', '46.7582930', '47.5352432', '46.8255681', '46.8326800', '46.8356755', '46.8435439', '46.8556668', '46.8634566', '46.8609414', '46.8964957', '47.3265932', '47.3204765', '47.3139131', '47.3096563', '47.3005555', '47.2960901', '47.2915794', '47.2808333', '47.2717847', '47.2671226', '47.3028875', '47.2857101', '47.2712226', '47.2635986', '47.2647956', '47.5490567', '47.3623550', '47.3552026', '47.3489275', '47.2887772', '47.2950460', '47.2852540', '47.2831134', '47.2750342', '47.2776044', '47.2819308', '47.2784240', '47.2686736', '47.2691917', '47.2684960', '47.2646347', '47.2616268', '47.2928383', '47.2889124', '47.2793684', '47.2739260', '47.2715449', '47.2672644', '47.2637302', '47.2678451', '47.2698096', '47.2540752', '47.2706680', '47.2729998', '47.2642278', '47.2828357', '47.2736486', '47.3001847', '47.2585925', '47.2573848', '47.2841676', '47.2892260', '47.2923335', '47.2974

In [92]:
# 
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": latitude,
	"longitude": longitude,
	"start_date": start_date,
	"end_date": end_date,
	"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "rain", "snowfall", "snow_depth", "weather_code", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "wind_speed_10m", "wind_gusts_10m", "is_day", "sunshine_duration"],
	"daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "apparent_temperature_max", "apparent_temperature_min", "apparent_temperature_mean", "sunrise", "sunset", "daylight_duration", "sunshine_duration", "precipitation_sum", "rain_sum", "snowfall_sum", "precipitation_hours", "wind_speed_10m_max", "wind_gusts_10m_max"],
	"timezone": "Europe/Berlin"
}
responses = openmeteo.weather_api(url, params=params)

OpenMeteoRequestsError: {'error': True, 'reason': 'Hourly API request limit exceeded. Please try again in the next hour.'}

In [95]:
openmeteo.close() 

AttributeError: 'Client' object has no attribute 'close'

In [None]:
""" 

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

"""

In [None]:
"""
# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()
hourly_rain = hourly.Variables(5).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(6).ValuesAsNumpy()
hourly_snow_depth = hourly.Variables(7).ValuesAsNumpy()
hourly_weather_code = hourly.Variables(8).ValuesAsNumpy()
hourly_pressure_msl = hourly.Variables(9).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(10).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(11).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(12).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(13).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(14).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(15).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(16).ValuesAsNumpy()
hourly_is_day = hourly.Variables(17).ValuesAsNumpy()
hourly_sunshine_duration = hourly.Variables(18).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["lat"] = response.Latitude()
hourly_data["lon"] = response.Longitude()

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["apparent_temperature"] = hourly_apparent_temperature
hourly_data["precipitation"] = hourly_precipitation
hourly_data["rain"] = hourly_rain
hourly_data["snowfall"] = hourly_snowfall
hourly_data["snow_depth"] = hourly_snow_depth
hourly_data["weather_code"] = hourly_weather_code
hourly_data["pressure_msl"] = hourly_pressure_msl
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_data["is_day"] = hourly_is_day
hourly_data["sunshine_duration"] = hourly_sunshine_duration

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

"""

In [None]:
"""
# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_weather_code = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
daily_temperature_2m_mean = daily.Variables(3).ValuesAsNumpy()
daily_apparent_temperature_max = daily.Variables(4).ValuesAsNumpy()
daily_apparent_temperature_min = daily.Variables(5).ValuesAsNumpy()
daily_apparent_temperature_mean = daily.Variables(6).ValuesAsNumpy()
daily_sunrise = daily.Variables(7).ValuesAsNumpy()
daily_sunset = daily.Variables(8).ValuesAsNumpy()
daily_daylight_duration = daily.Variables(9).ValuesAsNumpy()
daily_sunshine_duration = daily.Variables(10).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(11).ValuesAsNumpy()
daily_rain_sum = daily.Variables(12).ValuesAsNumpy()
daily_snowfall_sum = daily.Variables(13).ValuesAsNumpy()
daily_precipitation_hours = daily.Variables(14).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(15).ValuesAsNumpy()
daily_wind_gusts_10m_max = daily.Variables(16).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}

daily_data["lat"] = response.Latitude()
daily_data["lon"] = response.Longitude()

daily_data["weather_code"] = daily_weather_code
daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["temperature_2m_mean"] = daily_temperature_2m_mean
daily_data["apparent_temperature_max"] = daily_apparent_temperature_max
daily_data["apparent_temperature_min"] = daily_apparent_temperature_min
daily_data["apparent_temperature_mean"] = daily_apparent_temperature_mean
daily_data["sunrise"] = daily_sunrise
daily_data["sunset"] = daily_sunset
daily_data["daylight_duration"] = daily_daylight_duration
daily_data["sunshine_duration"] = daily_sunshine_duration
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["rain_sum"] = daily_rain_sum
daily_data["snowfall_sum"] = daily_snowfall_sum
daily_data["precipitation_hours"] = daily_precipitation_hours
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["wind_gusts_10m_max"] = daily_wind_gusts_10m_max

daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)
"""

In [83]:
# Initialize a list to store data for each location
all_hourly_data = []

In [84]:
# Loop through all responses (for multiple locations)
for response in responses:
    # Process location metadata
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    # Process hourly data for this location
    hourly = response.Hourly()

    # Extract variables (same order as requested)
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "lat": response.Latitude(),
        "lon": response.Longitude(),
        "temperature_2m": hourly.Variables(0).ValuesAsNumpy(),
        "relative_humidity_2m": hourly.Variables(1).ValuesAsNumpy(),
        "dew_point_2m": hourly.Variables(2).ValuesAsNumpy(),
        "apparent_temperature": hourly.Variables(3).ValuesAsNumpy(),
        "precipitation": hourly.Variables(4).ValuesAsNumpy(),
        "rain": hourly.Variables(5).ValuesAsNumpy(),
        "snowfall": hourly.Variables(6).ValuesAsNumpy(),
        "snow_depth": hourly.Variables(7).ValuesAsNumpy(),
        "weather_code": hourly.Variables(8).ValuesAsNumpy(),
        "pressure_msl": hourly.Variables(9).ValuesAsNumpy(),
        "surface_pressure": hourly.Variables(10).ValuesAsNumpy(),
        "cloud_cover": hourly.Variables(11).ValuesAsNumpy(),
        "cloud_cover_low": hourly.Variables(12).ValuesAsNumpy(),
        "cloud_cover_mid": hourly.Variables(13).ValuesAsNumpy(),
        "cloud_cover_high": hourly.Variables(14).ValuesAsNumpy(),
        "wind_speed_10m": hourly.Variables(15).ValuesAsNumpy(),
        "wind_gusts_10m": hourly.Variables(16).ValuesAsNumpy(),
        "is_day": hourly.Variables(17).ValuesAsNumpy(),
        "sunshine_duration": hourly.Variables(18).ValuesAsNumpy(),
    }

    # Convert to DataFrame and append to list
    all_hourly_data.append(pd.DataFrame(hourly_data))


Coordinates 46.64323043823242°N 10.289855003356934°E
Elevation 2169.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
Coordinates 47.41651916503906°N 9.590164184570312°E
Elevation 493.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
Coordinates 47.34621810913086°N 8.396072387695312°E
Elevation 861.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
Coordinates 47.34621810913086°N 8.396072387695312°E
Elevation 748.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
Coordinates 47.34621810913086°N 8.396072387695312°E
Elevation 787.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
Coordinates 47.34621810913086°N 8.54337215423584°E
Elevation 498.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
Coordinates 46.713531494140625°N 8.564516067504883°E
Elevation 997.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT

In [85]:
# Concatenate all location data into a single DataFrame
hourly_dataframe = pd.concat(all_hourly_data, ignore_index=True)

# Print the resulting DataFrame
print(hourly_dataframe.info())
print(hourly_dataframe)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39096 entries, 0 to 39095
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   date                  39096 non-null  datetime64[ns, UTC]
 1   lat                   39096 non-null  float64            
 2   lon                   39096 non-null  float64            
 3   temperature_2m        35295 non-null  float32            
 4   relative_humidity_2m  35295 non-null  float32            
 5   dew_point_2m          35295 non-null  float32            
 6   apparent_temperature  35295 non-null  float32            
 7   precipitation         35295 non-null  float32            
 8   rain                  35295 non-null  float32            
 9   snowfall              35295 non-null  float32            
 10  snow_depth            17738 non-null  float32            
 11  weather_code          35295 non-null  float32            
 12  pres