In [1]:
import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

In [2]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 52.52,
	"longitude": 13.41,
	"start_date": "2023-01-01",
	"end_date": "2025-10-15",
	"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "wind_speed_10m", "cloud_cover", "pressure_msl"],
	"timezone": "Europe/Berlin",
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone: {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(5).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(6).ValuesAsNumpy()
hourly_pressure_msl = hourly.Variables(7).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["apparent_temperature"] = hourly_apparent_temperature
hourly_data["precipitation"] = hourly_precipitation
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["pressure_msl"] = hourly_pressure_msl

hourly_dataframe = pd.DataFrame(data = hourly_data)
print("\nHourly data\n", hourly_dataframe)

Coordinates: 52.5483283996582°N 13.407821655273438°E
Elevation: 38.0 m asl
Timezone: b'Europe/Berlin'b'GMT+1'
Timezone difference to GMT+0: 3600s

Hourly data
                            date  temperature_2m  relative_humidity_2m  \
0     2022-12-31 23:00:00+00:00         14.5585             63.924866   
1     2023-01-01 00:00:00+00:00         15.3085             58.668945   
2     2023-01-01 01:00:00+00:00         15.7085             54.692139   
3     2023-01-01 02:00:00+00:00         15.7585             52.490711   
4     2023-01-01 03:00:00+00:00         15.8585             50.558018   
...                         ...             ...                   ...   
24451 2025-10-15 18:00:00+00:00         11.7585             86.669823   
24452 2025-10-15 19:00:00+00:00         11.6085             87.826500   
24453 2025-10-15 20:00:00+00:00         11.5585             87.234390   
24454 2025-10-15 21:00:00+00:00         11.5085             87.229691   
24455 2025-10-15 22:00:00+00:00      

In [None]:
variables = ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "wind_speed_10m", "cloud_cover", "pressure_msl"]
tmp = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
for idx, var in enumerate(variables):
    tmp[var] = hourly.Variables(idx).ValuesAsNumpy()

pd.DataFrame(tmp)

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation,wind_speed_10m,cloud_cover,pressure_msl
0,2022-12-31 23:00:00+00:00,14.5585,63.924866,7.808500,9.859200,0.0,28.312117,100.0,1011.000000
1,2023-01-01 00:00:00+00:00,15.3085,58.668945,7.258500,10.149809,0.0,30.532158,100.0,1010.799988
2,2023-01-01 01:00:00+00:00,15.7085,54.692139,6.608500,10.370908,0.0,30.699368,100.0,1010.900024
3,2023-01-01 02:00:00+00:00,15.7585,52.490711,6.058500,10.213141,0.0,31.259941,100.0,1011.299988
4,2023-01-01 03:00:00+00:00,15.8585,50.558018,5.608500,10.631021,0.0,28.394392,100.0,1011.299988
...,...,...,...,...,...,...,...,...,...
24451,2025-10-15 18:00:00+00:00,11.7585,86.669823,9.608500,10.796570,0.0,5.950899,100.0,1024.099976
24452,2025-10-15 19:00:00+00:00,11.6085,87.826500,9.658501,10.686743,0.0,5.771239,100.0,1023.900024
24453,2025-10-15 20:00:00+00:00,11.5585,87.234390,9.508500,10.426756,0.0,6.924738,100.0,1023.599976
24454,2025-10-15 21:00:00+00:00,11.5085,87.229691,9.458500,10.183384,0.0,8.155807,100.0,1023.500000


In [None]:
hourly_dataframe.to_csv("./raw_data/weather_data_20230101-20251015.csv")

In [3]:
from statsmodels.tsa.seasonal import seasonal_decompose
hourly_dataframe.set_index("date").asfreq("h")
results = seasonal_decompose(hourly_dataframe[['temperature_2m']], model="additive")
results.plot()

ValueError: You must specify a period or x must be a pandas object with a PeriodIndex or a DatetimeIndex with a freq not set to None

In [None]:
!pip install geopy

In [13]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="my_geocoder")
location = geolocator.geocode("asdasd")
print(location.latitude, location.longitude)

AttributeError: 'NoneType' object has no attribute 'latitude'

In [25]:
a = hourly.Variables(0).ValuesAsNumpy()
b = hourly.Variables(1).ValuesAsNumpy()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24446,24447,24448,24449,24450,24451,24452,24453,24454,24455
0,14.5585,15.3085,15.7085,15.7585,15.8585,15.8085,14.6085,14.4585,14.4585,14.6085,...,13.5085,13.7085,13.3585,12.408501,11.908501,11.7585,11.6085,11.5585,11.5085,11.3085
1,63.924866,58.668945,54.692139,52.490711,50.558018,49.503017,54.973766,56.282368,56.672729,56.906807,...,72.980606,68.700912,73.200577,79.487122,82.97953,86.669823,87.8265,87.23439,87.229691,88.689003


# test of functions

In [52]:
#!pip install geopy
#!pip install timezonefinder

Collecting timezonefinder
  Downloading timezonefinder-8.1.0-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.2/28.2 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Collecting h3>4
  Downloading h3-4.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Installing collected packages: h3, timezonefinder
Successfully installed h3-4.3.1 timezonefinder-8.1.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [59]:
from geopy.geocoders import Nominatim
from timezonefinder import TimezoneFinder
from pathlib import Path

In [54]:
def get_coords_from_location_name(location: str = "Berlin, Germany"):
    '''
    Translates place name like "Berlin, Germany" into (latitude, longitude)
    '''
    geolocator = Nominatim(user_agent="my_geocoder")
    location = geolocator.geocode(location)
    if location is None:
        print("Location could not be resolved. Using 'Berlin, Germany' instead.")
        location = geolocator.geocode("Berlin, Germany")
    return (location.latitude, location.longitude)

def get_timezone_from_coords(latitude, longitude):
    tf = TimezoneFinder()
    tz_name = tf.timezone_at(lat=latitude, lng=longitude)
    return tz_name


In [55]:
lat, lon = get_coords_from_location_name("Befghxfrmany")
lat, lon

Location could not be resolved. Using 'Berlin, Germany' instead.


(52.5173885, 13.3951309)

In [None]:
def get_data_with_cache(start_date: str = "2023-01-01",
                        end_date: str = "2025-10-15",
                        location: str = "Berlin, Germany"):
    """
    Retrieve data through the Open-Meteo API (), or from local file, if the file exists
    Stores data at raw_data/weatherdata_startdate_enddate_location.csv if retrieved from API for future use
    Returns a pandas.DataFrame
    """

    latitude, longitude = get_coords_from_location_name(location)
    timezone = get_timezone_from_coords(latitude, longitude)

    # TODO: load variables from parameters file instead
    variables = ["temperature_2m",
                 "relative_humidity_2m",
                 "dew_point_2m",
                 "apparent_temperature",
                 "precipitation",
                 "wind_speed_10m",
                 "cloud_cover",
                 "pressure_msl"]

    # TODO: get this from parameters file instead
    LOCAL_DATA_PATH = "/home/stefanas/code/pawarsp/what-to-wear-today"
    tmp_location = location.replace(", ", "_").lower()
    tmp_start_date = start_date.replace("-", "")
    tmp_end_date = end_date.replace("-", "")
    filepath = Path(LOCAL_DATA_PATH).joinpath("raw_data", f'{tmp_location}_{tmp_start_date}_{tmp_end_date}.csv')

    if filepath.is_file():
        print("Load data from local CSV.")
        df = pd.read_csv(filepath, header="infer")

    else:
        print("Load data from Open-Meteo server.")

        # Setup the Open-Meteo API client with cache and retry on error
        cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
        retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
        openmeteo = openmeteo_requests.Client(session = retry_session)

        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": start_date,
            "end_date": end_date,
            "hourly": variables,
            "timezone": timezone,
        }
        responses = openmeteo.weather_api(url, params=params)
        response = responses[0]
        hourly = response.Hourly()
        data = {"date": pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
            end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = hourly.Interval()),
            inclusive = "left"
        )}

        for idx, var in enumerate(params):
            data[var] = hourly.Variables(idx).ValuesAsNumpy()

        df = pd.DataFrame(data)

        # save data to csv
        if df_shape[0] > 1:
            df.to_csv(filepath, index=False)
    return df

In [57]:
start_date = "2023-01-01"
end_date = "2025-10-15"
location = "Berlin, Germany"

In [63]:
get_data_with_cache(start_date, end_date, location)

AttributeError: 'str' object has no attribute 'is_file'

In [None]:
filename = f'raw_data/weatherdata{start_date.replace("-", "")}_{end_date.replace("-", "")}_{location.replace(", ", "_").lower()}.csv'
filename

'raw_data/weatherdata20230101_20251015_berlin_germany.csv'