In [1]:
# Third-party packages this notebook imports; uncomment a line to install it.
#!pip install pandas
#!pip install openmeteo-requests
#!pip install requests-cache
#!pip install retry-requests
# NOTE(review): the PyPI distribution that provides `from kafka import KafkaProducer`
# is presumably `kafka-python`, not `kafka` — confirm before relying on this line.
#!pip install kafka

In [2]:
#imports
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from kafka import KafkaProducer
import json

In [3]:
# Configuration: Kafka broker address, Open-Meteo archive endpoint and query parameters.
kafka_server = 'localhost:9092'
source_url = "https://archive-api.open-meteo.com/v1/archive"

# One fixed coordinate and date range; the entries of "hourly" are the requested variables.
api_params = dict(
    latitude=48.210033,
    longitude=16.363449,
    start_date="2020-01-01",
    end_date="2024-01-11",
    hourly=["temperature_2m", "relative_humidity_2m", "wind_speed_10m"],
)

In [8]:
#set up api
# Cached HTTP session stored under '.cache', wrapped in a retrying session,
# then handed to the Open-Meteo client.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after = -1,
)
retry_session = retry(
    cache_session,
    retries = 5,
    backoff_factor = 0.2,
)
openmeteo = openmeteo_requests.Client(session = retry_session)

# The order of variables in hourly or daily is important to assign them correctly below:
# positions 0..2 must line up with the order of api_params["hourly"].

#get api data and parse into variables
responses = openmeteo.weather_api(source_url, params=api_params)
response = responses[0]  # first entry of the returned list
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_wind_speed_10m,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(3))
#create dataframe
# Timestamps from the response's start time up to (but excluding) its end time,
# spaced by the response's interval in seconds.
hourly_times = pd.date_range(
    start = pd.to_datetime(hourly.Time(), unit = "s"),
    end = pd.to_datetime(hourly.TimeEnd(), unit = "s"),
    freq = pd.Timedelta(seconds = hourly.Interval()),
    inclusive = "left",
)
# The time column is stored as formatted strings rather than datetime values.
hourly_data = {
    "time": hourly_times.strftime('%Y-%m-%d %H:%M:%S'),
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "wind_speed_10m": hourly_wind_speed_10m,
}
hourly_dataframe = pd.DataFrame(data = hourly_data)
hourly_dataframe.head()

Unnamed: 0,time,temperature_2m,relative_humidity_2m,wind_speed_10m
0,2020-01-01 00:00:00,1.7735,79.606735,19.191748
1,2020-01-01 01:00:00,1.9735,78.477814,19.665359
2,2020-01-01 02:00:00,1.8235,78.454346,19.052811
3,2020-01-01 03:00:00,1.7735,77.872047,19.191748
4,2020-01-01 04:00:00,1.3235,78.665314,18.204042


In [9]:
#init producer
def _to_json_bytes(v):
    """Serialize a message value to UTF-8 encoded JSON bytes."""
    return json.dumps(v).encode('utf-8')


# Producer connected to the configured broker; every value passes through the serializer above.
producer = KafkaProducer(
    bootstrap_servers=kafka_server,
    value_serializer=_to_json_bytes,
)

In [10]:
#produce df to kafka row by row
# Each DataFrame row is published as one message (a column -> value dict) on the 'weather' topic.
for _, row in hourly_dataframe.iterrows():
    producer.send('weather', row.to_dict())

# Flush once after the loop instead of after every row: flush() blocks until all
# pending records are delivered, so calling it per message forces one blocking
# round trip per row and prevents the producer from batching.
producer.flush()

In [7]:
# Close the Kafka producer created earlier in this notebook.
producer.close()

In [1]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after = 3600,
)
retry_session = retry(
    cache_session,
    retries = 5,
    backoff_factor = 0.2,
)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = dict(
    latitude=52.52,
    longitude=13.41,
    hourly=[
        "temperature_2m",
        "wind_speed_10m",
        "relative_humidity_2m",
        "dew_point_2m",
        "rain",
        "snowfall",
        "snow_depth",
        "visibility",
    ],
    wind_speed_unit="ms",
)
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
# Keep a space between the timezone name and its abbreviation so the two values
# do not run together in the printed line.
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_wind_speed_10m,
    hourly_relative_humidity_2m,
    hourly_dew_point_2m,
    hourly_rain,
    hourly_snowfall,
    hourly_snow_depth,
    hourly_visibility,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(8))

# UTC timestamps from the response's start time up to (but excluding) its end time,
# spaced by the response's interval in seconds.
hourly_dates = pd.date_range(
    start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
    end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = hourly.Interval()),
    inclusive = "left",
)

# Column order matches the order in which the variables were requested.
hourly_data = {
    "date": hourly_dates,
    "temperature_2m": hourly_temperature_2m,
    "wind_speed_10m": hourly_wind_speed_10m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "dew_point_2m": hourly_dew_point_2m,
    "rain": hourly_rain,
    "snowfall": hourly_snowfall,
    "snow_depth": hourly_snow_depth,
    "visibility": hourly_visibility,
}

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)



Coordinates 52.52000045776367°N 13.419998168945312°E
Elevation 38.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
                         date  temperature_2m  wind_speed_10m  \
0   2025-04-07 00:00:00+00:00        4.745500        2.607681   
1   2025-04-07 01:00:00+00:00        4.345500        2.830194   
2   2025-04-07 02:00:00+00:00        4.045500        2.745906   
3   2025-04-07 03:00:00+00:00        4.845500        2.906888   
4   2025-04-07 04:00:00+00:00        4.645500        3.182766   
..                        ...             ...             ...   
163 2025-04-13 19:00:00+00:00       18.769501        0.824621   
164 2025-04-13 20:00:00+00:00       17.819500        0.707107   
165 2025-04-13 21:00:00+00:00       16.969501        0.707107   
166 2025-04-13 22:00:00+00:00       16.369501        0.632456   
167 2025-04-13 23:00:00+00:00       15.869500        0.721110   

     relative_humidity_2m  dew_point_2m  rain  snowfall  snow_depth  \
0                    49