In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Set up the Open-Meteo API client on top of a cached session (cache name
# '.cache', expire_after = 3600) wrapped with retry-on-error (5 retries).
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Request definition. Make sure all required weather variables are listed here.
# The position of each name in "hourly" / "daily" is the index that is passed
# to Variables(i) when the response is unpacked, so keep both in sync.
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
params = {
	"latitude": 45.7537,
	"longitude": 21.2257,
	"start_date": "2018-10-01",
	"end_date": "2024-10-01",
	"hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "cloud_cover", "wind_speed_10m", "wind_direction_10m", "wind_gusts_10m", "uv_index"],
	"daily": ["temperature_2m_max", "temperature_2m_min", "uv_index_max", "precipitation_sum"],
	"timezone": "auto"
}
responses = openmeteo.weather_api(url, params=params)

# Process location: only the first response is used.
response = responses[0]

# Timezone() and TimezoneAbbreviation() are returned as bytes, which an
# f-string renders as b'...'. Decode them so the summary prints plain text;
# non-bytes values (e.g. None) are passed through untouched.
timezone_name, timezone_abbreviation = (
	value.decode() if isinstance(value, bytes) else value
	for value in (response.Timezone(), response.TimezoneAbbreviation())
)

print("Details for Timisoara city from 1 oct 2018 to 1 oct 2024.")
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {timezone_name} {timezone_abbreviation}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(3).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(5).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(6).ValuesAsNumpy()
hourly_uv_index = hourly.Variables(7).ValuesAsNumpy()

# "date" holds one timestamp per step in [Time(), TimeEnd()), spaced by
# Interval() seconds. The timestamps are built with utc = True, so they stay in
# UTC even though the request asks for timezone "auto"; add
# response.UtcOffsetSeconds() if local wall-clock time is needed.
hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_data["uv_index"] = hourly_uv_index

hourly_dataframe = pd.DataFrame(data = hourly_data)

# NOTE(review): in the recorded run roughly 21.7k of the 52.6k hourly rows are
# NaN in every variable, starting with the very first rows. Presumably this
# endpoint has no data for the earliest part of the requested range - confirm
# against the API documentation before interpreting the 2018 start date.

# TODO: persist the frame after data cleaning. The earlier draft called
# hourly_dataframe.to_sql(file_path) with a .csv path, but to_sql() needs a
# table name plus a database connection; for a CSV file use e.g.
#   hourly_dataframe.to_csv("data/hourly_weather_data.csv", index = False)

Details for Timisoara city from 1 oct 2018 to 1 oct 2024.
Coordinates 45.75717544555664°N 21.21600341796875°E
Elevation 96.0 m asl
Timezone b'Europe/Bucharest' b'EEST'
Timezone difference to GMT+0 10800 s


In [2]:
# Unpack the daily block of the response. Variables(i) is positional: index i
# must correspond to the i-th variable name requested under "daily".
daily = response.Daily()
(
	daily_temperature_2m_max,
	daily_temperature_2m_min,
	daily_uv_index_max,
	daily_precipitation_sum,
) = (daily.Variables(position).ValuesAsNumpy() for position in range(4))

# One UTC timestamp per step in [Time(), TimeEnd()), spaced by Interval()
# seconds, followed by one column per requested daily variable.
daily_data = {
	"date": pd.date_range(
		start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
		end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = daily.Interval()),
		inclusive = "left",
	),
	"temperature_2m_max": daily_temperature_2m_max,
	"temperature_2m_min": daily_temperature_2m_min,
	"uv_index_max": daily_uv_index_max,
	"precipitation_sum": daily_precipitation_sum,
}

daily_dataframe = pd.DataFrame(daily_data)

# Saving is deferred until after data cleaning; the draft below is kept
# disabled on purpose.
# file_path = r"/workspaces/weather-scraper-analyzer/data/daily_weather_data.csv"
# daily_dataframe.to_sql(file_path)

#### Data cleaning and exploratory data analysis for the **hourly dataframe**

In [3]:
# Preview the first five hourly rows.
hourly_dataframe.head(n = 5)

Unnamed: 0,date,temperature_2m,relative_humidity_2m,precipitation,cloud_cover,wind_speed_10m,wind_direction_10m,wind_gusts_10m,uv_index
0,2018-09-30 21:00:00+00:00,,,,,,,,
1,2018-09-30 22:00:00+00:00,,,,,,,,
2,2018-09-30 23:00:00+00:00,,,,,,,,
3,2018-10-01 00:00:00+00:00,,,,,,,,
4,2018-10-01 01:00:00+00:00,,,,,,,,


In [6]:
# Frame size first, then the number of missing values in each column.
print(hourly_dataframe.shape, hourly_dataframe.isna().sum(), sep = "\n")

(52632, 9)
date                        0
temperature_2m          21699
relative_humidity_2m    21699
precipitation           21700
cloud_cover             21700
wind_speed_10m          21699
wind_direction_10m      21699
wind_gusts_10m          21699
uv_index                21700
dtype: int64


In [8]:
# Keep only the hourly rows in which every column has a value; the original
# row labels are preserved (no index reset).
h_df = hourly_dataframe.dropna(axis = "index", how = "any")
h_df.shape

(30932, 9)

#### Data cleaning and exploratory data analysis for the **daily dataframe**

In [4]:
# Preview the first five daily rows.
daily_dataframe.head(n = 5)

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,uv_index_max,precipitation_sum
0,2018-09-30 21:00:00+00:00,,,,
1,2018-10-01 21:00:00+00:00,,,,
2,2018-10-02 21:00:00+00:00,,,,
3,2018-10-03 21:00:00+00:00,,,,
4,2018-10-04 21:00:00+00:00,,,,
