## Fetching data from [Open-Meteo Historical Weather API ](https://open-meteo.com/en/docs/historical-weather-api). 


In [34]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 45.7537,
	"longitude": 21.2257,
	"start_date": "2000-01-01",
	"end_date": "2024-10-23",
	"hourly": ["weather_code", "temperature_2m", "relative_humidity_2m", "precipitation", "wind_speed_10m", "wind_direction_10m", "wind_gusts_10m"],
	"daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "precipitation_sum", "wind_speed_10m_max", "wind_gusts_10m_max", "wind_direction_10m_dominant"],
	"timezone": "auto"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print("Details about Timisoara weather (hourly and daily) between 2000-2024")
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
hourly_weather_code = hourly.Variables(3).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(5).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(6).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["weather_code"] = hourly_weather_code
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m

hourly_dataframe = pd.DataFrame(data = hourly_data)

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_weather_code = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
daily_temperature_2m_mean = daily.Variables(3).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(5).ValuesAsNumpy()
daily_wind_gusts_10m_max = daily.Variables(6).ValuesAsNumpy()
daily_wind_direction_10m_dominant = daily.Variables(7).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["weather_code"] = daily_weather_code
daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["temperature_2m_mean"] = daily_temperature_2m_mean
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["wind_gusts_10m_max"] = daily_wind_gusts_10m_max
daily_data["wind_direction_10m_dominant"] = daily_wind_direction_10m_dominant

daily_dataframe = pd.DataFrame(data = daily_data)
# print df shapes
print("""
      .
      .
      .
""")
print(f"Hourly dataframe shape: {hourly_dataframe.shape}. Null values: \n{hourly_dataframe.isna().sum()}\nDaily dataframe shape: {daily_dataframe.shape}. Null values: \n{daily_dataframe.isna().sum()}")


Details about Timisoara weather (hourly and daily) between 2000-2024
Coordinates 45.729347229003906°N 21.29337501525879°E
Elevation 96.0 m asl
Timezone b'Europe/Bucharest' b'EEST'
Timezone difference to GMT+0 10800 s

      .
      .
      .

Hourly dataframe shape: (217512, 8). Null values: 
date                     0
temperature_2m          44
relative_humidity_2m    44
precipitation           44
weather_code            44
wind_speed_10m          44
wind_direction_10m      44
wind_gusts_10m          44
dtype: int64
Daily dataframe shape: (9063, 9). Null values: 
date                           0
weather_code                   1
temperature_2m_max             1
temperature_2m_min             1
temperature_2m_mean            2
precipitation_sum              2
wind_speed_10m_max             1
wind_gusts_10m_max             1
wind_direction_10m_dominant    2
dtype: int64


#### Data cleaning, exploratory data analysis for **Hourly dataframe**

In [35]:
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,precipitation,weather_code,wind_speed_10m,wind_direction_10m,wind_gusts_10m
0,1999-12-31 21:00:00+00:00,3.0,-1.128,86.920624,0.0,13.722565,355.486084,25.199999
1,1999-12-31 22:00:00+00:00,3.0,-1.328,86.577271,0.0,12.287555,354.957642,25.559999
2,1999-12-31 23:00:00+00:00,3.0,-1.778,87.833557,0.0,12.496719,348.366394,24.48
3,2000-01-01 00:00:00+00:00,3.0,-1.978,87.486717,0.0,12.849528,348.690094,23.4
4,2000-01-01 01:00:00+00:00,3.0,-2.128,86.170082,0.0,12.758432,343.610382,24.84


In [37]:
hourly_dataframe.dropna(inplace=True)
hourly_dataframe.isna().sum()

date                    0
temperature_2m          0
relative_humidity_2m    0
precipitation           0
weather_code            0
wind_speed_10m          0
wind_direction_10m      0
wind_gusts_10m          0
dtype: int64

#### Data cleaning, exploratory data analysis for **Daily dataframe**

In [38]:
daily_dataframe.head()

Unnamed: 0,date,weather_code,temperature_2m_max,temperature_2m_min,temperature_2m_mean,precipitation_sum,wind_speed_10m_max,wind_gusts_10m_max,wind_direction_10m_dominant
0,1999-12-31 21:00:00+00:00,3.0,1.022,-3.178,-1.711334,0.0,14.003029,29.519999,334.442993
1,2000-01-01 21:00:00+00:00,3.0,1.472,-3.528,-1.278,0.0,9.007196,19.440001,15.975966
2,2000-01-02 21:00:00+00:00,3.0,-0.228,-4.928,-2.430083,0.0,7.091177,18.719999,244.04129
3,2000-01-03 21:00:00+00:00,3.0,1.922,-1.078,0.167833,0.0,5.860375,14.4,173.779129
4,2000-01-04 21:00:00+00:00,3.0,1.872,-1.078,0.063667,0.0,7.704336,16.919998,243.012177


In [39]:
daily_dataframe.dropna(inplace=True)
print("Cleaned.")
daily_dataframe.isna().sum()

Cleaned.


date                           0
weather_code                   0
temperature_2m_max             0
temperature_2m_min             0
temperature_2m_mean            0
precipitation_sum              0
wind_speed_10m_max             0
wind_gusts_10m_max             0
wind_direction_10m_dominant    0
dtype: int64