In [40]:
import pandas as pd
import openmeteo_requests
import requests_cache
from retry_requests import retry

import os

In [41]:
# Open-Meteo client: HTTP responses are cached on disk under '.cache'
# (expire_after=-1) and failed requests are retried up to 5 times with a
# backoff factor of 0.2.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=-1,
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [42]:
# Load the cities table; the preview below shows the columns
# Country, Latitude, Longitude, Capital, elevation and dis.
df = pd.read_csv('data/cities.csv')

# Plain-text preview of the first five rows.
print(df.head())

       Country  Latitude  Longitude           Capital  elevation  dis
0  Afghanistan   34.5289    69.1725             Kabul       1800    0
1      Albania   41.3275    19.8189            Tirane        110    1
2      Algeria   36.7525     3.0420           Algiers        186    1
3      Andorra   42.5078     1.5211  Andorra la Vella       1045    0
4       Angola   -8.8368    13.2343            Luanda         75    1


In [43]:
# Non-null count per column of the cities table.
df.count()

Country      193
Latitude     193
Longitude    193
Capital      193
elevation    193
dis          193
dtype: int64

In [11]:
# Load the per-country metadata; each 'timezones' value is a list of dicts
# carrying (among other keys) a 'zoneName'.
timezones_df = pd.read_json('data/countries.json')
timezones_df.head()

Unnamed: 0,id,name,iso3,iso2,numeric_code,phone_code,capital,currency,currency_name,currency_symbol,...,native,region,subregion,nationality,timezones,translations,latitude,longitude,emoji,emojiU
0,1,Afghanistan,AFG,AF,4,93,Kabul,AFN,Afghan afghani,؋,...,افغانستان,Asia,Southern Asia,Afghan,"[{'zoneName': 'Asia/Kabul', 'gmtOffset': 16200...","{'kr': '아프가니스탄', 'pt-BR': 'Afeganistão', 'pt':...",33.0,65.0,🇦🇫,U+1F1E6 U+1F1EB
1,2,Aland Islands,ALA,AX,248,+358-18,Mariehamn,EUR,Euro,€,...,Åland,Europe,Northern Europe,Aland Island,"[{'zoneName': 'Europe/Mariehamn', 'gmtOffset':...","{'kr': '올란드 제도', 'pt-BR': 'Ilhas de Aland', 'p...",60.116667,19.9,🇦🇽,U+1F1E6 U+1F1FD
2,3,Albania,ALB,AL,8,355,Tirana,ALL,Albanian lek,Lek,...,Shqipëria,Europe,Southern Europe,Albanian,"[{'zoneName': 'Europe/Tirane', 'gmtOffset': 36...","{'kr': '알바니아', 'pt-BR': 'Albânia', 'pt': 'Albâ...",41.0,20.0,🇦🇱,U+1F1E6 U+1F1F1
3,4,Algeria,DZA,DZ,12,213,Algiers,DZD,Algerian dinar,دج,...,الجزائر,Africa,Northern Africa,Algerian,"[{'zoneName': 'Africa/Algiers', 'gmtOffset': 3...","{'kr': '알제리', 'pt-BR': 'Argélia', 'pt': 'Argél...",28.0,3.0,🇩🇿,U+1F1E9 U+1F1FF
4,5,American Samoa,ASM,AS,16,+1-684,Pago Pago,USD,US Dollar,$,...,American Samoa,Oceania,Polynesia,American Samoan,"[{'zoneName': 'Pacific/Pago_Pago', 'gmtOffset'...","{'kr': '아메리칸사모아', 'pt-BR': 'Samoa Americana', ...",-14.333333,-170.0,🇦🇸,U+1F1E6 U+1F1F8


In [44]:
# Non-null count per column of the country metadata.
# NOTE(review): the recorded output lists only name/capital/timezones, which
# matches neither the full frame loaded earlier nor the later two-column
# selection -- re-run with Restart & Run All to refresh it.
timezones_df.count()

name         250
capital      250
timezones    249
dtype: int64

In [13]:
# Keep only the country name and its list of timezone records.
timezones_df = timezones_df[['name', 'timezones']]

# Reduce every country's timezone list to the zoneName of its first entry.
# Countries whose 'timezones' value is missing (None/NaN) or an empty list are
# skipped. The records are collected first and the frame is built in one go,
# rather than growing a DataFrame with pd.concat on every iteration.
timezone_records = [
    {'name': country_name, 'timezones': zones[0]["zoneName"]}
    for country_name, zones in zip(timezones_df['name'], timezones_df['timezones'])
    if isinstance(zones, list) and zones
]
new_df = pd.DataFrame(timezone_records, columns=['name', 'timezones'])

new_df.head()

Unnamed: 0,name,timezones
0,Afghanistan,Asia/Kabul
1,Aland Islands,Europe/Mariehamn
2,Albania,Europe/Tirane
3,Algeria,Africa/Algiers
4,American Samoa,Pacific/Pago_Pago


In [14]:
# Non-null count per column of the name -> first-timezone table.
new_df.count()

name         249
timezones    249
dtype: int64

In [15]:
# Attach a timezone to every row of the cities table by matching its 'Country'
# against the metadata 'name'. A left join keeps cities without a match.
countries_timezones = df.merge(
    new_df,
    how='left',
    left_on='Country',
    right_on='name',
)
countries_timezones.head()

Unnamed: 0,Country,Latitude,Longitude,Capital,elevation,dis,name,timezones
0,Afghanistan,34.5289,69.1725,Kabul,1800,0,Afghanistan,Asia/Kabul
1,Albania,41.3275,19.8189,Tirane,110,1,Albania,Europe/Tirane
2,Algeria,36.7525,3.042,Algiers,186,1,Algeria,Africa/Algiers
3,Andorra,42.5078,1.5211,Andorra la Vella,1045,0,Andorra,Europe/Andorra
4,Angola,-8.8368,13.2343,Luanda,75,1,Angola,Africa/Luanda


In [16]:
# Non-null count per column after the left join. The recorded output shows 180
# non-null 'name'/'timezones' values against 193 rows, i.e. 13 countries found
# no match by name.
countries_timezones.count()

Country      193
Latitude     193
Longitude    193
Capital      193
elevation    193
dis          193
name         180
timezones    180
dtype: int64

In [109]:
# Reduce the merged frame to the five columns that are written to
# data/countries_timezones.csv (saved without the index column).
countries_timezones = countries_timezones[['Country', 'Capital', 'Latitude', 'Longitude', 'timezones']]
countries_timezones.to_csv('data/countries_timezones.csv', index=False)

# The preview comes last: only a cell's final expression is displayed, so a
# head() call in the middle of the cell was silently discarded.
countries_timezones.head()

In [None]:
# Download 2023 weather for every capital in `df` from the Open-Meteo archive
# API and write the hourly series to output/<Capital>_hourly.csv.
# Capitals whose CSV already exists are skipped, so the cell can be re-run to
# resume an interrupted download.
url = "https://archive-api.open-meteo.com/v1/archive"

# Make sure all required weather variables are listed here.
# The order of variables in hourly or daily is important: values are read back
# by position, so these lists drive both the request and the column names.
HOURLY_VARIABLES = [
    "temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature",
    "precipitation", "rain", "snowfall", "snow_depth", "weather_code",
    "pressure_msl", "surface_pressure",
    "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high",
    "et0_fao_evapotranspiration", "vapour_pressure_deficit",
    "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m",
    "wind_gusts_10m",
    "soil_temperature_0_to_7cm", "soil_temperature_7_to_28cm",
    "soil_temperature_28_to_100cm", "soil_temperature_100_to_255cm",
    "soil_moisture_0_to_7cm", "soil_moisture_7_to_28cm",
    "soil_moisture_28_to_100cm", "soil_moisture_100_to_255cm",
]
DAILY_VARIABLES = [
    "weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean",
    "apparent_temperature_max", "apparent_temperature_min", "apparent_temperature_mean",
    "sunrise", "sunset", "daylight_duration", "sunshine_duration",
    "precipitation_sum", "rain_sum", "snowfall_sum", "precipitation_hours",
    "wind_speed_10m_max", "wind_gusts_10m_max", "wind_direction_10m_dominant",
    "shortwave_radiation_sum", "et0_fao_evapotranspiration",
]

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('output', exist_ok=True)

for index, row in df.iterrows():
    # '*' characters in the path are replaced with '_' (same rule for the
    # existence check and the write, so it is computed once).
    hourly_path = f'output/{row["Capital"]}_hourly.csv'.replace("*", "_")
    if os.path.exists(hourly_path):
        continue

    params = {
        "latitude": row['Latitude'],
        "longitude": row['Longitude'],
        "start_date": "2023-01-01",
        "end_date": "2023-12-31",
        "hourly": HOURLY_VARIABLES,
        # NOTE(review): daily variables are requested for every capital but this
        # loop only stores the hourly series -- confirm whether they are needed.
        "daily": DAILY_VARIABLES,
        # NOTE(review): every location is requested in Bangkok time regardless
        # of where it is -- confirm this is intended rather than a per-city zone.
        "timezone": "Asia/Bangkok"
    }

    responses = openmeteo.weather_api(url, params=params)

    # Process first location. Add a for-loop for multiple locations or weather models
    response = responses[0]
    # Latitude is the north/south coordinate, longitude the east/west one.
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    # Process hourly data. The order of variables needs to be the same as requested.
    hourly = response.Hourly()
    hourly_data = {"date": pd.date_range(
        start = pd.to_datetime(hourly.Time(), unit = "s"),
        end = pd.to_datetime(hourly.TimeEnd(), unit = "s"),
        freq = pd.Timedelta(seconds = hourly.Interval()),
        inclusive = "left"
    )}
    # Every requested variable becomes a column; previously the variables after
    # wind_speed_100m were fetched but never written to the CSV.
    for position, variable_name in enumerate(HOURLY_VARIABLES):
        hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()

    hourly_dataframe = pd.DataFrame(data=hourly_data)
    hourly_dataframe.to_csv(hourly_path)


Coordinates -17.75044059753418°E 168.31394958496094°N
Elevation 56.0 m asl
Timezone b'Asia/Bangkok' b'+07'
Timezone difference to GMT+0 25200 s
Coordinates 10.509666442871094°E -66.92510986328125°N
Elevation 894.0 m asl
Timezone b'Asia/Bangkok' b'+07'
Timezone difference to GMT+0 25200 s
Coordinates 21.054479598999023°E 105.8071060180664°N
Elevation 10.0 m asl
Timezone b'Asia/Bangkok' b'+07'
Timezone difference to GMT+0 25200 s
Coordinates 15.360280990600586°E 44.240150451660156°N
Elevation 2252.0 m asl
Timezone b'Asia/Bangkok' b'+07'
Timezone difference to GMT+0 25200 s
Coordinates -15.43057918548584°E 28.309858322143555°N
Elevation 1279.0 m asl
Timezone b'Asia/Bangkok' b'+07'
Timezone difference to GMT+0 25200 s
Coordinates -17.820737838745117°E 31.07662582397461°N
Elevation 1481.0 m asl
Timezone b'Asia/Bangkok' b'+07'
Timezone difference to GMT+0 25200 s


In [None]:
# Process daily data. The order of variables needs to be the same as requested.
# NOTE(review): `response` is whatever the download loop fetched last, so this
# cell covers a single location and needs that loop to have downloaded at
# least one capital in the current kernel session.
daily = response.Daily()

# Column names, listed in request order; values are read back by position.
# NOTE(review): sunrise/sunset are timestamps -- confirm ValuesAsNumpy() yields
# usable values for them with the installed client version. TODO verify.
daily_variable_names = [
    "weather_code",
    "temperature_2m_max",
    "temperature_2m_min",
    "temperature_2m_mean",
    "apparent_temperature_max",
    "apparent_temperature_min",
    "apparent_temperature_mean",
    "sunrise",
    "sunset",
    "daylight_duration",
    "sunshine_duration",
    "precipitation_sum",
    "rain_sum",
    "snowfall_sum",
    "precipitation_hours",
    "wind_speed_10m_max",
    "wind_gusts_10m_max",
    "wind_direction_10m_dominant",
    "shortwave_radiation_sum",
    "et0_fao_evapotranspiration",
]

# One timestamp per daily step, from the first step up to (not including) the end.
daily_dates = pd.date_range(
    start=pd.to_datetime(daily.Time(), unit="s"),
    end=pd.to_datetime(daily.TimeEnd(), unit="s"),
    freq=pd.Timedelta(seconds=daily.Interval()),
    inclusive="left",
)

daily_data = {"date": daily_dates}
for position, variable_name in enumerate(daily_variable_names):
    daily_data[variable_name] = daily.Variables(position).ValuesAsNumpy()

daily_dataframe = pd.DataFrame(data=daily_data)

print(daily_dataframe)