In [1]:
import os
import time
from pathlib import Path

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

In [2]:
# Build the Open-Meteo API client on top of a cached, auto-retrying HTTP session.
# The cache is named ".cache"; expire_after=-1 keeps cached responses indefinitely.
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
# Wrap the cached session so failed requests are retried (5 retries, backoff factor 0.2).
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [3]:
# Endpoint of the Open-Meteo archive API that every request below is sent to.
# The weather variables themselves are listed in the request parameters of the
# download loop; their order matters because results are read back by position.
url = "https://archive-api.open-meteo.com/v1/archive"

In [4]:
# (name, latitude, longitude) for every country whose weather is downloaded.
# NOTE(review): the "China" coordinates look like Hong Kong rather than a
# mainland location -- confirm this is intended.
_COUNTRY_COORDINATES = [
    ("Angola", -11.2027, 17.8739),
    ("Brazil", -14.2350, -51.9253),
    ("Myanmar", 21.9162, 95.9560),
    ("Cambodia", 12.5657, 104.9910),
    ("Congo", -0.2280, 15.8277),
    ("Dem. Rep. of the Congo", -4.0383, 21.7587),
    ("Cyprus", 35.1264, 33.4299),
    ("Dominican Republic", 18.7357, -70.1627),
    ("Gambia", 13.4432, -15.3101),
    ("Germany", 51.1657, 10.4515),
    ("Ghana", 7.9465, -1.0232),
    ("China", 22.3193, 114.1694),
    ("Kuwait", 29.3759, 47.9774),
    ("Lao People's Dem. Rep.", 19.8563, 102.4955),
    ("Latvia", 56.8796, 24.6032),
    ("Madagascar", -18.7669, 46.8691),
    ("Mexico", 23.6345, -102.5528),
    ("Peru", -9.1900, -75.0152),
    ("Saint Kitts and Nevis", 17.3578, -62.7830),
    ("Seychelles", -4.6796, 55.4920),
    ("Slovakia", 48.6690, 19.6990),
    ("South Africa", -30.5595, 22.9375),
    ("Spain", 40.4637, -3.7492),
    ("Sudan", 12.8628, 30.2176),
    ("Sweden", 60.1282, 18.6435),
    ("Thailand", 15.8700, 100.9925),
    ("Togo", 8.6195, 0.8248),
    ("Trinidad and Tobago", 10.6918, -61.2225),
    ("Türkiye", 38.9637, 35.2433),
    ("Egypt", 26.8206, 30.8025),
    ("Zambia", -13.1339, 27.8493),
    ("Bulgaria", 42.7339, 25.4858),
    ("French Polynesia", -17.6797, -149.4068),
    ("Georgia", 42.3154, 43.3569),
    ("Portugal", 39.3999, -8.2245),
    ("Uganda", 1.3733, 32.2903),
    ("Montenegro", 42.7087, 19.3744),
    ("Australia", -25.2744, 133.7751),
    ("Belize", 17.1899, -88.4976),
    ("Ecuador", -1.8312, -78.1834),
    ("Finland", 61.9241, 25.7482),
    ("Kiribati", -3.3704, -168.7340),
    ("Guyana", 4.8604, -58.9302),
    ("Honduras", 15.2000, -86.2419),
    ("Hungary", 47.1625, 19.5033),
    ("Kyrgyzstan", 41.2044, 74.7661),
    ("Mozambique", -18.6657, 35.5296),
    ("Namibia", -22.9576, 18.4904),
    ("Romania", 45.9432, 24.9668),
]

# Expand the table into the list of {"name", "lat", "lon"} records used below.
countries = [
    {"name": name, "lat": lat, "lon": lon}
    for name, lat, lon in _COUNTRY_COORDINATES
]

In [5]:
# Results of the download loop below: country name -> {"daily": DataFrame}.
# Re-running this cell starts again from an empty dict.
weather_data = {}

# Create extract_daily_dataframe function
def extract_daily_dataframe(
    response,
    variable_names=(
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "precipitation_sum",
        "rain_sum",
        "wind_speed_10m_max",
        "wind_gusts_10m_max",
    ),
):
    """Convert the daily section of one API response into a DataFrame.

    Parameters
    ----------
    response
        Object exposing ``Daily()``, whose result provides ``Variables(i)``
        (with ``ValuesAsNumpy()``), ``Time()``, ``TimeEnd()`` and ``Interval()``.
    variable_names
        Column names for the daily variables, in the same order in which they
        were requested. Variables are read back by position, so this sequence
        must mirror the ``"daily"`` list of the request. Defaults to the seven
        variables used in this notebook.

    Returns
    -------
    pandas.DataFrame
        A ``date`` column (UTC timestamps) followed by one column per entry
        of ``variable_names``.
    """
    daily_data = response.Daily()

    # Time(), TimeEnd() and Interval() are interpreted as seconds; the end
    # timestamp itself is excluded (inclusive="left").
    daily_time = pd.date_range(
        start=pd.to_datetime(daily_data.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily_data.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily_data.Interval()),
        inclusive="left",
    )

    # Pair each requested variable name with the array at the same position.
    columns = {"date": daily_time}
    for position, column_name in enumerate(variable_names):
        columns[column_name] = daily_data.Variables(position).ValuesAsNumpy()

    return pd.DataFrame(columns)

# Daily variables to request. The order matters: extract_daily_dataframe reads
# the returned variables back by position.
daily_variables = (
    "temperature_2m_max",
    "temperature_2m_min",
    "temperature_2m_mean",
    "precipitation_sum",
    "rain_sum",
    "wind_speed_10m_max",
    "wind_gusts_10m_max",
)
last_index = len(countries) - 1

for i, country in enumerate(countries):
    # One archive request per country for 2015-01-01 through 2024-12-31.
    params = dict(
        latitude=country["lat"],
        longitude=country["lon"],
        start_date="2015-01-01",
        end_date="2024-12-31",
        daily=list(daily_variables),
        timezone="UTC",  # a local timezone name, e.g. "America/Los_Angeles", could be used instead
    )

    # The client returns a list of responses; only the first entry is used.
    responses = openmeteo.weather_api(url, params=params)
    response = responses[0]

    daily_df = extract_daily_dataframe(response)

    # Keep the result under the country's name.
    weather_data[country["name"]] = {"daily": daily_df}

    # Pause 15 seconds between requests (presumably to respect API rate
    # limits -- confirm); no pause is needed after the final country.
    if i != last_index:
        time.sleep(15)

print(weather_data)

{'Angola': {'daily':                           date  temperature_2m_max  temperature_2m_min  \
0    2015-01-01 00:00:00+00:00           29.823502           18.323502   
1    2015-01-02 00:00:00+00:00           29.073502           18.523500   
2    2015-01-03 00:00:00+00:00           26.973501           17.823502   
3    2015-01-04 00:00:00+00:00           26.723501           18.773500   
4    2015-01-05 00:00:00+00:00           29.273500           17.273500   
...                        ...                 ...                 ...   
3648 2024-12-27 00:00:00+00:00           25.652500           18.102501   
3649 2024-12-28 00:00:00+00:00           27.852501           18.102501   
3650 2024-12-29 00:00:00+00:00           28.152500           17.802500   
3651 2024-12-30 00:00:00+00:00           27.902500           18.002501   
3652 2024-12-31 00:00:00+00:00           26.002501           18.502501   

      temperature_2m_mean  precipitation_sum   rain_sum  wind_speed_10m_max  \
0          

In [6]:
# Build one long table from the per-country daily frames.
# Each frame is tagged with the country's name and coordinates; .assign returns
# a new frame, so the frames stored in weather_data are left untouched.
# Note: the identifier column is named "port" although it holds country names.
dfs = [
    weather_data[entry["name"]]["daily"].assign(
        port=entry["name"],
        latitude=entry["lat"],
        longitude=entry["lon"],
    )
    for entry in countries
]

# Stack all countries and renumber the rows from 0.
combined_df = pd.concat(dfs, ignore_index=True)

# Quick look at the first rows of the combined table.
print(combined_df.head())

                       date  temperature_2m_max  temperature_2m_min  \
0 2015-01-01 00:00:00+00:00           29.823502           18.323502   
1 2015-01-02 00:00:00+00:00           29.073502           18.523500   
2 2015-01-03 00:00:00+00:00           26.973501           17.823502   
3 2015-01-04 00:00:00+00:00           26.723501           18.773500   
4 2015-01-05 00:00:00+00:00           29.273500           17.273500   

   temperature_2m_mean  precipitation_sum   rain_sum  wind_speed_10m_max  \
0            23.136002           0.000000   0.000000           12.538134   
1            22.898504           2.700000   2.700000           10.144082   
2            21.829750           3.400000   3.400000            9.904906   
3            21.715170           7.300000   7.300000           10.163227   
4            22.258919          11.499999  11.499999            8.217153   

   wind_gusts_10m_max    port  latitude  longitude  
0           27.719999  Angola  -11.2027    17.8739  
1         

In [7]:
# Rich display of the first rows of the combined table.
combined_df.head()

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,temperature_2m_mean,precipitation_sum,rain_sum,wind_speed_10m_max,wind_gusts_10m_max,port,latitude,longitude
0,2015-01-01 00:00:00+00:00,29.823502,18.323502,23.136002,0.0,0.0,12.538134,27.719999,Angola,-11.2027,17.8739
1,2015-01-02 00:00:00+00:00,29.073502,18.5235,22.898504,2.7,2.7,10.144082,26.280001,Angola,-11.2027,17.8739
2,2015-01-03 00:00:00+00:00,26.973501,17.823502,21.82975,3.4,3.4,9.904906,23.039999,Angola,-11.2027,17.8739
3,2015-01-04 00:00:00+00:00,26.723501,18.7735,21.71517,7.3,7.3,10.163227,29.519999,Angola,-11.2027,17.8739
4,2015-01-05 00:00:00+00:00,29.2735,17.2735,22.258919,11.499999,11.499999,8.217153,23.759998,Angola,-11.2027,17.8739


In [8]:
# Sanity check: expect (number of countries x days per country) rows and
# 11 columns (date, 7 weather variables, port, latitude, longitude).
combined_df.shape

(178997, 11)

In [9]:
# Folder the prepared dataset is written to. It defaults to the original
# project location; set the PREPARED_DATA_DIR environment variable to write
# somewhere else without editing the notebook.
PREPARED_DATA_DIR = Path(
    os.environ.get(
        "PREPARED_DATA_DIR",
        "/Users/bobbi/Careerfoundry/Shipping_project/02 Data/Prepared Data",
    )
)

# Persist the combined table as a pickle so later notebooks can load it.
combined_df.to_pickle(PREPARED_DATA_DIR / "country_weather.pkl")